diff --git a/.gitignore b/.gitignore index 83f6e11b3b..7c8ee7b21d 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,4 @@ build/ *.vcd *.log *.out +work-ver/* diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f81d81ac67..97f8d30ade 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,77 +1,84 @@ before_script: - - export CXX=g++-4.8.3 CC=gcc-4.8.3 - # paths to local or network installation (the riscv toolchain and - # verilator are not built in a ci job in this case) - - export QUESTASIM_HOME=/scratch/$USER/questasim + - export CXX=g++-7 CC=gcc-7 + # paths to local or network installations (the riscv toolchain and + # verilator are not built in the ci job as in travis) + - export QUESTASIM_HOME= - export QUESTASIM_VERSION= - - export RISCV=/scratch/$USER/riscv_install - - export VERILATOR_ROOT=/scratch/$USER/verilator-3.924 + - export QUESTASIM_FLAGS= + - export RISCV=/scratch/$USER/projects/riscv_install + - export VERILATOR_ROOT=/scratch/$USER/projects/verilator-3.924 # setup dependent paths - export PATH=${RISCV}/bin:$VERILATOR_ROOT/bin:${PATH} - - export LIBRARY_PATH=$CI_PROJECT_DIR/tmp/lib - - export LD_LIBRARY_PATH=$CI_PROJECT_DIR/tmp/lib - - export C_INCLUDE_PATH=$CI_PROJECT_DIR/tmp/include:$VERILATOR_ROOT/include - - export CPLUS_INCLUDE_PATH=$CI_PROJECT_DIR/tmp/include:$VERILATOR_ROOT/include + - export LIBRARY_PATH=$RISCV/lib + - export LD_LIBRARY_PATH=$RISCV/lib + - export C_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include + - export CPLUS_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include # number of parallel jobs to use for make commands and simulation - export NUM_JOBS=4 - ci/make-tmp.sh - git submodule update --init --recursive - + variables: GIT_SUBMODULE_STRATEGY: recursive stages: - build - - test + - test_std +# prepare build: stage: build script: - - ci/install-fesvr.sh - ci/build-riscv-tests.sh + - ci/get-torture.sh - make clean - - make build questa_version=$QUESTASIM_VERSION - - make verilate verilator=$VERILATOR_ROOT/bin/verilator 
+ - make torture-gen artifacts: paths: - tmp # rv64ui-p-* and rv64ui-v-* tests run-asm-tests-questa: - stage: test + stage: test_std script: - - make -j${NUM_JOBS} run-asm-tests questa_version=$QUESTASIM_VERSION + - make -j${NUM_JOBS} run-asm-tests dependencies: - build run-benchmarks-questa: - stage: test + stage: test_std script: - - make -j${NUM_JOBS} run-benchmarks questa_version=$QUESTASIM_VERSION + - make -j${NUM_JOBS} run-benchmarks dependencies: - build # rv64ui-p-* tests run-asm-tests1-verilator: - stage: test + stage: test_std script: - - make -j${NUM_JOBS} run-asm-tests1-verilator verilator=$VERILATOR_ROOT/bin/verilator + - make -j${NUM_JOBS} run-asm-tests1-verilator dependencies: - build # rv64ui-v-* tests run-asm-tests2-verilator: - stage: test + stage: test_std script: - - make -j${NUM_JOBS} run-asm-tests2-verilator verilator=$VERILATOR_ROOT/bin/verilator + - make -j${NUM_JOBS} run-asm-tests2-verilator dependencies: - build run-benchmarks-verilator: - stage: test + stage: test_std script: - - make -j${NUM_JOBS} run-benchmarks-verilator verilator=$VERILATOR_ROOT/bin/verilator + - make -j${NUM_JOBS} run-benchmarks-verilator dependencies: - build - +torture: + stage: test_std + script: + - make torture-rtest + - make torture-rtest-verilator + dependencies: + - build diff --git a/.gitmodules b/.gitmodules index d4a445c131..38c356f40f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,12 +1,6 @@ -[submodule "riscv-torture"] - path = riscv-torture - url = https://github.com/pulp-platform/riscv-torture.git [submodule "src/axi_mem_if"] path = src/axi_mem_if url = https://github.com/pulp-platform/axi_mem_if.git -[submodule "src/axi_slice"] - path = src/axi_slice - url = https://github.com/pulp-platform/axi_slice.git [submodule "src/axi_node"] path = src/axi_node url = https://github.com/pulp-platform/axi_node.git @@ -15,4 +9,7 @@ url = https://github.com/pulp-platform/fpga-support.git [submodule "src/common_cells"] path = src/common_cells - url = 
https://github.com/pulp-platform/common_cells.git + url = https://github.com/pulp-platform/common_cells.git +[submodule "src/axi"] + path = src/axi + url = https://github.com/pulp-platform/axi.git diff --git a/.travis.yml b/.travis.yml index 2ed114006f..f98b7a6827 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,8 +13,8 @@ addons: sources: - ubuntu-toolchain-r-test packages: - - gcc-4.8 - - g++-4.8 + - gcc-7 + - g++-7 - gperf - autoconf - automake @@ -29,7 +29,7 @@ addons: - texinfo - python-pexpect - libusb-1.0-0-dev - - device-tree-compiler + - default-jdk env: global: - RISCV="/home/travis/riscv_install" @@ -41,49 +41,60 @@ branches: - ariane_next before_install: - - export CXX=g++-4.8 CC=gcc-4.8 + - export CXX=g++-7 CC=gcc-7 # setup dependent paths - export PATH=$RISCV/bin:$VERILATOR_ROOT/bin:$PATH - - export LIBRARY_PATH=$TRAVIS_BUILD_DIR/tmp/lib - - export LD_LIBRARY_PATH=$TRAVIS_BUILD_DIR/tmp/lib - - export C_INCLUDE_PATH=$TRAVIS_BUILD_DIR/tmp/include:$VERILATOR_ROOT/include - - export CPLUS_INCLUDE_PATH=$TRAVIS_BUILD_DIR/tmp/include:$VERILATOR_ROOT/include + - export LIBRARY_PATH=$RISCV/lib + - export LD_LIBRARY_PATH=$RISCV/lib + - export C_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include + - export CPLUS_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include # number of parallel jobs to use for make commands and simulation - export NUM_JOBS=4 - ci/make-tmp.sh - git submodule update --init --recursive stages: - - compile + - compile1 + - compile2 - test jobs: include: - - stage: compile - name: prepare cache + - stage: compile1 + name: build gcc script: - ci/build-riscv-gcc.sh + - stage: compile2 + name: build tools + script: + - ci/install-fesvr.sh - ci/install-verilator.sh + - ci/install-dtc.sh + - ci/install-spike.sh - stage: test name: run riscv benchmarks script: - - ci/install-fesvr.sh - ci/build-riscv-tests.sh - - make -j${NUM_JOBS} run-benchmarks-verilator verilator=$VERILATOR_ROOT/bin/verilator + - make -j${NUM_JOBS} run-benchmarks-verilator # 
rv64ui-p-* tests - stage: test - name: run rv64ui-p-* asm tests + name: run asm tests1 script: - - ci/install-fesvr.sh - ci/build-riscv-tests.sh - - make -j${NUM_JOBS} run-asm-tests1-verilator verilator=$VERILATOR_ROOT/bin/verilator + - make -j${NUM_JOBS} run-asm-tests1-verilator # rv64ui-v-* tests - stage: test - name: run rv64ui-v-* asm tests + name: run asm tests2 script: - - ci/install-fesvr.sh - ci/build-riscv-tests.sh - - make -j${NUM_JOBS} run-asm-tests2-verilator verilator=$VERILATOR_ROOT/bin/verilator - + - make -j${NUM_JOBS} run-asm-tests2-verilator + - stage: test + name: run torture + script: + - ci/get-torture.sh + - make clean + - make torture-gen + - make torture-rtest-verilator + # extra time during long builds install: travis_wait diff --git a/Bender.yml b/Bender.yml index 0ebabaeecd..1b12de0610 100644 --- a/Bender.yml +++ b/Bender.yml @@ -6,10 +6,10 @@ dependencies: axi: { git: "git@iis-git.ee.ethz.ch:sasa/axi.git", rev: master } axi2per: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi2per.git", rev: master } axi_mem_if: { git: "git@github.com:pulp-platform/axi_mem_if.git", rev: master } - axi_node: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi_node.git", version: 1.0.3 } + axi_node: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi_node.git", version: v1.1.0 } axi_slice: { git: "git@iis-git.ee.ethz.ch:sasa/axi_slice.git", version: 1.1.2 } tech_cells_generic: { git: "git@iis-git.ee.ethz.ch:pulp-open/tech_cells_generic.git", rev: master } - common_cells: { git: "git@iis-git.ee.ethz.ch:sasa/common_cells.git", version: v1.7.0 } + common_cells: { git: "git@iis-git.ee.ethz.ch:sasa/common_cells.git", version: v1.7.4 } fpga-support: { git: "https://github.com/pulp-platform/fpga-support.git", version: v0.3.2 } sources: - include/riscv_pkg.sv @@ -31,8 +31,11 @@ sources: - src/csr_regfile.sv - src/decoder.sv - src/ex_stage.sv - - src/fetch_fifo.sv - - src/frontend.sv + - src/frontend/btb.sv + - src/frontend/bht.sv + - src/frontend/ras.sv + - 
src/frontend/instr_scan.sv + - src/frontend/frontend.sv - src/icache.sv - src/id_stage.sv - src/instr_realigner.sv @@ -58,7 +61,17 @@ sources: - src/store_buffer.sv - src/store_unit.sv - src/tlb.sv + - src/commit_stage.sv + - src/axi_adapter.sv + - src/cache_subsystem/cache_ctrl.sv + - src/cache_subsystem/miss_handler.sv + - src/cache_subsystem/std_cache_subsystem.sv + - src/cache_subsystem/std_icache.sv + - src/cache_subsystem/std_nbdcache.sv + - src/debug/debug_rom/debug_rom.sv - src/debug/dm_csrs.sv + - src/clint/clint.sv + - src/clint/axi_lite_interface.sv - src/debug/dm_mem.sv - src/debug/dm_top.sv - src/debug/dmi_cdc.sv diff --git a/CHANGELOG.md b/CHANGELOG.md index f131bc7691..483e014ede 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +### Added + +- Commit log feature +- Support for A-Extension + ### 3.0.0 ### Added diff --git a/Makefile b/Makefile index 03685767c3..e498b69baa 100755 --- a/Makefile +++ b/Makefile @@ -2,11 +2,14 @@ # Date: 03/19/2017 # Description: Makefile for linting and testing Ariane. 
-# compile everything in the following library +# questa library library ?= work +# verilator lib +ver-library ?= work-ver +# library for DPI +dpi-library ?= work-dpi # Top level module to compile top_level ?= ariane_tb -test_top_level ?= ariane_tb # Maximum amount of cycles for a successful simulation run max_cycles ?= 10000000 # Test case to run @@ -17,55 +20,64 @@ questa_version ?= ${QUESTASIM_VERSION} verilator ?= verilator # traget option target-options ?= +# additional defines +defines ?= # Sources # Package files -> compile first ariane_pkg := include/riscv_pkg.sv \ src/debug/dm_pkg.sv \ include/ariane_pkg.sv \ include/std_cache_pkg.sv \ - include/axi_if.sv + src/axi/src/axi_pkg.sv \ + include/axi_intf.sv # utility modules util := $(wildcard src/util/*.svh) \ src/util/instruction_tracer_pkg.sv \ src/util/instruction_tracer_if.sv \ src/util/cluster_clock_gating.sv \ - src/util/sram.sv - + src/util/sram.sv + # Test packages test_pkg := $(wildcard tb/test/*/*sequence_pkg.sv*) \ $(wildcard tb/test/*/*_pkg.sv*) # DPI -dpi := $(patsubst tb/dpi/%.cc,work/%.o,$(wildcard tb/dpi/*.cc)) +dpi := $(patsubst tb/dpi/%.cc,${dpi-library}/%.o,$(wildcard tb/dpi/*.cc)) dpi_hdr := $(wildcard tb/dpi/*.h) # this list contains the standalone components src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \ + $(wildcard src/frontend/*.sv) \ $(wildcard src/cache_subsystem/*.sv) \ $(wildcard bootrom/*.sv) \ - $(wildcard src/axi_slice/*.sv) \ $(wildcard src/clint/*.sv) \ - $(wildcard src/axi_node/*.sv) \ + $(wildcard src/axi_node/src/*.sv) \ $(wildcard src/axi_mem_if/src/*.sv) \ $(filter-out src/debug/dm_pkg.sv, $(wildcard src/debug/*.sv)) \ $(wildcard src/debug/debug_rom/*.sv) \ + src/axi/src/axi_cut.sv \ + src/axi/src/axi_join.sv \ src/fpga-support/rtl/SyncSpRamBeNx64.sv \ - src/common_cells/src/deprecated/generic_fifo.sv \ - src/common_cells/src/deprecated/pulp_sync.sv \ + src/common_cells/src/sync.sv \ + src/common_cells/src/cdc_2phase.sv \ + 
src/common_cells/src/spill_register.sv \ + src/common_cells/src/sync_wedge.sv \ src/common_cells/src/fifo_v2.sv \ + src/common_cells/src/fifo_v1.sv \ src/common_cells/src/lzc.sv \ src/common_cells/src/rrarbiter.sv \ src/common_cells/src/lfsr_8bit.sv \ tb/ariane_testharness.sv \ tb/common/SimDTM.sv \ - tb/common/SimJTAG.sv - - - - + tb/common/SimJTAG.sv + + + +# root path +root-dir := $(shell pwd) # look for testbenches tbs := tb/ariane_tb.sv tb/ariane_testharness.sv # RISCV asm tests and benchmark setup (used for CI) -# there is a defined test-list with selected CI tests +# there is a defined test-list with selected CI tests riscv-test-dir := tmp/riscv-tests/build/isa/ riscv-benchmarks-dir := tmp/riscv-tests/build/benchmarks/ riscv-asm-tests-list := ci/riscv-asm-tests.list @@ -74,24 +86,27 @@ riscv-asm-tests := $(shell xargs printf '\n%s' < $(riscv-asm-tests-list) riscv-benchmarks := $(shell xargs printf '\n%s' < $(riscv-benchmarks-list) | cut -b 1-) # preset which runs a single test riscv-test ?= rv64ui-p-add -# failed test directory -failed-tests := $(wildcard failedtests/*.S) + # Search here for include files (e.g.: non-standalone components) -incdir := ./includes +incdir := # Compile and sim flags -compile_flag += +cover=bcfst+/dut -incr -64 -nologo -quiet -suppress 13262 -permissive +compile_flag += +cover=bcfst+/dut -incr -64 -nologo -quiet -suppress 13262 -permissive +define+$(defines) uvm-flags += +UVM_NO_RELNOTES # Iterate over all include directories and write them with +incdir+ prefixed # +incdir+ works for Verilator and QuestaSim list_incdir := $(foreach dir, ${incdir}, +incdir+$(dir)) +# RISCV torture setup +riscv-torture-dir := tmp/riscv-torture/ +riscv-torture-bin := java -Xmx1G -Xss8M -XX:MaxPermSize=128M -jar sbt-launch.jar + # Build the TB and module using QuestaSim -build: $(library) $(library)/.build-srcs $(library)/.build-tb $(library)/ariane_dpi.so +build: $(library) $(library)/.build-srcs $(library)/.build-tb $(dpi-library)/ariane_dpi.so # 
Optimize top level - vopt$(questa_version) $(compile_flag) -work $(library) $(test_top_level) -o $(test_top_level)_optimized +acc -check_synthesis + vopt$(questa_version) $(compile_flag) -work $(library) $(top_level) -o $(top_level)_optimized +acc -check_synthesis # src files -$(library)/.build-srcs: $(ariane_pkg) $(util) $(src) +$(library)/.build-srcs: $(ariane_pkg) $(util) $(src) $(library) vlog$(questa_version) $(compile_flag) -work $(library) $(filter %.sv,$(ariane_pkg)) $(list_incdir) -suppress 2583 vlog$(questa_version) $(compile_flag) -work $(library) $(filter %.sv,$(util)) $(list_incdir) -suppress 2583 # Suppress message that always_latch may not be checked thoroughly by QuestaSim. @@ -104,71 +119,71 @@ $(library)/.build-tb: $(dpi) $(tbs) vlog$(questa_version) -sv $(tbs) -work $(library) touch $(library)/.build-tb +$(library): + vlib${questa_version} ${library} + # compile DPIs -work/%.o: tb/dpi/%.cc $(dpi_hdr) +$(dpi-library)/%.o: tb/dpi/%.cc $(dpi_hdr) + mkdir -p $(dpi-library) $(CXX) -shared -fPIC -std=c++0x -Bsymbolic -I$(QUESTASIM_HOME)/include -o $@ $< -$(library)/ariane_dpi.so: $(dpi) +$(dpi-library)/ariane_dpi.so: $(dpi) + mkdir -p $(dpi-library) # Compile C-code and generate .so file - g++ -shared -m64 -o $(library)/ariane_dpi.so $? -lfesvr + $(CXX) -shared -m64 -o $(dpi-library)/ariane_dpi.so $? 
-lfesvr -$(library): - # Create the library - vlib${questa_version} ${library} -# +jtag_rbb_enable=1 -sim: build $(library)/ariane_dpi.so - vsim${questa_version} +permissive -64 -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ - +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ - $(QUESTASIM_FLAGS) \ - -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(library)/ariane_dpi -do " do tb/wave/wave_core.do; run -all; exit" \ +sim: build + vsim${questa_version} +permissive -64 -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ + +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ + $(QUESTASIM_FLAGS) \ + -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " log -r /*; run -all; exit" \ ${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$(riscv-test) ++$(target-options) -simc: build $(library)/ariane_dpi.so +simc: build vsim${questa_version} +permissive -64 -c -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ - +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ - $(QUESTASIM_FLAGS) \ - -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(library)/ariane_dpi -do " do tb/wave/wave_core.do; run -all; exit" \ + +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ + $(QUESTASIM_FLAGS) \ + -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " run -all; exit" \ ${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$(riscv-test) ++$(target-options) - -$(riscv-asm-tests): build $(library)/ariane_dpi.so +$(riscv-asm-tests): build vsim${questa_version} +permissive -64 -c -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ - +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ - $(QUESTASIM_FLAGS) \ - -gblso 
$(RISCV)/lib/libfesvr.so -sv_lib $(library)/ariane_dpi \ + +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ + $(QUESTASIM_FLAGS) \ + -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \ -do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ - ${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$@ ++$(target-options) | tee tmp/riscv-asm-tests-$@.log + ${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$@ ++$(target-options) | tee tmp/riscv-asm-tests-$@.log -$(riscv-benchmarks): build $(library)/ariane_dpi.so +$(riscv-benchmarks): build vsim${questa_version} +permissive -64 -c -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ +BASEDIR=$(riscv-benchmarks-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ - $(QUESTASIM_FLAGS) \ - -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(library)/ariane_dpi \ + $(QUESTASIM_FLAGS) \ + -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \ -do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ - ${top_level}_optimized +permissive-off ++$(riscv-benchmarks-dir)/$@ ++$(target-options) | tee tmp/riscv-benchmarks-$@.log - + ${top_level}_optimized +permissive-off ++$(riscv-benchmarks-dir)/$@ ++$(target-options) | tee tmp/riscv-benchmarks-$@.log # can use -jX to run ci tests in parallel using X processes run-asm-tests: $(riscv-asm-tests) - make check-asm-tests + make check-asm-tests -check-asm-tests: - ci/check-tests.sh tmp/riscv-asm-tests- $(riscv-asm-tests-list) +check-asm-tests: + ci/check-tests.sh tmp/riscv-asm-tests- $(shell wc -l $(riscv-asm-tests-list) | awk -F " " '{ print $$1 }') # can use -jX to run ci tests in parallel using X processes run-benchmarks: $(riscv-benchmarks) make check-benchmarks -check-benchmarks: - ci/check-tests.sh tmp/riscv-benchmarks- $(riscv-benchmarks-list) - 
+check-benchmarks: + ci/check-tests.sh tmp/riscv-benchmarks- $(shell wc -l $(riscv-benchmarks-list) | awk -F " " '{ print $$1 }') +# verilator-specific verilate_command := $(verilator) \ $(ariane_pkg) \ $(filter-out tb/ariane_bt.sv,$(src)) \ + +define+$(defines) \ src/util/sram.sv \ - +incdir+src/axi_node \ + +incdir+src/axi_node \ --unroll-count 256 \ -Werror-PINMISSING \ -Werror-IMPLICIT \ @@ -183,39 +198,72 @@ verilate_command := $(verilator) $(if $(DEBUG),--trace-structs --trace,) \ -LDFLAGS "-lfesvr" -CFLAGS "-std=c++11 -I../tb/dpi" -Wall --cc --vpi \ $(list_incdir) --top-module ariane_testharness \ - --Mdir build -O3 \ + --Mdir $(ver-library) -O3 \ --exe tb/ariane_tb.cpp tb/dpi/SimDTM.cc tb/dpi/SimJTAG.cc tb/dpi/remote_bitbang.cc # User Verilator, at some point in the future this will be auto-generated verilate: $(verilate_command) - cd build && make -j${NUM_JOBS} -f Variane_testharness.mk + cd $(ver-library) && make -j${NUM_JOBS} -f Variane_testharness.mk $(addsuffix -verilator,$(riscv-asm-tests)): verilate - build/Variane_testharness $(riscv-test-dir)/$(subst -verilator,,$@) + $(ver-library)/Variane_testharness $(riscv-test-dir)/$(subst -verilator,,$@) run-asm-tests-verilator: $(addsuffix -verilator, $(riscv-asm-tests)) # split into two halfs for travis jobs (otherwise they will time out) -run-asm-tests1-verilator: $(addsuffix -verilator, $(filter rv64ui-p-% ,$(riscv-asm-tests))) +run-asm-tests1-verilator: $(addsuffix -verilator, $(filter rv64ui-v-% ,$(riscv-asm-tests))) -run-asm-tests2-verilator: $(addsuffix -verilator, $(filter rv64ui-v-% ,$(riscv-asm-tests))) +run-asm-tests2-verilator: $(addsuffix -verilator, $(filter-out rv64ui-v-% ,$(riscv-asm-tests))) $(addsuffix -verilator,$(riscv-benchmarks)): verilate - build/Variane_testharness $(riscv-benchmarks-dir)/$(subst -verilator,,$@) + $(ver-library)/Variane_testharness $(riscv-benchmarks-dir)/$(subst -verilator,,$@) run-benchmarks-verilator: $(addsuffix -verilator,$(riscv-benchmarks)) +# torture-specific 
+torture-gen: + cd $(riscv-torture-dir) && $(riscv-torture-bin) 'generator/run' + +torture-itest: + cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -a output/test.S' + +torture-rtest: build + cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture defines=$(defines)" > call.sh && chmod +x call.sh + cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -r ./call.sh -a output/test.S' | tee output/test.log + make check-torture + +torture-rtest-verilator: verilate + cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture-verilator defines=$(defines)" > call.sh && chmod +x call.sh + cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -r ./call.sh -a output/test.S' | tee output/test.log + make check-torture + +run-torture: build + vsim${questa_version} +permissive -64 -c -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ + +BASEDIR=$(riscv-torture-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ + $(QUESTASIM_FLAGS) \ + -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \ + -do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ + ${top_level}_optimized +permissive-off \ + +signature=$(riscv-torture-dir)/output/test.rtlsim.sig ++$(riscv-torture-dir)/output/test ++$(target-options) + +run-torture-verilator: verilate + $(ver-library)/Variane_testharness +max-cycles=$(max_cycles) +signature=$(riscv-torture-dir)/output/test.rtlsim.sig $(riscv-torture-dir)/output/test -verify: - qverify vlog -sv src/csr_regfile.sv +check-torture: + grep 'All signatures match for output/test' $(riscv-torture-dir)/output/test.log + diff -s $(riscv-torture-dir)/output/test.spike.sig $(riscv-torture-dir)/output/test.rtlsim.sig clean: - rm -rf work/ *.ucdb - rm -rf build - rm -f tmp/*.ucdb - rm -f tmp/*.log + rm -rf $(riscv-torture-dir)/output/test* + rm -rf $(library)/ $(dpi-library)/ $(ver-library)/ 
+ rm -f tmp/*.ucdb tmp/*.log *.wlf *vstf wlft* *.ucdb -.PHONY: - build lint build-moore $(riscv-asm-tests) $(addsuffix _verilator,$(riscv-asm-tests)) $(riscv-benchmarks) $(addsuffix _verilator,$(riscv-benchmarks)) check simc sim verilate clean verilate +.PHONY: \ + build sim simc verilate clean \ + $(riscv-asm-tests) $(addsuffix -verilator,$(riscv-asm-tests)) \ + $(riscv-benchmarks) $(addsuffix -verilator,$(riscv-benchmarks)) \ + check-benchmarks check-asm-tests \ + torture-gen torture-itest torture-rtest torture-rtest-verilator \ + run-torture run-torture-verilator check-torture diff --git a/README.md b/README.md index 6a02ffba23..72ec74dc77 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # Ariane RISC-V CPU -Ariane is a 6-stage, single issue, in-order CPU which implements the 64-bit RISC-V instruction set. It fully implements I, M and C extensions as specified in Volume I: User-Level ISA V 2.1 as well as the draft privilege extension 1.10. It implements three privilege levels M, S, U to fully support a Unix-like operating system. Furthermore it is compliant to the draft external debug spec 0.13. +Ariane is a 6-stage, single issue, in-order CPU which implements the 64-bit RISC-V instruction set. It fully implements I, M, A and C extensions as specified in Volume I: User-Level ISA V 2.3 as well as the draft privilege extension 1.10. It implements three privilege levels M, S, U to fully support a Unix-like operating system. Furthermore it is compliant to the draft external debug spec 0.13. It has configurable size, separate TLBs, a hardware PTW and branch-prediction (branch target buffer and branch history table). The primary design goal was on reducing critical path length. 
@@ -25,11 +25,15 @@ Build the Verilator model of Ariane by using the Makefile: ``` $ make verilate ``` +To build the verilator model with support for vcd files run +``` +$ make verilate DEBUG=1 +``` -This will create a C++ model of the core including a SystemVerilog wrapper and link it against a C++ testbench (in the `tb` subfolder). The binary can be found in the `build` and accepts a RISC-V ELF binary as an argument, e.g.: +This will create a C++ model of the core including a SystemVerilog wrapper and link it against a C++ testbench (in the `tb` subfolder). The binary can be found in the `work-ver` and accepts a RISC-V ELF binary as an argument, e.g.: ``` -$ build/Variane_testharness rv64um-v-divuw +$ work-ver/Variane_testharness rv64um-v-divuw ``` The Verilator testbench makes use of the `riscv-fesvr`. This means that you can use the `riscv-tests` repository as well as `riscv-pk` out-of-the-box. As a general rule of thumb the Verilator model will behave like Spike (exception for being orders of magnitudes slower). @@ -40,29 +44,39 @@ Both, the Verilator model as well as the Questa simulation will produce trace lo $ spike-dasm < trace_core_00_0.dasm > logfile.txt ``` -### Running Applications +### Running User-Space Applications -It is possible to run user-space binaries on Ariane with `riscv-pk` ([link](https://github.com/riscv/riscv-pk)). As Ariane currently does not support atomics and floating point extensions make sure that you configure `riscv-pk` with: -`--with-arch=rv64imc`. In particular inside the `riscv-pk` directory do: +It is possible to run user-space binaries on Ariane with `riscv-pk` ([link](https://github.com/riscv/riscv-pk)). 
``` $ mkdir build $ cd build -$ ../configure --prefix=$RISCV --host=riscv64-unknown-elf --with-arch=rv64imc +$ ../configure --prefix=$RISCV --host=riscv64-unknown-elf $ make $ make install ``` Then to run a RISC-V ELF using the Verilator model do: +``` +$ echo ' +#include <stdio.h> + +int main(int argc, char const *argv[]) { + printf("Hello Ariane!\\n"); + return 0; +}' > hello.c +$ riscv64-unknown-elf-gcc hello.c -o hello.elf +``` + ``` $ make verilate -$ build/Variane_testharness /path/to/pk path/to/riscv.elf +$ work-ver/Variane_testharness $RISCV/riscv64-unknown-elf/bin/pk hello.elf ``` If you want to use QuestaSim to run it you can use the following command: ``` -$ make simc riscv-test=/path/to/pk target-options=path/to/riscv.elf +$ make simc riscv-test-dir=$RISCV/riscv64-unknown-elf/bin riscv-test=pk target-options=hello.elf ``` > Be patient! RTL simulation is way slower than Spike. If you think that you ran into problems you can inspect the trace files. @@ -73,7 +87,7 @@ Coming. ## Planned Improvements -While developing Ariane it has become evident that, in order to support Linux, the atomic extension is going to be mandatory. While the core is currently booting Linux by emulating Atomics in BBL (in a single core environment this is trivially met by disabling interrupts) this is not the behavior which is intended. For that reason we are going to fully support all atomic extensions in the very near future. +> Atomics are implemented for a single core environment. They will semantically fail in a multi-core setup. ## Going Beyond @@ -86,9 +100,46 @@ $ make sim riscv-test=tmp/risc-tests/build/isa/rv64ui-p-sraw If you call `simc` instead of `sim` it will run without the GUI. QuestaSim uses `riscv-fesvr` for communication as well. 
-### Randomized Constrained Testing with Torture +### CI Testsuites and Randomized Constrained Testing with Torture + +We provide two CI configuration files for Travis CI and GitLab CI that run the RISCV assembly tests, the RISCV benchmarks and a randomized RISCV Torture test. The difference between the two is that Travis CI runs these tests only on Verilator, whereas GitLab CI runs the same tests on QuestaSim and Verilator. + +If you would like to run the CI test suites locally on your machine, follow either of the two scripts `ci/travis-ci-emul.sh` and `ci/gitlab-ci-emul.sh` (depending on whether you have QuestaSim or not). In particular, you have to get the required packages for your system, adjust the paths in `ci/path-setup.sh` to match your setup, and run the installation and build scripts prior to running any of the test suites. + +Once everything is set up and installed, you can run the test suites as follows (using Verilator): + +``` +$ make verilate +$ make run-asm-tests-verilator +$ make run-benchmarks-verilator +``` + +In order to run randomized Torture tests, you first have to generate the randomized program prior to running the simulation: + +``` +$ ./ci/get-torture.sh +$ make torture-gen +$ make torture-rtest-verilator +``` +This runs the randomized program on Spike and on the RTL target, and checks whether the two signatures match. The random instruction mix can be configured in the `./tmp/riscv-torture/config/default.config` file. + +Ariane can dump a trace-log in Questa which can be easily diffed against Spike with commit log enabled. In `include/ariane_pkg.sv` set: + +```verilog +localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b1; +``` +This will dump a file called `trace_core_*_*_commit.log`. -Currently not up-to-date. +This can be helpful for debugging long traces (e.g.: torture traces). 
To compile Spike with the commit log feature do: + +``` +$ apt-get install device-tree-compiler +$ mkdir build +$ cd build +$ ../configure --prefix=$RISCV --with-fesvr=$RISCV --enable-commitlog +$ make +$ [sudo] make install +``` # Contributing diff --git a/ariane-run-torture b/ariane-run-torture deleted file mode 100755 index b8da5e5b3d..0000000000 --- a/ariane-run-torture +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -# make the verilog first -cd output && make -cd ../.. -# start the simulation -vsim-10.6b -c -lib work core_tb_optimized +UVM_TESTNAME=core_test $2 +BASEDIR=riscv-torture $1 +ASMTEST=$3 +UVM_VERBOSITY=LOW -coverage -classdebug -do "run -a" - diff --git a/bootrom/bootrom.img b/bootrom/bootrom.img index 1cc077cab4..e15ccba055 100644 Binary files a/bootrom/bootrom.img and b/bootrom/bootrom.img differ diff --git a/bootrom/bootrom.sv b/bootrom/bootrom.sv index 52efe82513..8add90eafd 100644 --- a/bootrom/bootrom.sv +++ b/bootrom/bootrom.sv @@ -20,15 +20,15 @@ module bootrom ( input logic [63:0] addr_i, output logic [63:0] rdata_o ); - localparam int RomSize = 141; + localparam int RomSize = 143; const logic [RomSize-1:0][63:0] mem = { - 64'h0064, - 64'h65646e65_7478652d, - 64'h73747075_72726574, - 64'h6e690073_65676e61, - 64'h7200656c_646e6168, - 64'h70007265_6c6c6f72, + 64'h00646564_6e657478, + 64'h652d7374_70757272, + 64'h65746e69_00736567, + 64'h6e617200_656c646e, + 64'h6168702c_78756e69, + 64'h6c007265_6c6c6f72, 64'h746e6f63_2d747075, 64'h72726574_6e690073, 64'h6c6c6563_2d747075, @@ -60,7 +60,7 @@ module bootrom ( 64'h4b000000_10000000, 64'h03000000_07000000, 64'h01000000_03000000, - 64'h01000000_ae000000, + 64'h01000000_b4000000, 64'h10000000_03000000, 64'h00000000_30746e69, 64'h6c632c76_63736972, @@ -68,7 +68,7 @@ module bootrom ( 64'h03000000_00000030, 64'h30303030_30324074, 64'h6e696c63_01000000, - 64'ha7000000_00000000, + 64'had000000_00000000, 64'h03000000_00007375, 64'h622d656c_706d6973, 64'h00636f73_2d657261, @@ -91,6 +91,8 @@ module bootrom ( 
64'h6f6d656d_01000000, 64'h02000000_02000000, 64'h02000000_01000000, + 64'ha5000000_04000000, + 64'h03000000_01000000, 64'h9f000000_04000000, 64'h03000000_00006374, 64'h6e692d75_70632c76, @@ -143,11 +145,11 @@ module bootrom ( 64'h00000000_01000000, 64'h00000000_00000000, 64'h00000000_00000000, - 64'he8020000_c2000000, + 64'hf8020000_c8000000, 64'h00000000_10000000, 64'h11000000_28000000, - 64'h20030000_38000000, - 64'he2030000_edfe0dd0, + 64'h30030000_38000000, + 64'hf8030000_edfe0dd0, 64'h00000000_00000000, 64'h00000000_00000000, 64'h00000000_00000000, diff --git a/ci/build-riscv-gcc.sh b/ci/build-riscv-gcc.sh index 233364e0bc..4548bc3706 100755 --- a/ci/build-riscv-gcc.sh +++ b/ci/build-riscv-gcc.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd) +VERSION="691e4e826251c7ec59f883cab18440c87baf45e7" cd $ROOT/tmp if [ -z ${NUM_JOBS} ]; then @@ -10,7 +11,7 @@ fi if ! [ -e $RISCV/bin ]; then [ -d $ROOT/tmp/riscv-gnu-toolchain ] || git clone https://github.com/riscv/riscv-gnu-toolchain.git cd riscv-gnu-toolchain - git checkout 691e4e826251c7ec59f883cab18440c87baf45e7 + git checkout $VERSION git submodule update --init --recursive mkdir -p $RISCV diff --git a/ci/build-riscv-tests.sh b/ci/build-riscv-tests.sh index d21f747fdc..2c335cbccc 100755 --- a/ci/build-riscv-tests.sh +++ b/ci/build-riscv-tests.sh @@ -1,6 +1,8 @@ #!/bin/bash set -e ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd) +VERSION="294bfce8a1ca2fc501b8939292146e44f813a2b8" + cd $ROOT/tmp if [ -z ${NUM_JOBS} ]; then @@ -9,7 +11,7 @@ fi [ -d $ROOT/tmp/riscv-tests ] || git clone https://github.com/riscv/riscv-tests.git cd riscv-tests -git checkout 294bfce8a1ca2fc501b8939292146e44f813a2b8 +git checkout $VERSION git submodule update --init --recursive autoconf mkdir -p build diff --git a/ci/check-tests.sh b/ci/check-tests.sh index f7fa3f9a47..e7b6e52a8c 100755 --- a/ci/check-tests.sh +++ b/ci/check-tests.sh @@ -24,13 +24,9 @@ if [ ! 
-f "${1}"*.log ]; then exit 1; fi -if [ ! -f "$2" ]; then - echo -e "${RED}FAILED file $2 does not exist ${NC}" - exit 1; -fi # get NUM_TOTAL number of tests -NUM_TOTAL=`wc -l $2 | awk -F " " '{ print $1 }'` +NUM_TOTAL=$2 echo "list containint tests: $2" @@ -38,10 +34,10 @@ echo "checking files:" ls "${1}"*.log # check for patterns -NUM_PASSED=`grep -s "SUCCESS" ${1}*.log | wc -l` -NUM_FAILED=`grep -s "FAILED" ${1}*.log | wc -l` -NUM_FATAL=`grep -s "Fatal:" ${1}*.log | wc -l` -NUM_ERROR=`grep -s "Error:" ${1}*.log | wc -l` +NUM_PASSED=`grep -i -s "SUCCESS" ${1}*.log | wc -l` +NUM_FAILED=`grep -i -s "FAILED" ${1}*.log | wc -l` +NUM_FATAL=`grep -i -s "Fatal:" ${1}*.log | wc -l` +NUM_ERROR=`grep -i -s "Error:" ${1}*.log | wc -l` echo "NUM_TOTAL: $NUM_TOTAL" echo "NUM_PASSED: $NUM_PASSED" diff --git a/ci/default.config b/ci/default.config new file mode 100644 index 0000000000..f405d4d8e6 --- /dev/null +++ b/ci/default.config @@ -0,0 +1,52 @@ +torture.generator.nseqs 1000 +torture.generator.memsize 1024 +torture.generator.fprnd 0 +torture.generator.amo true +torture.generator.mul true +torture.generator.divider true +torture.generator.segment true +torture.generator.loop true +torture.generator.loop_size 64 + +torture.generator.mix.xmem 15 +torture.generator.mix.xbranch 25 +torture.generator.mix.xalu 60 +torture.generator.mix.fgen 0 +torture.generator.mix.fpmem 0 +torture.generator.mix.fax 0 +torture.generator.mix.fdiv 0 +torture.generator.mix.vec 0 + +torture.generator.vec.vf 1 +torture.generator.vec.seq 20 +torture.generator.vec.memsize 128 +torture.generator.vec.numsregs 64 +torture.generator.vec.mul false +torture.generator.vec.div false +torture.generator.vec.mix true +torture.generator.vec.fpu false +torture.generator.vec.fma false +torture.generator.vec.fcvt false +torture.generator.vec.fdiv false +torture.generator.vec.amo false +torture.generator.vec.seg false +torture.generator.vec.stride false +torture.generator.vec.pred_alu true 
+torture.generator.vec.pred_mem true + +torture.generator.vec.mix.valu 20 +torture.generator.vec.mix.vpop 60 +torture.generator.vec.mix.vmem 20 +torture.generator.vec.mix.vonly 0 + +torture.testrun.maxcycles 10000000 +torture.testrun.virtual false +torture.testrun.seek true +torture.testrun.dump false +torture.testrun.vec false + +torture.overnight.errors 1 +torture.overnight.minutes 1 +torture.overnight.outdir output/failedtests +torture.overnight.email your@email.address + diff --git a/ci/get-torture.sh b/ci/get-torture.sh new file mode 100755 index 0000000000..22ec2100c6 --- /dev/null +++ b/ci/get-torture.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -e +ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd) +VERSION="59b0f0f224ff4f1eb6ebb1b4dd7eaf1ab3fac2e5" + +cd $ROOT/tmp + +if [ -z ${NUM_JOBS} ]; then + NUM_JOBS=1 +fi + +[ -d $ROOT/tmp/riscv-torture ] || git clone https://github.com/ucb-bar/riscv-torture.git +cd riscv-torture +git checkout $VERSION +git submodule update --init --recursive + +# copy ariane specific config +cp config/default.config config/default.config.bak +cp $ROOT/ci/default.config config/default.config +git apply $ROOT/ci/torture_make.patch + diff --git a/ci/gitlab-ci-emul.sh b/ci/gitlab-ci-emul.sh index 287d21dbc8..1417d89cab 100755 --- a/ci/gitlab-ci-emul.sh +++ b/ci/gitlab-ci-emul.sh @@ -1,10 +1,10 @@ -# !/bin/bash +#!/bin/bash # This script emulates what the gitlab ci config does (not on public server) # source this with a bash shell in the project root # comment out next command if you don't want to use sudo sudo apt install \ - gcc-4.8 \ - g++-4.8 \ + gcc-7 \ + g++-7 \ gperf \ autoconf \ automake \ @@ -19,25 +19,34 @@ sudo apt install \ texinfo \ python-pexpect \ libusb-1.0-0-dev \ - device-tree-compiler + default-jdk # customize your paths here source ci/path-setup.sh +# install the required tools git submodule update --init --recursive ci/make-tmp.sh ci/build-riscv-gcc.sh ci/install-fesvr.sh ci/install-verilator.sh ci/build-riscv-tests.sh 
+ci/install-dtc.sh +ci/install-spike.sh +ci/get-torture.sh + +# clean up and generate randomized test make clean +make torture-gen # run asm tests on verilator -make -j${NUM_JOBS} verilate verilator=$VERILATOR_ROOT/bin/verilator -make -j${NUM_JOBS} run-asm-tests-verilator verilator=$VERILATOR_ROOT/bin/verilator -make -j${NUM_JOBS} run-benchmarks-verilator verilator=$VERILATOR_ROOT/bin/verilator +make -j${NUM_JOBS} verilate +make -j${NUM_JOBS} run-asm-tests-verilator +make -j${NUM_JOBS} run-benchmarks-verilator +make -j${NUM_JOBS} torture-rtest-verilator # run asm tests on questa -make -j${NUM_JOBS} build questa_version=$QUESTASIM_VERSION -make -j${NUM_JOBS} run-asm-tests questa_version=$QUESTASIM_VERSION -make -j${NUM_JOBS} run-benchmarks questa_version=$QUESTASIM_VERSION +make -j${NUM_JOBS} build +make -j${NUM_JOBS} run-asm-tests +make -j${NUM_JOBS} run-benchmarks +make -j${NUM_JOBS} torture-rtest diff --git a/ci/install-dtc.sh b/ci/install-dtc.sh new file mode 100755 index 0000000000..60dd0d62ed --- /dev/null +++ b/ci/install-dtc.sh @@ -0,0 +1,22 @@ +#!/bin/bash +set -e +ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd) +VERSION="b94c056b137e59deefc62fbfe0cd3a23edfcc07c" + +cd $ROOT/tmp + +if [ -z ${NUM_JOBS} ]; then + NUM_JOBS=1 +fi + +if [ ! -e "$RISCV/dtc/dtc" ]; then + echo "Installing DTC" + git clone https://git.kernel.org/pub/scm/utils/dtc/dtc.git + cd dtc + git checkout $VERSION + make -j${NUM_JOBS} PREFIX=$RISCV/ NO_PYTHON=1 + make -j${NUM_JOBS} check NO_PYTHON=1 + make -j${NUM_JOBS} install PREFIX=$RISCV/ NO_PYTHON=1 +else + echo "Using DTC from cached directory." +fi diff --git a/ci/install-fesvr.sh b/ci/install-fesvr.sh index 899083126b..2b9c0d7aa7 100755 --- a/ci/install-fesvr.sh +++ b/ci/install-fesvr.sh @@ -1,19 +1,27 @@ #!/bin/bash set -e ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd) +VERSION=30e85ce253788b29bd4ac0b5e5c23a077d96dc24 + cd $ROOT/tmp -RELEASE=0.1.0 if [ -z ${NUM_JOBS} ]; then NUM_JOBS=1 fi -if ! 
[ -e $ROOT/tmp/riscv-fesvr ]; then +if [ ! -e "${RISCV}/lib/libfesvr.so" ]; then + echo "Installing RISCV FESVR" git clone https://github.com/riscv/riscv-fesvr.git + cd riscv-fesvr + git checkout $VERSION + mkdir -p build + cd build + ../configure --prefix="$RISCV/" + make -j${NUM_JOBS} + make install +else + echo "Using RISCV FESVR from cached directory." fi -cd $ROOT/tmp/riscv-fesvr -mkdir -p build -cd build -../configure --prefix="$ROOT/tmp" -make -j${NUM_JOBS} -make install + + + diff --git a/ci/install-spike.sh b/ci/install-spike.sh new file mode 100755 index 0000000000..77fd803b66 --- /dev/null +++ b/ci/install-spike.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -e +ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd) +VERSION="def4c5b104efd382e633d5fdca49508757bb5e23" + +cd $ROOT/tmp + +if [ -z ${NUM_JOBS} ]; then + NUM_JOBS=1 +fi + +if [ ! -e "${RISCV}/bin/spike" ]; then + echo "Installing Spike" + git clone https://github.com/riscv/riscv-isa-sim.git + cd riscv-isa-sim + git checkout $VERSION + mkdir -p build + cd build + ../configure --prefix="$RISCV/" + make -j${NUM_JOBS} + make install +else + echo "Using Spike from cached directory." 
+fi + + + diff --git a/ci/path-setup.sh b/ci/path-setup.sh index d7d4b33f0c..66394262af 100644 --- a/ci/path-setup.sh +++ b/ci/path-setup.sh @@ -5,17 +5,18 @@ export CI_BUILD_DIR=$TOP/ariane-repo #customize this to your setup export QUESTASIM_HOME= export QUESTASIM_VERSION= -export CXX=g++-4.8 CC=gcc-4.8 +export QUESTASIM_FLAGS= +export CXX=g++-7 CC=gcc-7 # where to install the tools export RISCV=$TOP/riscv_install export VERILATOR_ROOT=$TOP/verilator-3.924/ export PATH=$RISCV/bin:$VERILATOR_ROOT/bin:$PATH -export LIBRARY_PATH=$CI_BUILD_DIR/tmp/lib -export LD_LIBRARY_PATH=$CI_BUILD_DIR/tmp/lib -export C_INCLUDE_PATH=$CI_BUILD_DIR/tmp/include:$VERILATOR_ROOT/include -export CPLUS_INCLUDE_PATH=$CI_BUILD_DIR/tmp/include:$VERILATOR_ROOT/include +export LIBRARY_PATH=$RISCV/lib +export LD_LIBRARY_PATH=$RISCV/lib +export C_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include +export CPLUS_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include # number of parallel jobs to use for make commands and simulation export NUM_JOBS=8 diff --git a/ci/riscv-asm-tests.list b/ci/riscv-asm-tests.list index 3ecc8933c5..01e12844b0 100644 --- a/ci/riscv-asm-tests.list +++ b/ci/riscv-asm-tests.list @@ -127,3 +127,41 @@ rv64um-v-divw rv64um-v-divuw rv64um-v-remw rv64um-v-remuw +rv64ua-p-amoadd_d +rv64ua-p-amoadd_w +rv64ua-p-amoor_d +rv64ua-p-amoor_w +rv64ua-p-amoand_d +rv64ua-p-amoand_w +rv64ua-p-amoswap_d +rv64ua-p-amoswap_w +rv64ua-p-amoxor_d +rv64ua-p-amoxor_w +rv64ua-p-amomax_d +rv64ua-p-amomaxu_d +rv64ua-p-amomaxu_w +rv64ua-p-amomax_w +rv64ua-p-amomin_d +rv64ua-p-amomin_w +rv64ua-p-amominu_d +rv64ua-p-amominu_w +rv64ua-p-lrsc +rv64ua-v-amoadd_d +rv64ua-v-amoadd_w +rv64ua-v-amoor_d +rv64ua-v-amoor_w +rv64ua-v-amoand_d +rv64ua-v-amoand_w +rv64ua-v-amoswap_d +rv64ua-v-amoswap_w +rv64ua-v-amoxor_d +rv64ua-v-amoxor_w +rv64ua-v-amomax_d +rv64ua-v-amomaxu_d +rv64ua-v-amomaxu_w +rv64ua-v-amomax_w +rv64ua-v-amomin_d +rv64ua-v-amomin_w +rv64ua-v-amominu_d +rv64ua-v-amominu_w +rv64ua-v-lrsc \ No 
newline at end of file diff --git a/ci/torture_make.patch b/ci/torture_make.patch new file mode 100644 index 0000000000..35114604c4 --- /dev/null +++ b/ci/torture_make.patch @@ -0,0 +1,41 @@ +diff --git a/output/Makefile b/output/Makefile +index cf1214f..c81bccc 100644 +--- a/output/Makefile ++++ b/output/Makefile +@@ -20,9 +20,9 @@ extra_files = + #-------------------------------------------------------------------- + + RISCV_GCC = riscv64-unknown-elf-gcc +-RISCV_GCC_OPTS = -nostdlib -nostartfiles -Wa,-march=RVIMAFDXhwacha ++RISCV_GCC_OPTS = -nostdlib -nostartfiles -Wa,-march=rv64imc + RISCV_OBJDUMP = riscv64-unknown-elf-objdump --disassemble-all --section=.text --section=.data --section=.bss +-RISCV_SIM = spike --extension=hwacha ++RISCV_SIM = spike + + #------------------------------------------------------------ + # Build assembly tests +@@ -38,9 +38,6 @@ $(asm_tests_dump): %.dump: % + $(asm_tests_bin): %: %.S $(extra_files) + $(RISCV_GCC) $(RISCV_GCC_OPTS) -I../env/p -T../env/p/link.ld $< -o $@ + +-$(asm_tests_hex): %.hex: % $(extra_files) +- elf2hex 16 16384 $< > $@ +- + $(asm_tests_sig): %.sig: % + $(RISCV_SIM) +signature=$@ $< + +@@ -51,12 +48,12 @@ run: $(asm_tests_sig) + echo; perl -ne 'print " [$$1] $$ARGV \t$$2\n" if /\*{3}(.{8})\*{3}(.*)/' \ + $(asm_tests_sig); echo; + +-junk += $(asm_tests_bin) $(asm_tests_dump) $(asm_tests_sig) $(asm_tests_hex) ++junk += $(asm_tests_bin) $(asm_tests_dump) $(asm_tests_sig) + + #------------------------------------------------------------ + # Default + +-all: $(asm_tests_dump) $(asm_tests_hex) ++all: $(asm_tests_dump) + + #------------------------------------------------------------ + # Clean up diff --git a/ci/travis-ci-emul.sh b/ci/travis-ci-emul.sh index ce9c98d7a3..bb6d9df22c 100644 --- a/ci/travis-ci-emul.sh +++ b/ci/travis-ci-emul.sh @@ -3,8 +3,8 @@ # source this with a bash shell in the project root # comment out next command if you don't want to use sudo sudo apt install \ - gcc-4.8 \ - g++-4.8 \ + gcc-7 \ + 
g++-7 \ gperf \ autoconf \ automake \ @@ -19,20 +19,28 @@ sudo apt install \ texinfo \ python-pexpect \ libusb-1.0-0-dev \ - device-tree-compiler + default-jdk # customize your paths here source ci/path-setup.sh +# install the required tools git submodule update --init --recursive ci/make-tmp.sh ci/build-riscv-gcc.sh ci/install-fesvr.sh ci/install-verilator.sh ci/build-riscv-tests.sh +ci/install-dtc.sh +ci/install-spike.sh +ci/get-torture.sh + +# clean up and generate randomized test make clean +make torture-gen # run asm tests on verilator -make -j${NUM_JOBS} verilate verilator=$VERILATOR_ROOT/bin/verilator -make -j${NUM_JOBS} run-asm-tests-verilator verilator=$VERILATOR_ROOT/bin/verilator -make -j${NUM_JOBS} run-benchmarks-verilator verilator=$VERILATOR_ROOT/bin/verilator +make -j${NUM_JOBS} verilate +make -j${NUM_JOBS} run-asm-tests-verilator +make -j${NUM_JOBS} run-benchmarks-verilator +make -j${NUM_JOBS} torture-rtest-verilator diff --git a/fpga/.gitignore b/fpga/.gitignore new file mode 100644 index 0000000000..c7bcdbac20 --- /dev/null +++ b/fpga/.gitignore @@ -0,0 +1,9 @@ +.* +!.gitignore +vivado* +*.cache +*.hw +*.ip_user_files +*.runs +*.srcs +*.xpr \ No newline at end of file diff --git a/fpga/ariane.xdc b/fpga/ariane.xdc new file mode 100644 index 0000000000..329269aeeb --- /dev/null +++ b/fpga/ariane.xdc @@ -0,0 +1,33 @@ +set_property PACKAGE_PIN AM13 [get_ports sys_rst] +set_property IOSTANDARD LVCMOS33 [get_ports sys_rst] +set_property PACKAGE_PIN A20 [get_ports tck] +set_property IOSTANDARD LVCMOS33 [get_ports tck] +set_property PACKAGE_PIN B20 [get_ports tdi] +set_property PACKAGE_PIN A22 [get_ports tdo] +set_property PACKAGE_PIN A21 [get_ports tms] +set_property PACKAGE_PIN B21 [get_ports trst_n] +set_property IOSTANDARD LVCMOS33 [get_ports tdi] +set_property IOSTANDARD LVCMOS33 [get_ports tdo] +set_property IOSTANDARD LVCMOS33 [get_ports tms] +set_property IOSTANDARD LVCMOS33 [get_ports trst_n] +# accept sub-optimal placement +set_property 
CLOCK_DEDICATED_ROUTE FALSE [get_nets tck_IBUF_inst/O] + + +create_clock -period 100.000 -name tck -waveform {0.000 50.000} [get_ports tck] +set_input_jitter tck 15.000 + +#################################################################################### +# Constraints from file : 'axi_clock_converter_0_clocks.xdc' +#################################################################################### + +set_max_delay -datapath_only -from [get_clocks tck] -to [get_clocks mmcm_clkout0] 5.000 +set_max_delay -datapath_only -from [get_clocks mmcm_clkout0] -to [get_clocks tck] 5.000 + +set_max_delay -datapath_only -from [get_clocks tck] -to [get_clocks c0_sys_clk_p] 5.000 +set_max_delay -datapath_only -from [get_clocks c0_sys_clk_p] -to [get_clocks tck] 5.000 + +set_property C_CLK_INPUT_FREQ_HZ 300000000 [get_debug_cores dbg_hub] +set_property C_ENABLE_CLK_DIVIDER false [get_debug_cores dbg_hub] +set_property C_USER_SCAN_CHAIN 1 [get_debug_cores dbg_hub] +connect_debug_port dbg_hub/clk [get_nets clk_1] diff --git a/fpga/ariane_xilinx.sv b/fpga/ariane_xilinx.sv new file mode 100644 index 0000000000..a48a78a872 --- /dev/null +++ b/fpga/ariane_xilinx.sv @@ -0,0 +1,543 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +module ariane_xilinx ( + input logic c0_sys_clk_p, // Clock + input logic c0_sys_clk_n, // Clock + input logic sys_rst, // active high reset + output logic c0_ddr4_act_n, + output logic [16:0] c0_ddr4_adr, + output logic [1:0] c0_ddr4_ba, + output logic [0:0] c0_ddr4_bg, + output logic [0:0] c0_ddr4_cke, + output logic [0:0] c0_ddr4_odt, + output logic [0:0] c0_ddr4_cs_n, + output logic [0:0] c0_ddr4_ck_t, + output logic [0:0] c0_ddr4_ck_c, + output logic c0_ddr4_reset_n, + inout logic [1:0] c0_ddr4_dm_dbi_n, + inout logic [15:0] c0_ddr4_dq, + inout logic [1:0] c0_ddr4_dqs_c, + inout logic [1:0] c0_ddr4_dqs_t, + input logic tck, + input logic tms, + input logic trst_n, + input logic tdi, + output logic tdo +); + +localparam logic [63:0] RomBase = 64'h10000; +localparam NBSlave = 4; // debug, Instruction fetch, data bypass, data +localparam NBMaster = 3; // debug, ROM, RAM + +localparam logic [63:0] CacheStartAddr = 64'h8000_0000; +localparam AxiAddrWidth = 64; +localparam AxiDataWidth = 64; +localparam AxiIdWidthMaster = 2; +localparam AxiIdWidthSlaves = AxiIdWidthMaster + $clog2(NBSlave); // 4 +localparam AxiUserWidth = 1; + +AXI_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMaster ), + .AXI_USER_WIDTH ( AxiUserWidth ) +) slave[NBSlave-1:0](); + +AXI_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMaster ), + .AXI_USER_WIDTH ( AxiUserWidth ) +) slave_slice[NBSlave-1:0](); + +AXI_BUS #( + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_ID_WIDTH ( AxiIdWidthSlaves ), + .AXI_USER_WIDTH ( AxiUserWidth ) +) master[NBMaster-1:0](); + +// disable test-enable +logic test_en; +logic ndmreset; +logic ndmreset_n; +logic debug_req; +logic clk; +logic rst_n; + +// DDR +logic c0_ddr4_ui_clk; +logic c0_init_calib_complete; // left open +logic c0_ddr4_ui_clk_sync_rst; +logic addn_ui_clkout1; + +logic [3:0] s_axi_awid; +logic 
[31:0] s_axi_awaddr; +logic [7:0] s_axi_awlen; +logic [2:0] s_axi_awsize; +logic [1:0] s_axi_awburst; +logic [0:0] s_axi_awlock; +logic [3:0] s_axi_awcache; +logic [2:0] s_axi_awprot; +logic [3:0] s_axi_awregion; +logic [3:0] s_axi_awqos; +logic s_axi_awvalid; +logic s_axi_awready; +logic [63:0] s_axi_wdata; +logic [7:0] s_axi_wstrb; +logic s_axi_wlast; +logic s_axi_wvalid; +logic s_axi_wready; +logic [3:0] s_axi_bid; +logic [1:0] s_axi_bresp; +logic s_axi_bvalid; +logic s_axi_bready; +logic [3:0] s_axi_arid; +logic [31:0] s_axi_araddr; +logic [7:0] s_axi_arlen; +logic [2:0] s_axi_arsize; +logic [1:0] s_axi_arburst; +logic [0:0] s_axi_arlock; +logic [3:0] s_axi_arcache; +logic [2:0] s_axi_arprot; +logic [3:0] s_axi_arregion; +logic [3:0] s_axi_arqos; +logic s_axi_arvalid; +logic s_axi_arready; +logic [3:0] s_axi_rid; +logic [63:0] s_axi_rdata; +logic [1:0] s_axi_rresp; +logic s_axi_rlast; +logic s_axi_rvalid; +logic s_axi_rready; + +logic [31:0] m_axi_awaddr; +logic [7:0] m_axi_awlen; +logic [2:0] m_axi_awsize; +logic [1:0] m_axi_awburst; +logic [0:0] m_axi_awlock; +logic [3:0] m_axi_awcache; +logic [2:0] m_axi_awprot; +logic [3:0] m_axi_awregion; +logic [3:0] m_axi_awqos; +logic m_axi_awvalid; +logic m_axi_awready; +logic [127:0] m_axi_wdata; +logic [15:0] m_axi_wstrb; +logic m_axi_wlast; +logic m_axi_wvalid; +logic m_axi_wready; +logic [1:0] m_axi_bresp; +logic m_axi_bvalid; +logic m_axi_bready; +logic [31:0] m_axi_araddr; +logic [7:0] m_axi_arlen; +logic [2:0] m_axi_arsize; +logic [1:0] m_axi_arburst; +logic [0:0] m_axi_arlock; +logic [3:0] m_axi_arcache; +logic [2:0] m_axi_arprot; +logic [3:0] m_axi_arregion; +logic [3:0] m_axi_arqos; +logic m_axi_arvalid; +logic m_axi_arready; +logic [127:0] m_axi_rdata; +logic [1:0] m_axi_rresp; +logic m_axi_rlast; +logic m_axi_rvalid; +logic m_axi_rready; + +logic debug_req_valid; +logic debug_req_ready; +logic [6:0] debug_req_bits_addr; +logic [1:0] debug_req_bits_op; +logic [31:0] debug_req_bits_data; +logic 
debug_resp_valid; +logic debug_resp_ready; +logic [1:0] debug_resp_bits_resp; +logic [31:0] debug_resp_bits_data; + +assign clk = addn_ui_clkout1; +assign rst_n = ~c0_ddr4_ui_clk_sync_rst; +assign test_en = 1'b0; +assign ndmreset_n = ~ndmreset ; + +// Slice the AXI Masters (slave ports on the XBar) +for (genvar i = 0; i < NBSlave; i++) begin : slave_cut_gen + axi_cut #( + .ADDR_WIDTH ( AxiAddrWidth ), + .DATA_WIDTH ( AxiDataWidth ), + .ID_WIDTH ( AxiIdWidthMaster ), + .USER_WIDTH ( AxiUserWidth ) + ) i_axi_cut ( + .clk_i ( clk ), + .rst_ni ( ndmreset_n ), + .in ( slave_slice[i] ), + .out ( slave[i] ) + ); +end + +// --------------- +// AXI Xbar +// --------------- +axi_node_intf_wrap #( + // three ports from Ariane (instruction, data and bypass) + .NB_SLAVE ( NBSlave ), + .NB_MASTER ( NBMaster ), // debug unit, memory unit + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_USER_WIDTH ( AxiUserWidth ), + .AXI_ID_WIDTH ( AxiIdWidthMaster ) +) i_axi_xbar ( + .clk ( clk ), + .rst_n ( ndmreset_n ), + .test_en_i ( test_en ), + .slave ( slave ), + .master ( master ), + .start_addr_i ( {64'h0, RomBase, CacheStartAddr} ), + .end_addr_i ( {64'hFFF, RomBase + 64'hFFFF, CacheStartAddr + 2**24} ) +); + +dm::dmi_req_t debug_req;; +dm::dmi_resp_t debug_resp; + +// --------------- +// Debug Module +// --------------- +dmi_jtag i_dmi_jtag ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .dmi_rst_no ( ), // keep open + .dmi_req_valid_o ( debug_req_valid ), + .dmi_req_ready_i ( debug_req_ready ), + .dmi_req_o ( debug_req ), + .dmi_resp_valid_i ( debug_resp_valid ), + .dmi_resp_ready_o ( debug_resp_ready ), + .dmi_resp_i ( debug_resp ), + .tck_i ( tck ), + .tms_i ( tms ), + .trst_ni ( trst_n ), + .td_i ( tdi ), + .td_o ( tdo ), + .tdo_oe_o ( ) +); + +// debug module +dm_top #( + // current implementation only supports 1 hart + .NrHarts ( 1 ), + .AxiIdWidth ( AxiIdWidthSlaves ), + .AxiAddrWidth ( AxiAddrWidth ), + .AxiDataWidth ( AxiDataWidth ), + .AxiUserWidth ( 
AxiUserWidth ) +) i_dm_top ( + .clk_i ( clk ), + .rst_ni ( rst_n ), // PoR + .testmode_i ( test_en ), + .ndmreset_o ( ndmreset ), + .dmactive_o ( ), // active debug session + .debug_req_o ( debug_req ), + .unavailable_i ( '0 ), + .axi_master ( slave_slice[3] ), + .axi_slave ( master[2] ), + .dmi_rst_ni ( rst_n ), + .dmi_req_valid_i ( debug_req_valid ), + .dmi_req_ready_o ( debug_req_ready ), + .dmi_req_i ( debug_req ), + .dmi_resp_valid_o ( debug_resp_valid ), + .dmi_resp_ready_i ( debug_resp_ready ), + .dmi_resp_o ( debug_resp ) +); + +// --------------- +// Core +// --------------- +ariane #( + .CACHE_START_ADDR ( CacheStartAddr ), + .AXI_ID_WIDTH ( AxiIdWidthMaster ), + .AXI_USER_WIDTH ( AxiUserWidth ) +) i_ariane ( + .clk_i ( clk ), + .rst_ni ( ndmreset_n ), + .boot_addr_i ( RomBase ), // start fetching from ROM + .core_id_i ( '0 ), + .cluster_id_i ( '0 ), + .irq_i ( '0 ), + .ipi_i ( '0 ), + .time_irq_i ( '0 ), + .debug_req_i ( debug_req ), + .data_if ( slave_slice[2] ), + .bypass_if ( slave_slice[1] ), + .instr_if ( slave_slice[0] ) +); + +// --------------- +// ROM +// --------------- +logic rom_req; +logic [AxiAddrWidth-1:0] rom_addr; +logic [AxiDataWidth-1:0] rom_rdata; + +axi2mem #( + .AXI_ID_WIDTH ( AxiIdWidthSlaves ), + .AXI_ADDR_WIDTH ( AxiAddrWidth ), + .AXI_DATA_WIDTH ( AxiDataWidth ), + .AXI_USER_WIDTH ( AxiUserWidth ) +) i_axi2rom ( + .clk_i ( clk ), + .rst_ni ( ndmreset_n ), + .slave ( master[1] ), + .req_o ( rom_req ), + .we_o ( ), + .addr_o ( rom_addr ), + .be_o ( ), + .data_o ( ), + .data_i ( rom_rdata ) +); + +bootrom i_bootrom ( + .clk_i ( clk ), + .req_i ( rom_req ), + .addr_i ( rom_addr ), + .rdata_o ( rom_rdata ) +); + +// DDR 4 Subsystem +axi_clock_converter_0 axi_clock_converter ( + .s_axi_aclk(clk), + .s_axi_aresetn(ndmreset_n), + .s_axi_awid(master[0].aw_id), + .s_axi_awaddr(master[0].aw_addr), + .s_axi_awlen(master[0].aw_len), + .s_axi_awsize(master[0].aw_size), + .s_axi_awburst(master[0].aw_burst), + .s_axi_awlock(master[0].aw_lock), 
+ .s_axi_awcache(master[0].aw_cache), + .s_axi_awprot(master[0].aw_prot), + .s_axi_awregion(master[0].aw_region), + .s_axi_awqos(master[0].aw_qos), + .s_axi_awvalid(master[0].aw_valid), + .s_axi_awready(master[0].aw_ready), + .s_axi_wdata(master[0].w_data), + .s_axi_wstrb(master[0].w_strb), + .s_axi_wlast(master[0].w_last), + .s_axi_wvalid(master[0].w_valid), + .s_axi_wready(master[0].w_ready), + .s_axi_bid(master[0].b_id), + .s_axi_bresp(master[0].b_resp), + .s_axi_bvalid(master[0].b_valid), + .s_axi_bready(master[0].b_ready), + .s_axi_arid(master[0].ar_id), + .s_axi_araddr(master[0].ar_addr[31:0]), + .s_axi_arlen(master[0].ar_len), + .s_axi_arsize(master[0].ar_size), + .s_axi_arburst(master[0].ar_burst), + .s_axi_arlock(master[0].ar_lock), + .s_axi_arcache(master[0].ar_cache), + .s_axi_arprot(master[0].ar_prot), + .s_axi_arregion(master[0].ar_region), + .s_axi_arqos(master[0].ar_qos), + .s_axi_arvalid(master[0].ar_valid), + .s_axi_arready(master[0].ar_ready), + .s_axi_rid(master[0].r_id), + .s_axi_rdata(master[0].r_data), + .s_axi_rresp(master[0].r_resp), + .s_axi_rlast(master[0].r_last), + .s_axi_rvalid(master[0].r_valid), + .s_axi_rready(master[0].r_ready), + // to size converter + .m_axi_aclk(c0_ddr4_ui_clk), + .m_axi_aresetn(ndmreset_n), + .m_axi_awid(s_axi_awid), + .m_axi_awaddr(s_axi_awaddr), + .m_axi_awlen(s_axi_awlen), + .m_axi_awsize(s_axi_awsize), + .m_axi_awburst(s_axi_awburst), + .m_axi_awlock(s_axi_awlock), + .m_axi_awcache(s_axi_awcache), + .m_axi_awprot(s_axi_awprot), + .m_axi_awregion(s_axi_awregion), + .m_axi_awqos(s_axi_awqos), + .m_axi_awvalid(s_axi_awvalid), + .m_axi_awready(s_axi_awready), + .m_axi_wdata(s_axi_wdata), + .m_axi_wstrb(s_axi_wstrb), + .m_axi_wlast(s_axi_wlast), + .m_axi_wvalid(s_axi_wvalid), + .m_axi_wready(s_axi_wready), + .m_axi_bid(s_axi_bid), + .m_axi_bresp(s_axi_bresp), + .m_axi_bvalid(s_axi_bvalid), + .m_axi_bready(s_axi_bready), + .m_axi_arid(s_axi_arid), + .m_axi_araddr(s_axi_araddr), + .m_axi_arlen(s_axi_arlen), + 
.m_axi_arsize(s_axi_arsize), + .m_axi_arburst(s_axi_arburst), + .m_axi_arlock(s_axi_arlock), + .m_axi_arcache(s_axi_arcache), + .m_axi_arprot(s_axi_arprot), + .m_axi_arregion(s_axi_arregion), + .m_axi_arqos(s_axi_arqos), + .m_axi_arvalid(s_axi_arvalid), + .m_axi_arready(s_axi_arready), + .m_axi_rid(s_axi_rid), + .m_axi_rdata(s_axi_rdata), + .m_axi_rresp(s_axi_rresp), + .m_axi_rlast(s_axi_rlast), + .m_axi_rvalid(s_axi_rvalid), + .m_axi_rready(s_axi_rready) +); + +axi_dwidth_converter_0 axi_size_converter ( + .s_axi_aclk(c0_ddr4_ui_clk), + .s_axi_aresetn(ndmreset_n), + .s_axi_awid, + .s_axi_awaddr, + .s_axi_awlen, + .s_axi_awsize, + .s_axi_awburst, + .s_axi_awlock, + .s_axi_awcache, + .s_axi_awprot, + .s_axi_awregion, + .s_axi_awqos, + .s_axi_awvalid, + .s_axi_awready, + .s_axi_wdata, + .s_axi_wstrb, + .s_axi_wlast, + .s_axi_wvalid, + .s_axi_wready, + .s_axi_bid, + .s_axi_bresp, + .s_axi_bvalid, + .s_axi_bready, + .s_axi_arid, + .s_axi_araddr, + .s_axi_arlen, + .s_axi_arsize, + .s_axi_arburst, + .s_axi_arlock, + .s_axi_arcache, + .s_axi_arprot, + .s_axi_arregion, + .s_axi_arqos, + .s_axi_arvalid, + .s_axi_arready, + .s_axi_rid, + .s_axi_rdata, + .s_axi_rresp, + .s_axi_rlast, + .s_axi_rvalid, + .s_axi_rready, + .m_axi_awaddr, + .m_axi_awlen, + .m_axi_awsize, + .m_axi_awburst, + .m_axi_awlock, + .m_axi_awcache, + .m_axi_awprot, + .m_axi_awregion, + .m_axi_awqos, + .m_axi_awvalid, + .m_axi_awready, + .m_axi_wdata, + .m_axi_wstrb, + .m_axi_wlast, + .m_axi_wvalid, + .m_axi_wready, + .m_axi_bresp, + .m_axi_bvalid, + .m_axi_bready, + .m_axi_araddr, + .m_axi_arlen, + .m_axi_arsize, + .m_axi_arburst, + .m_axi_arlock, + .m_axi_arcache, + .m_axi_arprot, + .m_axi_arregion, + .m_axi_arqos, + .m_axi_arvalid, + .m_axi_arready, + .m_axi_rdata, + .m_axi_rresp, + .m_axi_rlast, + .m_axi_rvalid, + .m_axi_rready +); + +ddr4_0 ddr_i ( + .sys_rst, // input + .c0_sys_clk_p, + .c0_sys_clk_n, + .c0_ddr4_act_n, + .c0_ddr4_adr, + .c0_ddr4_ba, + .c0_ddr4_bg, + .c0_ddr4_cke, + .c0_ddr4_odt, + 
.c0_ddr4_cs_n, + .c0_ddr4_ck_t, + .c0_ddr4_ck_c, + .c0_ddr4_reset_n, + .c0_ddr4_dm_dbi_n, + .c0_ddr4_dq, + .c0_ddr4_dqs_c, + .c0_ddr4_dqs_t, + .c0_init_calib_complete, + .c0_ddr4_ui_clk, // 1/4 of PHY clock, 300/4 = 75 MHz + .c0_ddr4_ui_clk_sync_rst, + .addn_ui_clkout1, + .dbg_clk(), // output + .c0_ddr4_aresetn(ndmreset_n), + .c0_ddr4_s_axi_awid('0), + .c0_ddr4_s_axi_awaddr(m_axi_awaddr), + .c0_ddr4_s_axi_awlen(m_axi_awlen), + .c0_ddr4_s_axi_awsize(m_axi_awsize), + .c0_ddr4_s_axi_awburst(m_axi_awburst), + .c0_ddr4_s_axi_awlock(m_axi_awlock), + .c0_ddr4_s_axi_awcache(m_axi_awcache), + .c0_ddr4_s_axi_awprot(m_axi_awprot), + .c0_ddr4_s_axi_awqos(m_axi_awqos), + .c0_ddr4_s_axi_awvalid(m_axi_awvalid), + .c0_ddr4_s_axi_awready(m_axi_awready), + .c0_ddr4_s_axi_wdata(m_axi_wdata), + .c0_ddr4_s_axi_wstrb(m_axi_wstrb), + .c0_ddr4_s_axi_wlast(m_axi_wlast), + .c0_ddr4_s_axi_wvalid(m_axi_wvalid), + .c0_ddr4_s_axi_wready(m_axi_wready), + .c0_ddr4_s_axi_bready(m_axi_bready), + .c0_ddr4_s_axi_bid(), + .c0_ddr4_s_axi_bresp(m_axi_bresp), + .c0_ddr4_s_axi_bvalid(m_axi_bvalid), + .c0_ddr4_s_axi_arid('0), + .c0_ddr4_s_axi_araddr(m_axi_araddr), + .c0_ddr4_s_axi_arlen(m_axi_arlen), + .c0_ddr4_s_axi_arsize(m_axi_arsize), + .c0_ddr4_s_axi_arburst(m_axi_arburst), + .c0_ddr4_s_axi_arlock(m_axi_arlock), + .c0_ddr4_s_axi_arcache(m_axi_arcache), + .c0_ddr4_s_axi_arprot(m_axi_arprot), + .c0_ddr4_s_axi_arqos(m_axi_arqos), + .c0_ddr4_s_axi_arvalid(m_axi_arvalid), + .c0_ddr4_s_axi_arready(m_axi_arready), + .c0_ddr4_s_axi_rready(m_axi_rready), + .c0_ddr4_s_axi_rid(), + .c0_ddr4_s_axi_rdata(m_axi_rdata), + .c0_ddr4_s_axi_rresp(m_axi_rresp), + .c0_ddr4_s_axi_rlast(m_axi_rlast), + .c0_ddr4_s_axi_rvalid(m_axi_rvalid), + .dbg_bus() +); + +endmodule \ No newline at end of file diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index c6ac53fe94..b920db5518 100644 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -32,7 +32,9 @@ package ariane_pkg; localparam BITS_SATURATION_COUNTER = 2; 
localparam NR_COMMIT_PORTS = 2; - localparam logic [63:0] ISA_CODE = (1 << 2) // C - Compressed extension + localparam logic [63:0] ISA_CODE = + | (1 << 0) // A - Atomic extension + | (1 << 2) // C - Compressed extension | (1 << 8) // I - RV32I/64I/128I base ISA | (1 << 12) // M - Integer Multiply/Divide extension | (0 << 13) // N - User level interrupts supported @@ -40,7 +42,7 @@ package ariane_pkg; | (1 << 20) // U - User mode implemented | (0 << 23) // X - Non-standard extensions present | (1 << 63); // RV64 - localparam ENABLE_RENAME = 1'b0; + localparam ENABLE_RENAME = 1'b1; // 32 registers + 1 bit for re-naming = 6 localparam REG_ADDR_SIZE = 6; @@ -54,6 +56,17 @@ package ariane_pkg; datasize: dm::DataCount, dataaddr: dm::DataAddr }; + + + // enables a commit log which matches spikes commit log format for easier trace comparison + localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b0; + + // ------------- Dangerouse ------------- + // if set to zero a flush will not invalidate the cache-lines, in a single core environment + // where coherence is not necessary this can improve performance. 
This needs to be switched on + // when more than one core is in a system + localparam logic INVALIDATE_ON_FLUSH = 1'b1; + // --------------- // Fetch Stage // --------------- @@ -133,7 +146,13 @@ package ariane_pkg; } bht_prediction_t; typedef enum logic[3:0] { - NONE, LOAD, STORE, ALU, CTRL_FLOW, MULT, CSR + NONE, // 0 + LOAD, // 1 + STORE, // 2 + ALU, // 3 + CTRL_FLOW, // 4 + MULT, // 5 + CSR // 6 } fu_t; localparam EXC_OFF_RST = 8'h80; @@ -143,10 +162,10 @@ package ariane_pkg; // --------------- // I$ - parameter int unsigned ICACHE_INDEX_WIDTH = 12; // in bit - parameter int unsigned ICACHE_TAG_WIDTH = 44; // in bit - parameter int unsigned ICACHE_SET_ASSOC = 4; - parameter int unsigned ICACHE_LINE_WIDTH = 128; // in bit + localparam int unsigned ICACHE_INDEX_WIDTH = 12; // in bit + localparam int unsigned ICACHE_TAG_WIDTH = 44; // in bit + localparam int unsigned ICACHE_SET_ASSOC = 4; + localparam int unsigned ICACHE_LINE_WIDTH = 128; // in bit // D$ localparam int unsigned DCACHE_INDEX_WIDTH = 12; @@ -183,17 +202,12 @@ package ariane_pkg; DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW } fu_op; - // ---------------------- - // Extract Bytes from Op - // ---------------------- - // TODO: Add atomics - function automatic logic [1:0] extract_transfer_size (fu_op op); - case (op) - LD, SD: return 2'b11; - LW, LWU, SW: return 2'b10; - LH, LHU, SH: return 2'b01; - LB, SB, LBU: return 2'b00; - default: return 2'b11; + function automatic logic is_amo (fu_op op); + case (op) inside + [AMO_LRW:AMO_MINDU]: begin + return 1'b1; + end + default: return 1'b0; endcase endfunction @@ -245,7 +259,8 @@ package ariane_pkg; // Atomics // -------------------- typedef enum logic [3:0] { - AMO_NONE, AMO_LR, AMO_SC, AMO_SWAP, AMO_ADD, AMO_AND, AMO_OR, AMO_XOR, AMO_MAX, AMO_MAXU, AMO_MIN, AMO_MINU + AMO_NONE, AMO_LR, AMO_SC, AMO_SWAP, AMO_ADD, AMO_AND, + AMO_OR, AMO_XOR, AMO_MAX, AMO_MAXU, AMO_MIN, AMO_MINU } amo_t; typedef struct packed { @@ -266,7 +281,6 @@ package ariane_pkg; // 
---------------------- // cache request ports // ---------------------- - // I$ address translation requests typedef struct packed { logic fetch_valid; // address translation valid @@ -295,6 +309,24 @@ package ariane_pkg; exception_t ex; // we've encountered an exception } icache_dreq_o_t; + // AMO request going to cache. this request is unconditionally valid as soon + // as request goes high. + // Furthermore, those signals are kept stable until the response indicates + // completion by asserting ack. + typedef struct packed { + logic req; // this request is valid + amo_t amo_op; // atomic memory operation to perform + logic [1:0] size; // 2'b10 --> word operation, 2'b11 --> double word operation + logic [63:0] operand_a; // address + logic [63:0] operand_b; // data as laid out in the register + } amo_req_t; + + // AMO response coming from cache. + typedef struct packed { + logic ack; // response is valid + logic [63:0] result; // sign-extended, result + } amo_resp_t; + // D$ data requests typedef struct packed { logic [DCACHE_INDEX_WIDTH-1:0] address_index; @@ -306,7 +338,6 @@ package ariane_pkg; logic [1:0] data_size; logic kill_req; logic tag_valid; - amo_t amo_op; } dcache_req_i_t; typedef struct packed { @@ -337,4 +368,92 @@ package ariane_pkg; return { {51 {instruction_i[31]}}, instruction_i[31], instruction_i[7], instruction_i[30:25], instruction_i[11:8], 1'b0 }; endfunction + // ---------------------- + // LSU Functions + // ---------------------- + // align data to address e.g.: shift data to be naturally 64 + function automatic logic [63:0] data_align (logic [2:0] addr, logic [63:0] data); + case (addr) + 3'b000: return data; + 3'b001: return {data[55:0], data[63:56]}; + 3'b010: return {data[47:0], data[63:48]}; + 3'b011: return {data[39:0], data[63:40]}; + 3'b100: return {data[31:0], data[63:32]}; + 3'b101: return {data[23:0], data[63:24]}; + 3'b110: return {data[15:0], data[63:16]}; + 3'b111: return {data[7:0], data[63:8]}; + endcase + return data; +
endfunction + + // generate byte enable mask + function automatic logic [7:0] be_gen(logic [2:0] addr, logic [1:0] size); + case (size) + 2'b11: begin + return 8'b1111_1111; + end + 2'b10: begin + case (addr[2:0]) + 3'b000: return 8'b0000_1111; + 3'b001: return 8'b0001_1110; + 3'b010: return 8'b0011_1100; + 3'b011: return 8'b0111_1000; + 3'b100: return 8'b1111_0000; + endcase + end + 2'b01: begin + case (addr[2:0]) + 3'b000: return 8'b0000_0011; + 3'b001: return 8'b0000_0110; + 3'b010: return 8'b0000_1100; + 3'b011: return 8'b0001_1000; + 3'b100: return 8'b0011_0000; + 3'b101: return 8'b0110_0000; + 3'b110: return 8'b1100_0000; + endcase + end + 2'b00: begin + case (addr[2:0]) + 3'b000: return 8'b0000_0001; + 3'b001: return 8'b0000_0010; + 3'b010: return 8'b0000_0100; + 3'b011: return 8'b0000_1000; + 3'b100: return 8'b0001_0000; + 3'b101: return 8'b0010_0000; + 3'b110: return 8'b0100_0000; + 3'b111: return 8'b1000_0000; + endcase + end + endcase + return 8'b0; + endfunction + + // ---------------------- + // Extract Bytes from Op + // ---------------------- + function automatic logic [1:0] extract_transfer_size(fu_op op); + case (op) + LD, SD, + AMO_LRD, AMO_SCD, + AMO_SWAPD, AMO_ADDD, + AMO_ANDD, AMO_ORD, + AMO_XORD, AMO_MAXD, + AMO_MAXDU, AMO_MIND, + AMO_MINDU: begin + return 2'b11; + end + LW, LWU, SW, + AMO_LRW, AMO_SCW, + AMO_SWAPW, AMO_ADDW, + AMO_ANDW, AMO_ORW, + AMO_XORW, AMO_MAXW, + AMO_MAXWU, AMO_MINW, + AMO_MINWU: begin + return 2'b10; + end + LH, LHU, SH: return 2'b01; + LB, SB, LBU: return 2'b00; + default: return 2'b11; + endcase + endfunction endpackage diff --git a/include/axi_if.sv b/include/axi_if.sv deleted file mode 100644 index 4797d2de90..0000000000 --- a/include/axi_if.sv +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright 2015 - 2018 ETH Zurich and University of Bologna. 
-// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. -// -//////////////////////////////////////////////////////////////////////////////// -// Only general functions and definitions are defined here // -// These functions are not intended to be modified // -//////////////////////////////////////////////////////////////////////////////// - -interface AXI_BUS -#( - parameter AXI_ADDR_WIDTH = 64, - parameter AXI_DATA_WIDTH = 64, - parameter AXI_ID_WIDTH = 10, - parameter AXI_USER_WIDTH = 6 -); - - localparam AXI_STRB_WIDTH = AXI_DATA_WIDTH/8; - - logic [AXI_ADDR_WIDTH-1:0] aw_addr; - logic [2:0] aw_prot; - logic [3:0] aw_region; - logic [7:0] aw_len; - logic [2:0] aw_size; - logic [1:0] aw_burst; - logic aw_lock; - logic [3:0] aw_cache; - logic [3:0] aw_qos; - logic [AXI_ID_WIDTH-1:0] aw_id; - logic [AXI_USER_WIDTH-1:0] aw_user; - logic aw_ready; - logic aw_valid; - - logic [AXI_ADDR_WIDTH-1:0] ar_addr; - logic [2:0] ar_prot; - logic [3:0] ar_region; - logic [7:0] ar_len; - logic [2:0] ar_size; - logic [1:0] ar_burst; - logic ar_lock; - logic [3:0] ar_cache; - logic [3:0] ar_qos; - logic [AXI_ID_WIDTH-1:0] ar_id; - logic [AXI_USER_WIDTH-1:0] ar_user; - logic ar_ready; - logic ar_valid; - - logic w_valid; - logic [AXI_DATA_WIDTH-1:0] w_data; - logic [AXI_STRB_WIDTH-1:0] w_strb; - logic [AXI_USER_WIDTH-1:0] w_user; - logic w_last; - logic w_ready; - - logic [AXI_DATA_WIDTH-1:0] r_data; - logic [1:0] r_resp; - logic 
r_last; - logic [AXI_ID_WIDTH-1:0] r_id; - logic [AXI_USER_WIDTH-1:0] r_user; - logic r_ready; - logic r_valid; - - logic [1:0] b_resp; - logic [AXI_ID_WIDTH-1:0] b_id; - logic [AXI_USER_WIDTH-1:0] b_user; - logic b_ready; - logic b_valid; - - // Master Side - //*************************************** - modport Master - ( - - output aw_valid, output aw_addr, output aw_prot, output aw_region, - output aw_len, output aw_size, output aw_burst, output aw_lock, - output aw_cache, output aw_qos, output aw_id, output aw_user, - input aw_ready, - - output ar_valid, output ar_addr, output ar_prot, output ar_region, - output ar_len, output ar_size, output ar_burst, output ar_lock, - output ar_cache, output ar_qos, output ar_id, output ar_user, - input ar_ready, - - output w_valid, output w_data, output w_strb, output w_user, output w_last, - input w_ready, - - input r_valid, input r_data, input r_resp, input r_last, input r_id, input r_user, - output r_ready, - - input b_valid, input b_resp, input b_id, input b_user, - output b_ready - - ); - - // Slave Side - //*************************************** - modport Slave - ( - - input aw_valid, input aw_addr, input aw_prot, input aw_region, - input aw_len, input aw_size, input aw_burst, input aw_lock, - input aw_cache, input aw_qos, input aw_id, input aw_user, - output aw_ready, - - input ar_valid, input ar_addr, input ar_prot, input ar_region, - input ar_len, input ar_size, input ar_burst, input ar_lock, - input ar_cache, input ar_qos, input ar_id, input ar_user, - output ar_ready, - - input w_valid, input w_data, input w_strb, input w_user, input w_last, - output w_ready, - - output r_valid, output r_data, output r_resp, output r_last, output r_id, output r_user, - input r_ready, - - output b_valid, output b_resp, output b_id, output b_user, - input b_ready - - ); - -endinterface - diff --git a/include/axi_intf.sv b/include/axi_intf.sv new file mode 100644 index 0000000000..2df625fae1 --- /dev/null +++ b/include/axi_intf.sv @@ 
-0,0 +1,321 @@ +// Copyright (c) 2014-2018 ETH Zurich, University of Bologna +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Fabian Schuiki +// +// This file defines the interfaces we support. + +import axi_pkg::*; + + +/// An AXI4 interface. +interface AXI_BUS #( + parameter AXI_ADDR_WIDTH = -1, + parameter AXI_DATA_WIDTH = -1, + parameter AXI_ID_WIDTH = -1, + parameter AXI_USER_WIDTH = -1 +); + + localparam AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; + + typedef logic [AXI_ID_WIDTH-1:0] id_t; + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_STRB_WIDTH-1:0] strb_t; + typedef logic [AXI_USER_WIDTH-1:0] user_t; + + id_t aw_id; + addr_t aw_addr; + logic [7:0] aw_len; + logic [2:0] aw_size; + burst_t aw_burst; + logic aw_lock; + cache_t aw_cache; + prot_t aw_prot; + qos_t aw_qos; + region_t aw_region; + user_t aw_user; + logic aw_valid; + logic aw_ready; + + data_t w_data; + strb_t w_strb; + logic w_last; + user_t w_user; + logic w_valid; + logic w_ready; + + id_t b_id; + resp_t b_resp; + user_t b_user; + logic b_valid; + logic b_ready; + + id_t ar_id; + addr_t ar_addr; + logic [7:0] ar_len; + logic [2:0] ar_size; + burst_t ar_burst; + logic ar_lock; + cache_t ar_cache; + prot_t ar_prot; + qos_t ar_qos; + region_t ar_region; + user_t ar_user; + logic ar_valid; + logic ar_ready; + + id_t r_id; + data_t r_data; + resp_t r_resp; + logic r_last; + 
user_t r_user; + logic r_valid; + logic r_ready; + + modport Master ( + output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_user, aw_valid, input aw_ready, + output w_data, w_strb, w_last, w_user, w_valid, input w_ready, + input b_id, b_resp, b_user, b_valid, output b_ready, + output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, input ar_ready, + input r_id, r_data, r_resp, r_last, r_user, r_valid, output r_ready + ); + + modport Slave ( + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_user, aw_valid, output aw_ready, + input w_data, w_strb, w_last, w_user, w_valid, output w_ready, + output b_id, b_resp, b_user, b_valid, input b_ready, + input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, output ar_ready, + output r_id, r_data, r_resp, r_last, r_user, r_valid, input r_ready + ); + + /// The interface as an output (issuing requests, initiator, master). + modport out ( + output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_user, aw_valid, input aw_ready, + output w_data, w_strb, w_last, w_user, w_valid, input w_ready, + input b_id, b_resp, b_user, b_valid, output b_ready, + output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, input ar_ready, + input r_id, r_data, r_resp, r_last, r_user, r_valid, output r_ready + ); + + /// The interface as an input (accepting requests, target, slave). 
+ modport in ( + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_user, aw_valid, output aw_ready, + input w_data, w_strb, w_last, w_user, w_valid, output w_ready, + output b_id, b_resp, b_user, b_valid, input b_ready, + input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_valid, output ar_ready, + output r_id, r_data, r_resp, r_last, r_user, r_valid, input r_ready + ); + +endinterface + + +/// An asynchronous AXI4 interface. +interface AXI_BUS_ASYNC +#( + parameter AXI_ADDR_WIDTH = -1, + parameter AXI_DATA_WIDTH = -1, + parameter AXI_ID_WIDTH = -1, + parameter AXI_USER_WIDTH = -1, + parameter BUFFER_WIDTH = -1 +); + + localparam AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; + + + logic [AXI_ID_WIDTH-1:0] aw_id; + logic [AXI_ADDR_WIDTH-1:0] aw_addr; + logic [7:0] aw_len; + logic [2:0] aw_size; + logic [1:0] aw_burst; + logic aw_lock; + logic [3:0] aw_cache; + logic [2:0] aw_prot; + logic [3:0] aw_qos; + logic [3:0] aw_region; + logic [AXI_USER_WIDTH-1:0] aw_user; + logic [BUFFER_WIDTH-1:0] aw_writetoken; + logic [BUFFER_WIDTH-1:0] aw_readpointer; + + logic [AXI_DATA_WIDTH-1:0] w_data; + logic [AXI_STRB_WIDTH-1:0] w_strb; + logic w_last; + logic [AXI_USER_WIDTH-1:0] w_user; + logic [BUFFER_WIDTH-1:0] w_writetoken; + logic [BUFFER_WIDTH-1:0] w_readpointer; + + logic [AXI_ID_WIDTH-1:0] b_id; + logic [1:0] b_resp; + logic [AXI_USER_WIDTH-1:0] b_user; + logic [BUFFER_WIDTH-1:0] b_writetoken; + logic [BUFFER_WIDTH-1:0] b_readpointer; + + logic [AXI_ID_WIDTH-1:0] ar_id; + logic [AXI_ADDR_WIDTH-1:0] ar_addr; + logic [7:0] ar_len; + logic [2:0] ar_size; + logic [1:0] ar_burst; + logic ar_lock; + logic [3:0] ar_cache; + logic [2:0] ar_prot; + logic [3:0] ar_qos; + logic [3:0] ar_region; + logic [AXI_USER_WIDTH-1:0] ar_user; + logic [BUFFER_WIDTH-1:0] ar_writetoken; + logic [BUFFER_WIDTH-1:0] ar_readpointer; + + logic [AXI_ID_WIDTH-1:0] r_id; + logic [AXI_DATA_WIDTH-1:0] r_data; + 
logic [1:0] r_resp; + logic r_last; + logic [AXI_USER_WIDTH-1:0] r_user; + logic [BUFFER_WIDTH-1:0] r_writetoken; + logic [BUFFER_WIDTH-1:0] r_readpointer; + + modport Master ( + output aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_user, aw_writetoken, input aw_readpointer, + output w_data, w_strb, w_last, w_user, w_writetoken, input w_readpointer, + input b_id, b_resp, b_user, b_writetoken, output b_readpointer, + output ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_writetoken, input ar_readpointer, + input r_id, r_data, r_resp, r_last, r_user, r_writetoken, output r_readpointer + ); + + modport Slave ( + input aw_id, aw_addr, aw_len, aw_size, aw_burst, aw_lock, aw_cache, aw_prot, aw_qos, aw_region, aw_user, aw_writetoken, output aw_readpointer, + input w_data, w_strb, w_last, w_user, w_writetoken, output w_readpointer, + output b_id, b_resp, b_user, b_writetoken, input b_readpointer, + input ar_id, ar_addr, ar_len, ar_size, ar_burst, ar_lock, ar_cache, ar_prot, ar_qos, ar_region, ar_user, ar_writetoken, output ar_readpointer, + output r_id, r_data, r_resp, r_last, r_user, r_writetoken, input r_readpointer + ); + +endinterface + + +/// An AXI4-Lite interface. 
+interface AXI_LITE #( + parameter AXI_ADDR_WIDTH = -1, + parameter AXI_DATA_WIDTH = -1 +); + + localparam AXI_STRB_WIDTH = AXI_DATA_WIDTH / 8; + + typedef logic [AXI_ADDR_WIDTH-1:0] addr_t; + typedef logic [AXI_DATA_WIDTH-1:0] data_t; + typedef logic [AXI_STRB_WIDTH-1:0] strb_t; + + // AW channel + addr_t aw_addr; + logic aw_valid; + logic aw_ready; + + data_t w_data; + strb_t w_strb; + logic w_valid; + logic w_ready; + + resp_t b_resp; + logic b_valid; + logic b_ready; + + addr_t ar_addr; + logic ar_valid; + logic ar_ready; + + data_t r_data; + resp_t r_resp; + logic r_valid; + logic r_ready; + + modport Master ( + output aw_addr, aw_valid, input aw_ready, + output w_data, w_strb, w_valid, input w_ready, + input b_resp, b_valid, output b_ready, + output ar_addr, ar_valid, input ar_ready, + input r_data, r_resp, r_valid, output r_ready + ); + + modport Slave ( + input aw_addr, aw_valid, output aw_ready, + input w_data, w_strb, w_valid, output w_ready, + output b_resp, b_valid, input b_ready, + input ar_addr, ar_valid, output ar_ready, + output r_data, r_resp, r_valid, input r_ready + ); + + /// The interface as an output (issuing requests, initiator, master). + modport out ( + output aw_addr, aw_valid, input aw_ready, + output w_data, w_strb, w_valid, input w_ready, + input b_resp, b_valid, output b_ready, + output ar_addr, ar_valid, input ar_ready, + input r_data, r_resp, r_valid, output r_ready + ); + + /// The interface as an input (accepting requests, target, slave). + modport in ( + input aw_addr, aw_valid, output aw_ready, + input w_data, w_strb, w_valid, output w_ready, + output b_resp, b_valid, input b_ready, + input ar_addr, ar_valid, output ar_ready, + output r_data, r_resp, r_valid, input r_ready + ); + +endinterface + + +/// An AXI routing table. +/// +/// For each slave, multiple rules can be defined. Each rule consists of an +/// address mask and a base. 
Addresses are masked and then compared against the +/// base to decide where transfers need to go. +interface AXI_ROUTING_RULES #( + /// The address width. + parameter int AXI_ADDR_WIDTH = -1, + /// The number of slaves in the routing table. + parameter int NUM_SLAVE = -1, + /// The number of rules in the routing table. + parameter int NUM_RULES = -1 +); + + struct packed { + logic enabled; + logic [AXI_ADDR_WIDTH-1:0] mask; + logic [AXI_ADDR_WIDTH-1:0] base; + } [NUM_RULES-1:0] rules [NUM_SLAVE]; + + modport xbar(input rules); + modport cfg(output rules); + +endinterface + + +/// An AXI arbitration interface. +interface AXI_ARBITRATION #( + /// The number of requestors. + parameter int NUM_REQ = -1 +); + + // Incoming requests. + logic [NUM_REQ-1:0] in_req; + logic [NUM_REQ-1:0] in_ack; + + // Outgoing request. + logic out_req; + logic out_ack; + logic [$clog2(NUM_REQ)-1:0] out_sel; + + // The arbiter side of the interface. + modport arb(input in_req, out_ack, output out_req, out_sel, in_ack); + + // The requestor side of the interface. 
+ modport req(output in_req, out_ack, input out_req, out_sel, in_ack); + +endinterface diff --git a/include/riscv_pkg.sv b/include/riscv_pkg.sv index 455af79c09..7a223e444c 100644 --- a/include/riscv_pkg.sv +++ b/include/riscv_pkg.sv @@ -127,12 +127,25 @@ package riscv; logic [6:0] opcode; } utype_t; + // atomic instructions + typedef struct packed { + logic [31:27] funct5; + logic aq; + logic rl; + logic [24:20] rs2; + logic [19:15] rs1; + logic [14:12] funct3; + logic [11:7] rd; + logic [6:0] opcode; + } atype_t; + typedef union packed { logic [31:0] instr; rtype_t rtype; itype_t itype; stype_t stype; utype_t utype; + atype_t atype; } instruction_t; // -------------------- @@ -158,7 +171,7 @@ package riscv; localparam OpcodeCJ = 3'b101; localparam OpcodeCBeqz = 3'b110; localparam OpcodeCBnez = 3'b111; - + localparam OpcodeC2JalrMvAdd = 3'b100; // ---------------------- // Performance Counters // ---------------------- @@ -381,4 +394,23 @@ package riscv; function automatic logic [31:0] illegal (); return 32'h00000000; endfunction + + + // trace log compatible to spikes commit log feature + // pragma translate_off + function string spikeCommitLog(logic [63:0] pc, priv_lvl_t priv_lvl, logic [31:0] instr, logic [4:0] rd, logic [63:0] result); + string rd_s; + + if (rd < 10) rd_s = $sformatf("x %0d", rd); + else rd_s = $sformatf("x%0d", rd); + + if (rd != 0) begin + // 0 0x0000000080000118 (0xeecf8f93) x31 0x0000000080004000 + return $sformatf("%d 0x%h (0x%h) %s 0x%h\n", priv_lvl, pc, instr, rd_s, result); + end else begin + // 0 0x000000008000019c (0x0040006f) + return $sformatf("%d 0x%h (0x%h)\n", priv_lvl, pc, instr); + end + endfunction + // pragma translate_on endpackage diff --git a/riscv-torture b/riscv-torture deleted file mode 160000 index 4e1c13adc5..0000000000 --- a/riscv-torture +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4e1c13adc5c1bb1128c5c57349e7e2d78fafc139 diff --git a/src/amo_buffer.sv b/src/amo_buffer.sv new file mode 100644 index 
0000000000..89ed8484ac --- /dev/null +++ b/src/amo_buffer.sv @@ -0,0 +1,83 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 20.09.2018 +// Description: Buffers AMO requests +// This unit buffers atomic memory operations for the cache subsystem. +// Furthermore it handles interfacing with the commit stage + +module amo_buffer ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // pipeline flush + + input logic valid_i, // AMO is valid + output logic ready_o, // AMO unit is ready + input ariane_pkg::amo_t amo_op_i, // AMO Operation + input logic [63:0] paddr_i, // physical address of store which needs to be placed in the queue + input logic [63:0] data_i, // data which is placed in the queue + input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write) + // D$ + output ariane_pkg::amo_req_t amo_req_o, // request to cache subsystem + input ariane_pkg::amo_resp_t amo_resp_i, // response from cache subsystem + // Auxiliary signals + input logic amo_valid_commit_i, // We have a valid AMO in the commit stage + input logic no_st_pending_i // there is currently no store pending anymore +); + logic flush_amo_buffer; + logic amo_valid; + + typedef struct packed { + ariane_pkg::amo_t op; + logic [63:0] paddr; + logic [63:0]
data; + logic [1:0] size; + } amo_op_t ; + + amo_op_t amo_data_in, amo_data_out; + + // validate this request as soon as all stores have drained and the AMO is in the commit stage + assign amo_req_o.req = no_st_pending_i & amo_valid_commit_i & amo_valid; + assign amo_req_o.amo_op = amo_data_out.op; + assign amo_req_o.size = amo_data_out.size; + assign amo_req_o.operand_a = amo_data_out.paddr; + assign amo_req_o.operand_b = amo_data_out.data; + + assign amo_data_in.op = amo_op_i; + assign amo_data_in.data = data_i; + assign amo_data_in.paddr = paddr_i; + assign amo_data_in.size = data_size_i; + + // only flush if we are currently not committing the AMO + // e.g.: it is not speculative anymore + assign flush_amo_buffer = flush_i & !amo_valid_commit_i; + + fifo_v2 #( + .DEPTH ( 1 ), + .ALM_EMPTY_TH ( 0 ), + .ALM_FULL_TH ( 0 ), + .dtype ( amo_op_t ) + ) i_amo_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( flush_amo_buffer ), + .testmode_i ( 1'b0 ), + .full_o ( amo_valid ), + .empty_o ( ready_o ), + .alm_full_o ( ), // left open + .alm_empty_o ( ), // left open + .data_i ( amo_data_in ), + .push_i ( valid_i ), + .data_o ( amo_data_out ), + .pop_i ( amo_resp_i.ack ) + ); + +endmodule \ No newline at end of file diff --git a/src/ariane.sv b/src/ariane.sv index a9a2a0ceb5..0f8829049f 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -26,7 +26,6 @@ module ariane #( )( input logic clk_i, input logic rst_ni, - input logic test_en_i, // enable all clock gates for testing // Core ID, Cluster ID and boot address are considered more or less static input logic [63:0] boot_addr_i, // reset boot address input logic [ 3:0] core_id_i, // core id in a multicore environment (reflected in a CSR) @@ -130,6 +129,7 @@ module ariane #( logic lsu_commit_commit_ex; logic lsu_commit_ready_ex_commit; logic no_st_pending_ex_commit; + logic amo_valid_commit; // -------------- // ID <-> COMMIT // -------------- @@ -160,7 +160,7 @@ module ariane #( logic tsr_csr_id; logic 
dcache_en_csr_nbdcache; logic icache_en_csr; - logic debug_mode_csr_id; + logic debug_mode; logic single_step_csr_commit; // ---------------------------- // Performance Counters <-> * @@ -192,11 +192,20 @@ module ariane #( logic dcache_flush_ctrl_cache; logic dcache_flush_ack_cache_ctrl; logic set_debug_pc; + logic flush_commit; icache_areq_i_t icache_areq_ex_cache; icache_areq_o_t icache_areq_cache_ex; icache_dreq_i_t icache_dreq_if_cache; icache_dreq_o_t icache_dreq_cache_if; + + amo_req_t amo_req; + amo_resp_t amo_resp; + + logic debug_req; + // Disable debug during AMO commit + assign debug_req = debug_req_i & ~amo_valid_commit; + // ---------------- // DCache <-> * // ---------------- @@ -209,6 +218,7 @@ module ariane #( frontend i_frontend ( .flush_i ( flush_ctrl_if ), // not entirely correct .flush_bp_i ( 1'b0 ), + .debug_mode_i ( debug_mode ), .boot_addr_i ( boot_addr_i ), .icache_dreq_i ( icache_dreq_cache_if ), .icache_dreq_o ( icache_dreq_if_cache ), @@ -242,7 +252,7 @@ module ariane #( .issue_instr_ack_i ( issue_instr_issue_id ), .priv_lvl_i ( priv_lvl ), - .debug_mode_i ( debug_mode_csr_id ), + .debug_mode_i ( debug_mode ), .tvm_i ( tvm_csr_id ), .tw_i ( tw_csr_id ), .tsr_i ( tsr_csr_id ), @@ -292,6 +302,7 @@ module ariane #( .csr_ready_i ( csr_ready_ex_id ), .csr_valid_o ( csr_valid_id_ex ), + .resolved_branch_i ( resolved_branch ), .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, branch_trans_id_ex_id, csr_trans_id_ex_id, mult_trans_id_ex_id }), .wbdata_i ( {alu_result_ex_id, lsu_result_ex_id, branch_result_ex_id, csr_result_ex_id, mult_result_ex_id }), .ex_ex_i ( {{$bits(exception_t){1'b0}}, lsu_exception_ex_id, branch_exception_ex_id, {$bits(exception_t){1'b0}}, {$bits(exception_t){1'b0}} }), @@ -345,6 +356,9 @@ module ariane #( .lsu_commit_ready_o ( lsu_commit_ready_ex_commit ), // to commit .lsu_exception_o ( lsu_exception_ex_id ), .no_st_pending_o ( no_st_pending_ex_commit ), + .amo_valid_commit_i ( amo_valid_commit ), + .amo_req_o ( 
amo_req ), + .amo_resp_i ( amo_resp ), // CSR .csr_ready_o ( csr_ready_ex_id ), .csr_valid_i ( csr_valid_id_ex ), @@ -384,11 +398,13 @@ module ariane #( // Commit // --------- commit_stage commit_stage_i ( + .clk_i, + .rst_ni, .halt_i ( halt_ctrl ), - .flush_dcache_i ( dcache_flush_ctrl_cache ), + .flush_dcache_i ( dcache_flush_ctrl_cache ), .exception_o ( ex_commit ), - .debug_mode_i ( debug_mode_csr_id ), - .debug_req_i ( debug_req_i ), + .debug_mode_i ( debug_mode ), + .debug_req_i ( debug_req ), .single_step_i ( single_step_csr_commit ), .commit_instr_i ( commit_instr_id_commit ), .commit_ack_o ( commit_ack ), @@ -398,6 +414,8 @@ module ariane #( .we_o ( we_commit_id ), .commit_lsu_o ( lsu_commit_commit_ex ), .commit_lsu_ready_i ( lsu_commit_ready_ex_commit ), + .amo_valid_commit_o ( amo_valid_commit ), + .amo_resp_i ( amo_resp ), .commit_csr_o ( csr_commit_commit_ex ), .pc_o ( pc_commit ), .csr_op_o ( csr_op_commit_csr ), @@ -407,6 +425,7 @@ module ariane #( .fence_i_o ( fence_i_commit_controller ), .fence_o ( fence_commit_controller ), .sfence_vma_o ( sfence_vma_commit_controller ), + .flush_commit_o ( flush_commit ), .* ); @@ -418,8 +437,8 @@ module ariane #( ) csr_regfile_i ( .flush_o ( flush_csr_ctrl ), .halt_csr_o ( halt_csr_ctrl ), - .commit_ack_i ( commit_ack ), .commit_instr_i ( commit_instr_id_commit ), + .commit_ack_i ( commit_ack ), .ex_i ( ex_commit ), .csr_op_i ( csr_op_commit_csr ), .csr_addr_i ( csr_addr_ex_csr ), @@ -442,7 +461,7 @@ module ariane #( .tvm_o ( tvm_csr_id ), .tw_o ( tw_csr_id ), .tsr_o ( tsr_csr_id ), - .debug_mode_o ( debug_mode_csr_id ), + .debug_mode_o ( debug_mode ), .single_step_o ( single_step_csr_commit ), .dcache_en_o ( dcache_en_csr_nbdcache ), .icache_en_o ( icache_en_csr ), @@ -450,6 +469,10 @@ module ariane #( .perf_data_o ( data_csr_perf ), .perf_data_i ( data_perf_csr ), .perf_we_o ( we_csr_perf ), + .debug_req_i ( debug_req ), + .ipi_i, + .irq_i, + .time_irq_i, .* ); @@ -486,8 +509,8 @@ module ariane #( .flush_id_o 
( flush_ctrl_id ), .flush_ex_o ( flush_ctrl_ex ), .flush_tlb_o ( flush_tlb_ctrl_ex ), - .flush_dcache_o ( dcache_flush_ctrl_cache ), - .flush_dcache_ack_i ( dcache_flush_ack_cache_ctrl ), + .flush_dcache_o ( dcache_flush_ctrl_cache ), + .flush_dcache_ack_i ( dcache_flush_ack_cache_ctrl ), .halt_csr_i ( halt_csr_ctrl ), .halt_o ( halt_ctrl ), @@ -500,6 +523,7 @@ module ariane #( .fence_i_i ( fence_i_commit_controller ), .fence_i ( fence_commit_controller ), .sfence_vma_i ( sfence_vma_commit_controller ), + .flush_commit_i ( flush_commit ), .flush_icache_o ( icache_flush_ctrl_cache ), .* @@ -526,12 +550,11 @@ module ariane #( .dcache_enable_i ( dcache_en_csr_nbdcache ), .dcache_flush_i ( dcache_flush_ctrl_cache ), .dcache_flush_ack_o ( dcache_flush_ack_cache_ctrl ), - // from PTW, Load Unit and Store Unit - .dcache_amo_commit_i ( 1'b0 ), - .dcache_amo_valid_o ( ), - .dcache_amo_result_o ( ), - .dcache_amo_flush_i ( 1'b0 ), + // to commit stage + .amo_req_i ( amo_req ), + .amo_resp_o ( amo_resp ), .dcache_miss_o ( dcache_miss_cache_perf ), + // from PTW, Load Unit and Store Unit .dcache_req_ports_i ( dcache_req_ports_ex_cache ), .dcache_req_ports_o ( dcache_req_ports_cache_ex ), // memory side @@ -579,7 +602,7 @@ module ariane #( assign tracer_if.exception = commit_stage_i.exception_o; // assign current privilege level assign tracer_if.priv_lvl = priv_lvl; - assign tracer_if.debug_mode = debug_mode_csr_id; + assign tracer_if.debug_mode = debug_mode; instr_tracer instr_tracer_i (tracer_if, cluster_id_i, core_id_i); `endif `endif @@ -618,15 +641,24 @@ module ariane #( if (~rst_ni) begin cycles <= 0; end else begin + string mode = ""; + if (debug_mode) mode = "D"; + else begin + case (priv_lvl) + riscv::PRIV_LVL_M: mode = "M"; + riscv::PRIV_LVL_S: mode = "S"; + riscv::PRIV_LVL_U: mode = "U"; + endcase + end for (int i = 0; i < NR_COMMIT_PORTS; i++) begin if (commit_ack[i] && !commit_instr_id_commit[i].ex.valid) begin - $fwrite(f, "%d 0x%0h (0x%h) DASM(%h)\n", cycles, 
commit_instr_id_commit[i].pc, commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].ex.tval[31:0]); + $fwrite(f, "%d 0x%0h %s (0x%h) DASM(%h)\n", cycles, commit_instr_id_commit[i].pc, mode, commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].ex.tval[31:0]); end else if (commit_ack[i] && commit_instr_id_commit[i].ex.valid) begin if (commit_instr_id_commit[i].ex.cause == 2) begin $fwrite(f, "Exception Cause: Illegal Instructions, DASM(%h) PC=%h\n", commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].pc); end else begin - if (debug_mode_csr_id) begin - $fwrite(f, "%d 0x%0h (0x%h) DASM(%h)\n", cycles, commit_instr_id_commit[i].pc, commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].ex.tval[31:0]); + if (debug_mode) begin + $fwrite(f, "%d 0x%0h %s (0x%h) DASM(%h)\n", cycles, commit_instr_id_commit[i].pc, mode, commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].ex.tval[31:0]); end else begin $fwrite(f, "Exception Cause: %5d, DASM(%h) PC=%h\n", commit_instr_id_commit[i].ex.cause, commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].pc); end diff --git a/src/axi b/src/axi new file mode 160000 index 0000000000..328cbe05a4 --- /dev/null +++ b/src/axi @@ -0,0 +1 @@ +Subproject commit 328cbe05a42a31aae6f57f780351a2ba22954fef diff --git a/src/axi_adapter.sv b/src/axi_adapter.sv index 03166754d7..d253e42db9 100644 --- a/src/axi_adapter.sv +++ b/src/axi_adapter.sv @@ -16,12 +16,11 @@ */ import std_cache_pkg::*; - module axi_adapter #( - parameter int unsigned DATA_WIDTH = 256, - parameter logic CRITICAL_WORD_FIRST = 0, // the AXI subsystem needs to support wrapping reads for this feature - parameter int unsigned AXI_ID_WIDTH = 10 - )( + parameter int unsigned DATA_WIDTH = 256, + parameter logic CRITICAL_WORD_FIRST = 0, // the AXI subsystem needs to support wrapping reads for this feature + parameter int unsigned AXI_ID_WIDTH = 10 +)( input logic clk_i, // Clock input logic rst_ni, // 
Asynchronous reset active low @@ -131,6 +130,8 @@ module axi_adapter #( axi.w_valid = 1'b1; // its a single write if (type_i == SINGLE_REQ) begin + // only a single write so the data is already the last one + axi.w_last = 1'b1; // single req can be granted here gnt_o = axi.aw_ready & axi.w_ready; gnt_id_o = id_i; @@ -200,9 +201,13 @@ module axi_adapter #( axi.w_valid = 1'b1; axi.w_last = (cnt_q == '0) ? 1'b1 : 1'b0; - axi.w_data = wdata_i[BURST_SIZE-cnt_q]; - axi.w_strb = be_i[BURST_SIZE-cnt_q]; - + if (type_i == SINGLE_REQ) begin + axi.w_data = wdata_i[0]; + axi.w_strb = be_i[0]; + end else begin + axi.w_data = wdata_i[BURST_SIZE-cnt_q]; + axi.w_strb = be_i[BURST_SIZE-cnt_q]; + end axi.aw_valid = 1'b1; // we are here because we want to write a cache line axi.aw_len = BURST_SIZE; @@ -249,8 +254,13 @@ module axi_adapter #( // ~> from write, there is an outstanding write WAIT_LAST_W_READY: begin axi.w_valid = 1'b1; - axi.w_data = wdata_i[BURST_SIZE-cnt_q]; - axi.w_strb = be_i[BURST_SIZE-cnt_q]; + if (type_i == SINGLE_REQ) begin + axi.w_data = wdata_i[0]; + axi.w_strb = be_i[0]; + end else begin + axi.w_data = wdata_i[BURST_SIZE-cnt_q]; + axi.w_strb = be_i[BURST_SIZE-cnt_q]; + end // this is the last write axi.w_last = (cnt_q == '0) ? 
1'b1 : 1'b0; diff --git a/src/axi_node b/src/axi_node index ec45a3d13a..1f77f634b6 160000 --- a/src/axi_node +++ b/src/axi_node @@ -1 +1 @@ -Subproject commit ec45a3d13a9b827f35aa76d19a8400e3ef6d13b6 +Subproject commit 1f77f634b65fdee56dfc928cadadd66e9fafc485 diff --git a/src/axi_slice b/src/axi_slice deleted file mode 160000 index f8886bd3f2..0000000000 --- a/src/axi_slice +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f8886bd3f2d4967aaccff15b67bf1f9e1a0e3453 diff --git a/src/branch_unit.sv b/src/branch_unit.sv index bbf35fd531..7600ce35dd 100644 --- a/src/branch_unit.sv +++ b/src/branch_unit.sv @@ -66,7 +66,7 @@ module branch_unit ( if (operator_i == JALR) target_address[0] = 1'b0; // if we need to put the branch target address in a destination register, output it here to WB - branch_result_o = next_pc; + branch_result_o = next_pc; // save PC - we need this to get the target row in the branch target buffer // we play this trick with the branch instruction which wraps a word boundary: diff --git a/src/cache_subsystem/amo_alu.sv b/src/cache_subsystem/amo_alu.sv new file mode 100644 index 0000000000..7e2537e85b --- /dev/null +++ b/src/cache_subsystem/amo_alu.sv @@ -0,0 +1,63 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 15.09.2018 +// Description: Combinatorial AMO unit +module amo_alu ( + // AMO interface + input ariane_pkg::amo_t amo_op_i, + input logic [63:0] amo_operand_a_i, + input logic [63:0] amo_operand_b_i, + output logic [63:0] amo_result_o // result of atomic memory operation +); + + logic [64:0] adder_sum; + logic [64:0] adder_operand_a, adder_operand_b; + + assign adder_sum = adder_operand_a + adder_operand_b; + + always_comb begin + + adder_operand_a = $signed(amo_operand_a_i); + adder_operand_b = $signed(amo_operand_b_i); + + amo_result_o = amo_operand_b_i; + + unique case (amo_op_i) + // the default is to output operand_b + ariane_pkg::AMO_SC:; + ariane_pkg::AMO_SWAP:; + ariane_pkg::AMO_ADD: amo_result_o = adder_sum[63:0]; + ariane_pkg::AMO_AND: amo_result_o = amo_operand_a_i & amo_operand_b_i; + ariane_pkg::AMO_OR: amo_result_o = amo_operand_a_i | amo_operand_b_i; + ariane_pkg::AMO_XOR: amo_result_o = amo_operand_a_i ^ amo_operand_b_i; + ariane_pkg::AMO_MAX: begin + adder_operand_b = -$signed(amo_operand_b_i); + amo_result_o = adder_sum[64] ? amo_operand_b_i : amo_operand_a_i; + end + ariane_pkg::AMO_MIN: begin + adder_operand_b = -$signed(amo_operand_b_i); + amo_result_o = adder_sum[64] ? amo_operand_a_i : amo_operand_b_i; + end + ariane_pkg::AMO_MAXU: begin + adder_operand_a = $unsigned(amo_operand_a_i); + adder_operand_b = -$unsigned(amo_operand_b_i); + amo_result_o = adder_sum[64] ? amo_operand_b_i : amo_operand_a_i; + end + ariane_pkg::AMO_MINU: begin + adder_operand_a = $unsigned(amo_operand_a_i); + adder_operand_b = -$unsigned(amo_operand_b_i); + amo_result_o = adder_sum[64] ? 
amo_operand_a_i : amo_operand_b_i; + end + default: amo_result_o = '0; + endcase + end +endmodule diff --git a/src/cache_subsystem/cache_ctrl.sv b/src/cache_subsystem/cache_ctrl.sv index 75ccd38e6c..7e1813df11 100644 --- a/src/cache_subsystem/cache_ctrl.sv +++ b/src/cache_subsystem/cache_ctrl.sv @@ -21,57 +21,53 @@ import ariane_pkg::*; import std_cache_pkg::*; module cache_ctrl #( - parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000 - )( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low - input logic flush_i, - input logic bypass_i, // enable cache - output logic busy_o, - - // Core request ports - input dcache_req_i_t req_port_i, - output dcache_req_o_t req_port_o, - - // SRAM interface - output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid - output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array - input logic gnt_i, - output cache_line_t data_o, - output cl_be_t be_o, - output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later - input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i, - output logic we_o, - input logic [DCACHE_SET_ASSOC-1:0] hit_way_i, - // Miss handling - output miss_req_t miss_req_o, - // return - input logic miss_gnt_i, - input logic active_serving_i, // the miss unit is currently active for this unit, serving the miss - input logic [63:0] critical_word_i, - input logic critical_word_valid_i, - - input logic bypass_gnt_i, - input logic bypass_valid_i, - input logic [63:0] bypass_data_i, - // check MSHR for aliasing - output logic [55:0] mshr_addr_o, - input logic mshr_addr_matches_i, - input logic mshr_index_matches_i + parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000 +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, + input logic bypass_i, // enable cache + output logic busy_o, + // Core request ports + input dcache_req_i_t req_port_i, + output dcache_req_o_t req_port_o, + // SRAM interface + output logic 
[DCACHE_SET_ASSOC-1:0] req_o, // req is valid + output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array + input logic gnt_i, + output cache_line_t data_o, + output cl_be_t be_o, + output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later + input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i, + output logic we_o, + input logic [DCACHE_SET_ASSOC-1:0] hit_way_i, + // Miss handling + output miss_req_t miss_req_o, + // return + input logic miss_gnt_i, + input logic active_serving_i, // the miss unit is currently active for this unit, serving the miss + input logic [63:0] critical_word_i, + input logic critical_word_valid_i, + // bypass ports + input logic bypass_gnt_i, + input logic bypass_valid_i, + input logic [63:0] bypass_data_i, + // check MSHR for aliasing + output logic [55:0] mshr_addr_o, + input logic mshr_addr_matches_i, + input logic mshr_index_matches_i ); - // 0 IDLE - // 1 WAIT_TAG - // 2 WAIT_TAG_BYPASSED - // 3 STORE_REQ - // 4 WAIT_REFILL_VALID - // 5 WAIT_REFILL_GNT - // 6 WAIT_TAG_SAVED - // 7 WAIT_MSHR - // 8 WAIT_CRITICAL_WORD - enum logic [3:0] { - IDLE, WAIT_TAG, WAIT_TAG_BYPASSED, STORE_REQ, WAIT_REFILL_VALID, WAIT_REFILL_GNT, WAIT_TAG_SAVED, WAIT_MSHR, WAIT_CRITICAL_WORD + IDLE, // 0 + WAIT_TAG, // 1 + WAIT_TAG_BYPASSED, // 2 + STORE_REQ, // 3 + WAIT_REFILL_VALID, // 4 + WAIT_REFILL_GNT, // 5 + WAIT_TAG_SAVED, // 6 + WAIT_MSHR, // 7 + WAIT_CRITICAL_WORD // 8 } state_d, state_q; typedef struct packed { @@ -109,12 +105,10 @@ module cache_ctrl #( // incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array // cache-line offset -> multiple of 64 cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left - // default assignments state_d = state_q; mem_req_d = mem_req_q; hit_way_d = hit_way_q; - // output assignments req_port_o.data_gnt = 1'b0; req_port_o.data_rvalid = 1'b0; @@ -135,7 +129,7 @@ module cache_ctrl #( IDLE: begin // a new request arrived 
if (req_port_i.data_req && !flush_i) begin - // request the cache line - we can do this specualtive + // request the cache line - we can do this speculatively req_o = '1; // save index, be and we @@ -172,10 +166,11 @@ module cache_ctrl #( WAIT_TAG, WAIT_TAG_SAVED: begin // depending on where we come from // For the store case the tag comes in the same cycle - tag_o = (state_q == WAIT_TAG_SAVED || mem_req_q.we) ? mem_req_q.tag : req_port_i.address_tag; + tag_o = (state_q == WAIT_TAG_SAVED || mem_req_q.we) ? mem_req_q.tag + : req_port_i.address_tag; // we speculatively request another transfer if (req_port_i.data_req && !flush_i) begin - req_o = '1; + req_o = '1; end // check that the client really wants to do the request @@ -185,7 +180,6 @@ module cache_ctrl #( // ------------ if (|hit_way_i) begin // we can request another cache-line if this was a load - // make another request if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin state_d = WAIT_TAG; // switch back to WAIT_TAG mem_req_d.index = req_port_i.address_index; @@ -195,12 +189,12 @@ module cache_ctrl #( mem_req_d.wdata = req_port_i.data_wdata; mem_req_d.tag = req_port_i.address_tag; mem_req_d.bypass = 1'b0; + req_port_o.data_gnt = gnt_i; if (!gnt_i) begin state_d = IDLE; end - end else begin state_d = IDLE; end @@ -215,7 +209,6 @@ module cache_ctrl #( // report data for a read if (!mem_req_q.we) begin req_port_o.data_rvalid = 1'b1; - // else this was a store so we need an extra step to handle it end else begin state_d = STORE_REQ; @@ -273,7 +266,7 @@ module cache_ctrl #( // ~> we are here as we need a second round of memory access for a store STORE_REQ: begin // check if the MSHR still doesn't match - mshr_addr_o = {mem_req_d.tag, mem_req_q.index}; + mshr_addr_o = {mem_req_q.tag, mem_req_q.index}; // We need to re-check for MSHR aliasing here as the store requires at least // two memory look-ups on a single-ported SRAM and therefore is non-atomic @@ -284,7 +277,7 @@ module cache_ctrl #( we_o = 1'b1; 
be_o.vldrty = hit_way_q; - + // set the correct byte enable be_o.data[cl_offset>>3 +: 8] = mem_req_q.be; data_o.data[cl_offset +: 64] = mem_req_q.wdata; @@ -384,7 +377,6 @@ module cache_ctrl #( mem_req_d.wdata = req_port_i.data_wdata; mem_req_d.tag = req_port_i.address_tag; - state_d = IDLE; // Wait until we have access on the memory array @@ -393,7 +385,6 @@ module cache_ctrl #( mem_req_d.bypass = 1'b0; req_port_o.data_gnt = 1'b1; end - end else begin state_d = IDLE; end @@ -411,7 +402,7 @@ module cache_ctrl #( endcase if (req_port_i.kill_req) begin - state_d = IDLE; + state_d = IDLE; req_port_o.data_rvalid = 1'b1; end end @@ -421,9 +412,9 @@ module cache_ctrl #( // -------------- always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin - state_q <= IDLE; - mem_req_q <= '0; - hit_way_q <= '0; + state_q <= IDLE; + mem_req_q <= '0; + hit_way_q <= '0; end else begin state_q <= state_d; mem_req_q <= mem_req_d; @@ -443,18 +434,3 @@ module cache_ctrl #( `endif `endif endmodule - - - - -module AMO_alu ( - input logic clk_i, - input logic rst_ni, - // AMO interface - input logic amo_commit_i, // commit atomic memory operation - output logic amo_valid_o, // we have a valid AMO result - output logic [63:0] amo_result_o, // result of atomic memory operation - input logic amo_flush_i // forget about AMO - ); - -endmodule diff --git a/src/cache_subsystem/miss_handler.sv b/src/cache_subsystem/miss_handler.sv index 04246b4f35..31eed3a14f 100644 --- a/src/cache_subsystem/miss_handler.sv +++ b/src/cache_subsystem/miss_handler.sv @@ -47,6 +47,9 @@ module miss_handler #( input logic [NR_PORTS-1:0][55:0] mshr_addr_i, output logic [NR_PORTS-1:0] mshr_addr_matches_o, output logic [NR_PORTS-1:0] mshr_index_matches_o, + // AMO + input amo_req_t amo_req_i, + output amo_resp_t amo_resp_o, // Port to SRAMs, for refill and eviction output logic [DCACHE_SET_ASSOC-1:0] req_o, output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array @@ -56,22 +59,25 @@ module 
miss_handler #( output logic we_o ); - // 0 IDLE - // 1 FLUSHING - // 2 FLUSH - // 3 WB_CACHELINE_FLUSH - // 4 FLUSH_REQ_STATUS - // 5 WB_CACHELINE_MISS - // 6 WAIT_GNT_SRAM - // 7 MISS - // 8 REQ_CACHELINE - // 9 MISS_REPL - // A SAVE_CACHELINE - // B INIT - // FSM states - enum logic [3:0] { IDLE, FLUSHING, FLUSH, WB_CACHELINE_FLUSH, FLUSH_REQ_STATUS, WB_CACHELINE_MISS, WAIT_GNT_SRAM, MISS, - REQ_CACHELINE, MISS_REPL, SAVE_CACHELINE, INIT } state_d, state_q; + enum logic [3:0] { + IDLE, // 0 + FLUSHING, // 1 + FLUSH, // 2 + WB_CACHELINE_FLUSH, // 3 + FLUSH_REQ_STATUS, // 4 + WB_CACHELINE_MISS, // 5 + WAIT_GNT_SRAM, // 6 + MISS, // 7 + REQ_CACHELINE, // 8 + MISS_REPL, // 9 + SAVE_CACHELINE, // A + INIT, // B + AMO_LOAD, // C + AMO_SAVE_LOAD, // D + AMO_STORE // E + } state_d, state_q; + // Registers mshr_t mshr_d, mshr_q; logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q; @@ -79,6 +85,7 @@ module miss_handler #( // cache line to evict cache_line_t evict_cl_d, evict_cl_q; + logic serve_amo_d, serve_amo_q; // Request from one FSM logic [NR_PORTS-1:0] miss_req_valid; logic [NR_PORTS-1:0] miss_req_bypass; @@ -90,11 +97,13 @@ module miss_handler #( // Cache Line Refill <-> AXI logic req_fsm_miss_valid; - logic req_fsm_miss_bypass; logic [63:0] req_fsm_miss_addr; logic [DCACHE_LINE_WIDTH-1:0] req_fsm_miss_wdata; logic req_fsm_miss_we; logic [(DCACHE_LINE_WIDTH/8)-1:0] req_fsm_miss_be; + req_t req_fsm_miss_req; + logic [1:0] req_fsm_miss_size; + logic gnt_miss_fsm; logic valid_miss_fsm; logic [(DCACHE_LINE_WIDTH/64)-1:0][63:0] data_miss_fsm; @@ -103,6 +112,14 @@ module miss_handler #( logic lfsr_enable; logic [DCACHE_SET_ASSOC-1:0] lfsr_oh; logic [$clog2(DCACHE_SET_ASSOC-1)-1:0] lfsr_bin; + // AMOs + ariane_pkg::amo_t amo_op; + logic [63:0] amo_operand_a, amo_operand_b, amo_result_o; + + struct packed { + logic [63:3] address; + logic valid; + } reservation_d, reservation_q; // ------------------------------ // Cache Management @@ -129,14 +146,16 @@ module miss_handler #( 
lfsr_enable = 1'b0; // to AXI refill req_fsm_miss_valid = 1'b0; - req_fsm_miss_bypass = 1'b0; req_fsm_miss_addr = '0; req_fsm_miss_wdata = '0; req_fsm_miss_we = 1'b0; req_fsm_miss_be = '0; + req_fsm_miss_req = CACHE_LINE_REQ; + req_fsm_miss_size = 2'b11; // core flush_ack_o = 1'b0; miss_o = 1'b0; // to performance counter + serve_amo_d = serve_amo_q; // -------------------------------- // Flush and Miss operation // -------------------------------- @@ -148,7 +167,15 @@ module miss_handler #( // communicate to the requester which unit we are currently serving active_serving_o = '0; active_serving_o[mshr_q.id] = mshr_q.valid; - + // AMOs + amo_resp_o.ack = 1'b0; + amo_resp_o.result = '0; + // silence the unit when not used + amo_op = amo_req_i.amo_op; + amo_operand_a = '0; + amo_operand_b = '0; + + reservation_d = reservation_q; case (state_q) IDLE: begin @@ -288,6 +315,7 @@ module miss_handler #( addr_o = cnt_q; req_o = 1'b1; we_o = 1'b1; + data_o.valid = INVALIDATE_ON_FLUSH ? 1'b0 : 1'b1; // invalidate be_o.vldrty = evict_way_q; // go back to handling the miss or flushing, depending on where we came from @@ -320,11 +348,12 @@ module miss_handler #( state_d = FLUSH_REQ_STATUS; addr_o = cnt_q; req_o = 1'b1; - be_o.vldrty = '1; + be_o.vldrty = INVALIDATE_ON_FLUSH ? 
'1 : '0; we_o = 1'b1; // finished with flushing operation, go back to idle if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS-1) begin - flush_ack_o = 1'b1; + // only acknowledge if the flush wasn't triggered by an atomic + flush_ack_o = ~serve_amo_q; state_d = IDLE; end end @@ -343,6 +372,82 @@ module miss_handler #( if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS-1) state_d = IDLE; end + // ---------------------- + // AMOs + // ---------------------- + // TODO(zarubaf) Move this closer to memory + // ~> we are here because we need to do the AMO, the cache is clean at this point + // start by executing the load + AMO_LOAD: begin + req_fsm_miss_valid = 1'b1; + // address is in operand a + req_fsm_miss_addr = amo_req_i.operand_a; + req_fsm_miss_req = SINGLE_REQ; + req_fsm_miss_size = amo_req_i.size; + // the request has been granted + if (gnt_miss_fsm) begin + state_d = AMO_SAVE_LOAD; + end + end + // save the load value + AMO_SAVE_LOAD: begin + if (valid_miss_fsm) begin + // we are only concerned about the lower 64-bit + mshr_d.wdata = data_miss_fsm[0]; + state_d = AMO_STORE; + end + end + // and do the store + AMO_STORE: begin + automatic logic [63:0] load_data; + // re-align load data + load_data = data_align(amo_req_i.operand_a[2:0], mshr_q.wdata); + // Sign-extend for word operation + if (amo_req_i.size == 2'b10) begin + amo_operand_a = sext32(load_data[31:0]); + amo_operand_b = sext32(amo_req_i.operand_b[31:0]); + end else begin + amo_operand_a = load_data; + amo_operand_b = amo_req_i.operand_b; + end + + // we do not need a store request for load reserved + req_fsm_miss_valid = (amo_req_i.amo_op == AMO_LR) ? 1'b0 : 1'b1; + // for a load reserved we do not want to write + req_fsm_miss_we = (amo_req_i.amo_op == AMO_LR) ? 
1'b0 : 1'b1; + req_fsm_miss_req = SINGLE_REQ; + req_fsm_miss_size = amo_req_i.size; + req_fsm_miss_addr = amo_req_i.operand_a; + + req_fsm_miss_wdata = data_align(amo_req_i.operand_a[2:0], amo_result_o); + req_fsm_miss_be = be_gen(amo_req_i.operand_a[2:0], amo_req_i.size); + + // place a reservation on the memory + if (amo_req_i.amo_op == AMO_LR) begin + reservation_d.address = amo_req_i.operand_a[63:3]; + reservation_d.valid = 1'b1; + end + + // the request is valid or we didn't need to go for another store + if (valid_miss_fsm || (amo_req_i.amo_op == AMO_LR)) begin + state_d = IDLE; + amo_resp_o.ack = 1'b1; + // write-back the result + amo_resp_o.result = amo_operand_a; + // in case we have a SC we need to look into the reservation table + if (amo_req_i.amo_op == AMO_SC) begin + if (reservation_q.address == amo_req_i.operand_a[63:3] && reservation_q.valid) begin + amo_resp_o.result = 1'b0; + end else begin + amo_resp_o.result = 1'b1; + end + // An SC must fail if there is a nother SC (to any address) between the LR and the SC in program + // order. 
+ // in any case destory the reservation + reservation_d.valid = 1'b0; + end + end + end endcase end @@ -369,17 +474,21 @@ module miss_handler #( // -------------------- always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin - mshr_q <= '0; - state_q <= INIT; - cnt_q <= '0; - evict_way_q <= '0; - evict_cl_q <= '0; + mshr_q <= '0; + state_q <= INIT; + cnt_q <= '0; + evict_way_q <= '0; + evict_cl_q <= '0; + serve_amo_q <= 1'b0; + reservation_q <= '0; end else begin - mshr_q <= mshr_d; - state_q <= state_d; - cnt_q <= cnt_d; - evict_way_q <= evict_way_d; - evict_cl_q <= evict_cl_d; + mshr_q <= mshr_d; + state_q <= state_d; + cnt_q <= cnt_d; + evict_way_q <= evict_way_d; + evict_cl_q <= evict_cl_d; + serve_amo_q <= serve_amo_d; + reservation_q <= reservation_d; end end @@ -408,32 +517,32 @@ module miss_handler #( logic [AXI_ID_WIDTH-1:0] gnt_id_bypass_fsm; arbiter #( - .NR_PORTS ( NR_PORTS ), - .DATA_WIDTH ( 64 ) + .NR_PORTS ( NR_PORTS ), + .DATA_WIDTH ( 64 ) ) i_bypass_arbiter ( // Master Side - .data_req_i ( miss_req_valid & miss_req_bypass ), - .address_i ( miss_req_addr ), - .data_wdata_i ( miss_req_wdata ), - .data_we_i ( miss_req_we ), - .data_be_i ( miss_req_be ), - .data_size_i ( miss_req_size ), - .data_gnt_o ( bypass_gnt_o ), - .data_rvalid_o ( bypass_valid_o ), - .data_rdata_o ( bypass_data_o ), + .data_req_i ( miss_req_valid & miss_req_bypass ), + .address_i ( miss_req_addr ), + .data_wdata_i ( miss_req_wdata ), + .data_we_i ( miss_req_we ), + .data_be_i ( miss_req_be ), + .data_size_i ( miss_req_size ), + .data_gnt_o ( bypass_gnt_o ), + .data_rvalid_o ( bypass_valid_o ), + .data_rdata_o ( bypass_data_o ), // Slave Sid - .id_i ( id_bypass_fsm[$clog2(NR_PORTS)-1:0] ), - .id_o ( id_fsm_bypass ), - .gnt_id_i ( gnt_id_bypass_fsm[$clog2(NR_PORTS)-1:0] ), - .address_o ( req_fsm_bypass_addr ), - .data_wdata_o ( req_fsm_bypass_wdata ), - .data_req_o ( req_fsm_bypass_valid ), - .data_we_o ( req_fsm_bypass_we ), - .data_be_o ( req_fsm_bypass_be ), - 
.data_size_o ( req_fsm_bypass_size ), - .data_gnt_i ( gnt_bypass_fsm ), - .data_rvalid_i ( valid_bypass_fsm ), - .data_rdata_i ( data_bypass_fsm ), + .id_i ( id_bypass_fsm[$clog2(NR_PORTS)-1:0] ), + .id_o ( id_fsm_bypass ), + .gnt_id_i ( gnt_id_bypass_fsm[$clog2(NR_PORTS)-1:0] ), + .address_o ( req_fsm_bypass_addr ), + .data_wdata_o ( req_fsm_bypass_wdata ), + .data_req_o ( req_fsm_bypass_valid ), + .data_we_o ( req_fsm_bypass_we ), + .data_be_o ( req_fsm_bypass_be ), + .data_size_o ( req_fsm_bypass_size ), + .data_gnt_i ( gnt_bypass_fsm ), + .data_rvalid_i ( valid_bypass_fsm ), + .data_rdata_i ( data_bypass_fsm ), .* ); @@ -461,20 +570,20 @@ module miss_handler #( ); // ---------------------- - // Cache Line Arbiter + // Cache Line AXI Refill // ---------------------- axi_adapter #( .DATA_WIDTH ( DCACHE_LINE_WIDTH ), .AXI_ID_WIDTH ( AXI_ID_WIDTH ) ) i_miss_axi_adapter ( .req_i ( req_fsm_miss_valid ), - .type_i ( CACHE_LINE_REQ ), + .type_i ( req_fsm_miss_req ), .gnt_o ( gnt_miss_fsm ), .addr_i ( req_fsm_miss_addr ), .we_i ( req_fsm_miss_we ), .wdata_i ( req_fsm_miss_wdata ), .be_i ( req_fsm_miss_be ), - .size_i ( 2'b11 ), + .size_i ( req_fsm_miss_size ), .id_i ( '0 ), .gnt_id_o ( ), // open .valid_o ( valid_miss_fsm ), @@ -494,6 +603,16 @@ module miss_handler #( .* ); + // ----------------- + // AMO ALU + // ----------------- + amo_alu i_amo_alu ( + .amo_op_i ( amo_op ), + .amo_operand_a_i ( amo_operand_a ), + .amo_operand_b_i ( amo_operand_b ), + .amo_result_o ( amo_result_o ) + ); + // ----------------- // Struct Split // ----------------- diff --git a/src/cache_subsystem/std_cache_subsystem.sv b/src/cache_subsystem/std_cache_subsystem.sv index a6739b7b43..e3cfeb6fa0 100644 --- a/src/cache_subsystem/std_cache_subsystem.sv +++ b/src/cache_subsystem/std_cache_subsystem.sv @@ -24,58 +24,50 @@ import std_cache_pkg::*; module std_cache_subsystem #( parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000 )( - input logic clk_i, - input logic rst_ni, - - // I$ - input 
logic icache_en_i, // enable icache (or bypass e.g: in debug mode) - input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together - output logic icache_miss_o, // to performance counter - - // address translation requests - input icache_areq_i_t icache_areq_i, // to/from frontend - output icache_areq_o_t icache_areq_o, - // data requests - input icache_dreq_i_t icache_dreq_i, // to/from frontend - output icache_dreq_o_t icache_dreq_o, - - // D$ - // Cache management - input logic dcache_enable_i, // from CSR - input logic dcache_flush_i, // high until acknowledged - output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed - output logic dcache_miss_o, // we missed on a ld/st - // AMO interface (not functional yet) - input logic dcache_amo_commit_i, // commit atomic memory operation - output logic dcache_amo_valid_o, // we have a valid AMO result - output logic [63:0] dcache_amo_result_o, // result of atomic memory operation - input logic dcache_amo_flush_i, // forget about AMO - // Request ports - input dcache_req_i_t [2:0] dcache_req_ports_i, // to/from LSU - output dcache_req_o_t [2:0] dcache_req_ports_o, // to/from LSU - - // memory side - AXI_BUS.Master icache_data_if, // I$ refill port - AXI_BUS.Master dcache_data_if, // D$ refill port - AXI_BUS.Master dcache_bypass_if // bypass axi port (disabled D$ or uncacheable access) + input logic clk_i, + input logic rst_ni, + // I$ + input logic icache_en_i, // enable icache (or bypass e.g: in debug mode) + input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together + output logic icache_miss_o, // to performance counter + // address translation requests + input icache_areq_i_t icache_areq_i, // to/from frontend + output icache_areq_o_t icache_areq_o, + // data requests + input icache_dreq_i_t icache_dreq_i, // to/from frontend + output icache_dreq_o_t icache_dreq_o, + // AMOs + input amo_req_t amo_req_i, + 
output amo_resp_t amo_resp_o, + // D$ + // Cache management + input logic dcache_enable_i, // from CSR + input logic dcache_flush_i, // high until acknowledged + output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed + output logic dcache_miss_o, // we missed on a ld/st + // Request ports + input dcache_req_i_t [2:0] dcache_req_ports_i, // to/from LSU + output dcache_req_o_t [2:0] dcache_req_ports_o, // to/from LSU + // memory side + AXI_BUS.Master icache_data_if, // I$ refill port + AXI_BUS.Master dcache_data_if, // D$ refill port + AXI_BUS.Master dcache_bypass_if // bypass axi port (disabled D$ or uncacheable access) ); - std_icache #( ) i_icache ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .flush_i ( icache_flush_i ), - .en_i ( icache_en_i ), - .miss_o ( icache_miss_o ), - .areq_i ( icache_areq_i ), - .areq_o ( icache_areq_o ), - .dreq_i ( icache_dreq_i ), - .dreq_o ( icache_dreq_o ), - .axi ( icache_data_if ) + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( icache_flush_i ), + .en_i ( icache_en_i ), + .miss_o ( icache_miss_o ), + .areq_i ( icache_areq_i ), + .areq_o ( icache_areq_o ), + .dreq_i ( icache_dreq_i ), + .dreq_o ( icache_dreq_o ), + .axi ( icache_data_if ) ); - // decreasing priority // Port 0: PTW // Port 1: Load Unit @@ -83,21 +75,18 @@ module std_cache_subsystem #( std_nbdcache #( .CACHE_START_ADDR ( CACHE_START_ADDR ) ) i_nbdcache ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .enable_i ( dcache_enable_i ), - .flush_i ( dcache_flush_i ), - .flush_ack_o ( dcache_flush_ack_o ), - .miss_o ( dcache_miss_o ), - .data_if ( dcache_data_if ), - .bypass_if ( dcache_bypass_if ), - .amo_commit_i ( dcache_amo_commit_i ), - .amo_valid_o ( dcache_amo_valid_o ), - .amo_result_o ( dcache_amo_result_o ), - .amo_flush_i ( dcache_amo_flush_i ), - .req_ports_i ( dcache_req_ports_i ), - .req_ports_o ( dcache_req_ports_o ) + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .enable_i ( dcache_enable_i ), + .flush_i ( 
dcache_flush_i ), + .flush_ack_o ( dcache_flush_ack_o ), + .miss_o ( dcache_miss_o ), + .data_if ( dcache_data_if ), + .bypass_if ( dcache_bypass_if ), + .req_ports_i ( dcache_req_ports_i ), + .req_ports_o ( dcache_req_ports_o ), + .amo_req_i ( amo_req_i ), + .amo_resp_o ( amo_resp_o ) ); - endmodule // std_cache_subsystem diff --git a/src/cache_subsystem/std_icache.sv b/src/cache_subsystem/std_icache.sv index 7818c1d977..a6dcc2def9 100644 --- a/src/cache_subsystem/std_icache.sv +++ b/src/cache_subsystem/std_icache.sv @@ -50,13 +50,14 @@ module std_icache #( logic flushing_d, flushing_q; // signals - logic [ICACHE_SET_ASSOC-1:0] req; // request to memory array - logic [(ICACHE_LINE_WIDTH+7)/8-1:0] data_be; // byte enable for data array + logic [ICACHE_SET_ASSOC-1:0] req; // request to data memory + logic [ICACHE_SET_ASSOC-1:0] vld_req; // request to valid/tag memory + logic [(ICACHE_LINE_WIDTH+7)/8-1:0] data_be; // byte enable for data memory logic [(2**NR_AXI_REFILLS-1):0][7:0] be; // byte enable logic [$clog2(ICACHE_NUM_WORD)-1:0] addr; // this is a cache-line address, to memory array logic we; // write enable to memory array logic [ICACHE_SET_ASSOC-1:0] hit; // hit from tag compare - logic [ICACHE_BYTE_OFFSET-1:2] idx; // index in cache line + logic [$clog2(ICACHE_NUM_WORD)-1:0] idx; // index in cache line logic update_lfsr; // shift the LFSR logic [ICACHE_SET_ASSOC-1:0] random_way; // random way select from LFSR logic [ICACHE_SET_ASSOC-1:0] way_valid; // bit string which contains the zapped valid bits @@ -84,7 +85,7 @@ module std_icache #( ) tag_sram ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), - .req_i ( req[i] ), + .req_i ( vld_req[i] ), .we_i ( we ), .addr_i ( addr ), .wdata_i ( tag_wdata ), @@ -108,45 +109,31 @@ module std_icache #( .rdata_o ( data_rdata[i] ) ); end + // -------------------- - // Tag Comparison + // Tag Comparison and way select // -------------------- - for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin - assign hit[i] = (tag_rdata[i].tag == 
tag) ? tag_rdata[i].valid : 1'b0; - end - - `ifndef SYNTHESIS - `ifndef VERILATOR - // assert that cache only hits on one way - assert property ( - @(posedge clk_i) $onehot0(hit)) else begin $error("[icache] Hit should be one-hot encoded"); $stop(); end - `endif - `endif - // ------------------ - // Way Select - // ------------------ - assign idx = vaddr_q[ICACHE_BYTE_OFFSET-1:2]; // cacheline selected by hit - logic [ICACHE_LINE_WIDTH/FETCH_WIDTH-1:0][FETCH_WIDTH-1:0] selected_cl; - logic [ICACHE_LINE_WIDTH-1:0] selected_cl_flat; - - for (genvar i = 0; i < ICACHE_LINE_WIDTH; i++) begin - logic [ICACHE_SET_ASSOC-1:0] hit_masked_cl; - - for (genvar j = 0; j < ICACHE_SET_ASSOC; j++) - assign hit_masked_cl[j] = data_rdata[j][i] & hit[j]; + logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel; + + assign idx = vaddr_q[ICACHE_BYTE_OFFSET-1:2]; - assign selected_cl_flat[i] = |hit_masked_cl; + generate + for (genvar i=0;i redo the request, REDO_REQ: begin - req = '1; - addr = vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET]; - tag = tag_q; + req = '1; + vld_req = '1; + tag = tag_q; state_d = TAG_CMP_SAVED; // do tag comparison on the saved tag end // ~> we are coming here after reset or when a flush was requested FLUSH: begin - addr = cnt_q; cnt_d = cnt_q + 1; - req = '1; + vld_req = '1; we = 1; // we've finished flushing, go back to idle if (cnt_q == ICACHE_NUM_WORD - 1) begin @@ -441,9 +436,21 @@ module std_icache #( end end - `ifndef SYNTHESIS - initial begin - assert ($bits(axi.aw_addr) == 64) else $fatal(1, "Ariane needs a 64-bit bus"); - end - `endif +/////////////////////////////////////////////////////// +// assertions +/////////////////////////////////////////////////////// + +//pragma translate_off +`ifndef VERILATOR +initial begin + assert ($bits(axi.aw_addr) == 64) + else $fatal(1, "[icache] Ariane needs a 64-bit bus"); +end + +// assert that cache only hits on one way +onehot: assert property ( + @(posedge clk_i) disable iff (~rst_ni) $onehot0(hit)) + else 
$fatal(1, "[icache] Hit should be one-hot encoded"); +`endif +//pragma translate_on endmodule diff --git a/src/cache_subsystem/std_nbdcache.sv b/src/cache_subsystem/std_nbdcache.sv index 6d89c7d682..fa499cf6db 100644 --- a/src/cache_subsystem/std_nbdcache.sv +++ b/src/cache_subsystem/std_nbdcache.sv @@ -16,7 +16,7 @@ import ariane_pkg::*; import std_cache_pkg::*; module std_nbdcache #( - parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000 + parameter logic [63:0] CACHE_START_ADDR = 64'h8000_0000 )( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -24,16 +24,13 @@ module std_nbdcache #( input logic enable_i, // from CSR input logic flush_i, // high until acknowledged output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed - output logic miss_o, // we missed on a ld/st - // AMO interface - input logic amo_commit_i, // commit atomic memory operation - output logic amo_valid_o, // we have a valid AMO result - output logic [63:0] amo_result_o, // result of atomic memory operation - input logic amo_flush_i, // forget about AMO + output logic miss_o, // we missed on a LD/ST + // AMOs + input amo_req_t amo_req_i, + output amo_resp_t amo_resp_o, // Request ports input dcache_req_i_t [2:0] req_ports_i, // request ports - output dcache_req_o_t [2:0] req_ports_o, // request ports - + output dcache_req_o_t [2:0] req_ports_o, // request ports // Cache AXI refill port AXI_BUS.Master data_if, AXI_BUS.Master bypass_if @@ -92,13 +89,11 @@ module std_nbdcache #( .CACHE_START_ADDR ( CACHE_START_ADDR ) ) i_cache_ctrl ( .bypass_i ( ~enable_i ), - .busy_o ( busy [i] ), - + // from core .req_port_i ( req_ports_i [i] ), .req_port_o ( req_ports_o [i] ), - - + // to SRAM array .req_o ( req [i+1] ), .addr_o ( addr [i+1] ), .gnt_i ( gnt [i+1] ), @@ -118,9 +113,9 @@ module std_nbdcache #( .bypass_valid_i ( bypass_valid [i] ), .bypass_data_i ( bypass_data [i] ), - .mshr_addr_o ( mshr_addr [i] ), // TODO - 
.mshr_addr_matches_i ( mshr_addr_matches [i] ), // TODO - .mshr_index_matches_i ( mshr_index_matches[i] ), // TODO + .mshr_addr_o ( mshr_addr [i] ), + .mshr_addr_matches_i ( mshr_addr_matches [i] ), + .mshr_index_matches_i ( mshr_index_matches[i] ), .* ); end @@ -132,7 +127,11 @@ module std_nbdcache #( miss_handler #( .NR_PORTS ( 3 ) ) i_miss_handler ( + .flush_i ( flush_i ), .busy_i ( |busy ), + // AMOs + .amo_req_i ( amo_req_i ), + .amo_resp_o ( amo_resp_o ), .miss_req_i ( miss_req ), .miss_gnt_o ( miss_gnt ), .bypass_gnt_o ( bypass_gnt ), @@ -150,6 +149,8 @@ module std_nbdcache #( .be_o ( be [0] ), .data_o ( wdata [0] ), .we_o ( we [0] ), + .bypass_if, + .data_if, .* ); @@ -194,7 +195,7 @@ module std_nbdcache #( // ---------------- // align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals. - // note: if you have an SRAM that supports flat bit enables for your target technology, + // note: if you have an SRAM that supports flat bit enables for your target technology, // you can use it here to save the extra 4x overhead introduced by this workaround. logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata; diff --git a/src/clint/README.md b/src/clint/README.md index 9781815890..a62fc203b6 100644 --- a/src/clint/README.md +++ b/src/clint/README.md @@ -6,6 +6,6 @@ The CLINT plugs into an existing AXI Bus with an AXI 4 Lite interface. 
The IP mi | Address | Description | Note | |-------------------|-------------|------------------------------------------------| -| `BASE` + `0xo` | msip | Machine mode software interrupt (IPI) | +| `BASE` + `0x0` | msip | Machine mode software interrupt (IPI) | | `BASE` + `0x4000` | mtimecmp | Machine mode timer compare register for Hart 0 | | `BASE` + `0xBFF8` | mtime | Timer register | diff --git a/src/clint/axi_lite_interface.sv b/src/clint/axi_lite_interface.sv index 77a0b1cacb..982ce2fbc3 100644 --- a/src/clint/axi_lite_interface.sv +++ b/src/clint/axi_lite_interface.sv @@ -141,7 +141,7 @@ module axi_lite_interface #( // Registers // ------------------------ always_ff @(posedge clk_i or negedge rst_ni) begin - if(~rst_ni) begin + if (~rst_ni) begin CS <= IDLE; address_q <= '0; trans_id_q <= '0; @@ -159,10 +159,10 @@ module axi_lite_interface #( `ifndef SYNTHESIS `ifndef VERILATOR // check that burst length is just one - assert property (@(posedge clk_i) slave.ar_valid |-> ((slave.ar_len == 8'b0) && (slave.ar_size == $clog2(AXI_ADDR_WIDTH/8)))) + assert property (@(posedge clk_i) slave.ar_valid |-> ((slave.ar_len == 8'b0))) else begin $error("AXI Lite does not support bursts larger than 1 or byte length unequal to the native bus size"); $stop(); end // do the same for the write channel - assert property (@(posedge clk_i) slave.aw_valid |-> ((slave.aw_len == 8'b0) && (slave.aw_size == $clog2(AXI_ADDR_WIDTH/8)))) + assert property (@(posedge clk_i) slave.aw_valid |-> ((slave.aw_len == 8'b0))) else begin $error("AXI Lite does not support bursts larger than 1 or byte length unequal to the native bus size"); $stop(); end `endif `endif diff --git a/src/clint/clint.sv b/src/clint/clint.sv index 76fe06d76a..a53766497b 100644 --- a/src/clint/clint.sv +++ b/src/clint/clint.sv @@ -27,9 +27,7 @@ module clint #( AXI_BUS.Slave slave, - input logic halted_i, // cores are halted, also halt timer input logic rtc_i, // Real-time clock in (usually 32.768 kHz) - output logic 
[63:0] time_o, // Global Time out, this is the time-base of the whole SoC output logic [NR_CORES-1:0] timer_irq_o, // Timer interrupts output logic [NR_CORES-1:0] ipi_o // software interrupt (a.k.a inter-process-interrupt) ); @@ -54,9 +52,6 @@ module clint #( // increase the timer logic increase_timer; - // directly output the mtime_q register - this needs synchronization (but in the core). - assign time_o = mtime_q; - // ----------------------------- // AXI Interface Logic // ----------------------------- @@ -82,7 +77,7 @@ module clint #( mtimecmp_n = mtimecmp_q; msip_n = msip_q; // RTC says we should increase the timer - if (increase_timer && !halted_i) + if (increase_timer) mtime_n = mtime_q + 1; // written from APB bus - gets priority @@ -137,10 +132,11 @@ module clint #( always_comb begin : irq_gen // check that the mtime cmp register is set to a meaningful value for (int unsigned i = 0; i < NR_CORES; i++) begin - if (mtimecmp_q[i] != 0 && mtime_q >= mtimecmp_q[i]) + if (mtimecmp_q[i] != 0 && mtime_q >= mtimecmp_q[i]) begin timer_irq_o[i] = 1'b1; - else + end else begin timer_irq_o[i] = 1'b0; + end end end @@ -160,7 +156,7 @@ module clint #( // Registers always_ff @(posedge clk_i or negedge rst_ni) begin - if(~rst_ni) begin + if (~rst_ni) begin mtime_q <= 64'b0; mtimecmp_q <= 'b0; msip_q <= '0; diff --git a/src/commit_stage.sv b/src/commit_stage.sv index 2dd2072360..e91be4e687 100644 --- a/src/commit_stage.sv +++ b/src/commit_stage.sv @@ -18,6 +18,7 @@ module commit_stage #( parameter int unsigned NR_COMMIT_PORTS = 2 )( input logic clk_i, + input logic rst_ni, input logic halt_i, // request to halt the core input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline output exception_t exception_o, // take exception to controller @@ -27,12 +28,12 @@ module commit_stage #( // from scoreboard input scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_i, // the instruction we want to commit output logic [NR_COMMIT_PORTS-1:0] commit_ack_o, // 
acknowledge that we are indeed committing - // to register file output logic [NR_COMMIT_PORTS-1:0][4:0] waddr_o, // register file write address output logic [NR_COMMIT_PORTS-1:0][63:0] wdata_o, // register file write data output logic [NR_COMMIT_PORTS-1:0] we_o, // register file write enable - + // Atomic memory operations + input amo_resp_t amo_resp_i, // result of AMO operation // to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline) output logic [63:0] pc_o, // to/from CSR file @@ -43,18 +44,22 @@ module commit_stage #( // commit signals to ex output logic commit_lsu_o, // commit the pending store input logic commit_lsu_ready_i, // commit buffer of LSU is ready + output logic amo_valid_commit_o, // valid AMO in commit stage input logic no_st_pending_i, // there is no store pending output logic commit_csr_o, // commit the pending CSR instruction output logic fence_i_o, // flush I$ and pipeline output logic fence_o, // flush D$ and pipeline + output logic flush_commit_o, // request a pipeline flush output logic sfence_vma_o // flush TLBs and pipeline ); assign waddr_o[0] = commit_instr_i[0].rd[4:0]; assign waddr_o[1] = commit_instr_i[1].rd[4:0]; - assign pc_o = commit_instr_i[0].pc; + assign pc_o = commit_instr_i[0].pc; + logic instr_0_is_amo; + assign instr_0_is_amo = is_amo(commit_instr_i[0].op); // ------------------- // Commit Instruction // ------------------- @@ -64,93 +69,120 @@ module commit_stage #( commit_ack_o[0] = 1'b0; commit_ack_o[1] = 1'b0; + amo_valid_commit_o = 1'b0; + we_o[0] = 1'b0; we_o[1] = 1'b0; commit_lsu_o = 1'b0; commit_csr_o = 1'b0; - wdata_o[0] = commit_instr_i[0].result; + // amos will commit on port 0 + wdata_o[0] = (amo_resp_i.ack) ? 
amo_resp_i.result : commit_instr_i[0].result; wdata_o[1] = commit_instr_i[1].result; csr_op_o = ADD; // this corresponds to a CSR NOP csr_wdata_o = 64'b0; fence_i_o = 1'b0; fence_o = 1'b0; sfence_vma_o = 1'b0; + flush_commit_o = 1'b0; // we will not commit the instruction if we took an exception // and we do not commit the instruction if we requested a halt // furthermore if the debugger is requesting to debug do not commit this instruction if we are not yet in debug mode - if (commit_instr_i[0].valid && !halt_i && (!debug_req_i || debug_mode_i)) begin - - commit_ack_o[0] = 1'b1; - // register will be the all zero register. - // and also acknowledge the instruction, this is mainly done for the instruction tracer - // as it will listen on the instruction ack signal. For the overall result it does not make any - // difference as the whole pipeline is going to be flushed anyway. - if (!exception_o.valid) begin - // we can definitely write the register file - // if the instruction is not committing anything the destination - we_o[0] = 1'b1; + // also check that there is no atomic memory operation committing, right now this is the only operation + // which will take longer than one cycle to commit + if (commit_instr_i[0].valid && !halt_i) begin + if (!debug_req_i || debug_mode_i) begin + commit_ack_o[0] = 1'b1; + // register will be the all zero register. + // and also acknowledge the instruction, this is mainly done for the instruction tracer + // as it will listen on the instruction ack signal. For the overall result it does not make any + // difference as the whole pipeline is going to be flushed anyway. 
+ if (!exception_o.valid) begin + // we can definitely write the register file + // if the instruction is not committing anything the destination + we_o[0] = 1'b1; - // check whether the instruction we retire was a store - // do not commit the instruction if we got an exception since the store buffer will be cleared - // by the subsequent flush triggered by an exception - if (commit_instr_i[0].fu == STORE) begin - // check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store) - if (commit_lsu_ready_i) - commit_lsu_o = 1'b1; - else // if the LSU buffer is not ready - do not commit, wait - commit_ack_o[0] = 1'b0; + // check whether the instruction we retire was a store + // do not commit the instruction if we got an exception since the store buffer will be cleared + // by the subsequent flush triggered by an exception + if (commit_instr_i[0].fu == STORE && !instr_0_is_amo) begin + // check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store) + if (commit_lsu_ready_i) + commit_lsu_o = 1'b1; + else // if the LSU buffer is not ready - do not commit, wait + commit_ack_o[0] = 1'b0; + end end - end - // --------- - // CSR Logic - // --------- - // check whether the instruction we retire was a CSR instruction - if (commit_instr_i[0].fu == CSR) begin - // write the CSR file - commit_csr_o = 1'b1; - wdata_o[0] = csr_rdata_i; - csr_op_o = commit_instr_i[0].op; - csr_wdata_o = commit_instr_i[0].result; - end - // ------------------ - // SFENCE.VMA Logic - // ------------------ - // check if this instruction was a SFENCE_VMA - if (commit_instr_i[0].op == SFENCE_VMA) begin - // no store pending so we can flush the TLBs and pipeline - sfence_vma_o = no_st_pending_i; - // wait for the store buffer to drain until flushing the pipeline - commit_ack_o[0] = no_st_pending_i; - end - // ------------------ - // FENCE.I Logic - // ------------------ - // Fence synchronizes data and instruction streams. 
That means that we need to flush the private icache - // and the private dcache. This is the most expensive instruction. - if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && commit_instr_i[0].fu != STORE)) begin - commit_ack_o[0] = no_st_pending_i; - // tell the controller to flush the I$ - fence_i_o = no_st_pending_i; + // --------- + // CSR Logic + // --------- + // check whether the instruction we retire was a CSR instruction + if (commit_instr_i[0].fu == CSR) begin + // write the CSR file + commit_csr_o = 1'b1; + wdata_o[0] = csr_rdata_i; + csr_op_o = commit_instr_i[0].op; + csr_wdata_o = commit_instr_i[0].result; + end + // ------------------ + // SFENCE.VMA Logic + // ------------------ + // check if this instruction was a SFENCE_VMA + if (commit_instr_i[0].op == SFENCE_VMA) begin + // no store pending so we can flush the TLBs and pipeline + sfence_vma_o = no_st_pending_i; + // wait for the store buffer to drain until flushing the pipeline + commit_ack_o[0] = no_st_pending_i; + end + // ------------------ + // FENCE.I Logic + // ------------------ + // Fence synchronizes data and instruction streams. That means that we need to flush the private icache + // and the private dcache. This is the most expensive instruction. 
+ if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && commit_instr_i[0].fu != STORE)) begin + commit_ack_o[0] = no_st_pending_i; + // tell the controller to flush the I$ + fence_i_o = no_st_pending_i; + end + // ------------------ + // FENCE Logic + // ------------------ + if (commit_instr_i[0].op == FENCE) begin + commit_ack_o[0] = no_st_pending_i; + // tell the controller to flush the D$ + fence_o = no_st_pending_i; + end end // ------------------ - // FENCE Logic + // AMO // ------------------ - if (commit_instr_i[0].op == FENCE) begin - commit_ack_o[0] = no_st_pending_i; - // tell the controller to flush the D$ - fence_o = no_st_pending_i; + if (instr_0_is_amo && !commit_instr_i[0].ex.valid) begin + // AMO finished + commit_ack_o[0] = amo_resp_i.ack; + // flush the pipeline + flush_commit_o = amo_resp_i.ack; + amo_valid_commit_o = 1'b1; + we_o[0] = amo_resp_i.ack; end end + // ----------------- + // Commit Port 2 + // ----------------- // check if the second instruction can be committed as well and the first wasn't a CSR instruction // also if we are in single step mode don't retire the second instruction - if (commit_ack_o[0] && commit_instr_i[1].valid && !halt_i && !(commit_instr_i[0].fu inside {CSR}) && !flush_dcache_i && !single_step_i) begin + if (commit_ack_o[0] && commit_instr_i[1].valid + && !halt_i + && !(commit_instr_i[0].fu inside {CSR}) + && !flush_dcache_i + && !instr_0_is_amo + && !single_step_i) begin // only if the first instruction didn't throw an exception and this instruction won't throw an exception // and the operator is of type ALU, LOAD, CTRL_FLOW, MULT - if (!exception_o.valid && !commit_instr_i[1].ex.valid && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT})) begin + if (!exception_o.valid && !commit_instr_i[1].ex.valid + && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT})) begin we_o[1] = 1'b1; commit_ack_o[1] = 1'b1; end @@ -195,14 +227,17 @@ module commit_stage #( // ------------------------ // check for CSR 
interrupts (e.g.: normal interrupts which get triggered here) // by putting interrupts here we give them precedence over any other exception - if (csr_exception_i.valid && csr_exception_i.cause[63]) begin + // Don't take the interrupt if we are committing an AMO. + if (csr_exception_i.valid && csr_exception_i.cause[63] && !amo_valid_commit_o) begin exception_o = csr_exception_i; exception_o.tval = commit_instr_i[0].ex.tval; end end - // If we halted the processor don't take any exceptions + // Don't take any exceptions iff: + // - If we halted the processor if (halt_i) begin exception_o.valid = 1'b0; end end -endmodule + +endmodule \ No newline at end of file diff --git a/src/common_cells b/src/common_cells index 4277217c9a..9278bc769f 160000 --- a/src/common_cells +++ b/src/common_cells @@ -1 +1 @@ -Subproject commit 4277217c9ae8b1228f801e5a67de9ecdce8d887f +Subproject commit 9278bc769f3efd006864a7ef7721f2796ed968e6 diff --git a/src/controller.sv b/src/controller.sv index e88a7ba28e..51916ffbe3 100644 --- a/src/controller.sv +++ b/src/controller.sv @@ -36,7 +36,8 @@ module controller ( input logic flush_csr_i, // We got an instruction which altered the CSR, flush the pipeline input logic fence_i_i, // fence.i in input logic fence_i, // fence in - input logic sfence_vma_i // We got an instruction to flush the TLBs and pipeline + input logic sfence_vma_i, // We got an instruction to flush the TLBs and pipeline + input logic flush_commit_i // Flush request from commit stage ); // active fence - high if we are currently flushing the dcache @@ -114,13 +115,12 @@ module controller ( flush_unissued_instr_o = 1'b1; flush_id_o = 1'b1; flush_ex_o = 1'b1; + flush_tlb_o = 1'b1; end - // --------------------------------- - // CSR instruction with side-effect - // --------------------------------- - if (flush_csr_i) begin + // Set PC to commit stage and flush pipeline + if (flush_csr_i || flush_commit_i) begin set_pc_commit_o = 1'b1; flush_if_o = 1'b1; flush_unissued_instr_o = 
1'b1; diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index 347d1fbdc6..e8a0f501ff 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -100,7 +100,6 @@ module csr_regfile #( riscv::priv_lvl_t priv_lvl_d, priv_lvl_q; // we are in debug logic debug_mode_q, debug_mode_d; - logic [63:0] next_pc; riscv::status_rv64_t mstatus_q, mstatus_d; riscv::satp_t satp_q, satp_d; @@ -275,6 +274,10 @@ module csr_regfile #( sscratch_d = sscratch_q; stval_d = stval_q; satp_d = satp_q; + + cycle_d = cycle_q; + instret_d = instret_q; + en_ld_st_translation_d = en_ld_st_translation_q; // check for correct access rights and that we are writing @@ -525,6 +528,7 @@ module csr_regfile #( end // we've got a debug request (and we have an instruction which we can associate it to) + // don't interrupt the AMO if (debug_req_i && commit_instr_i[0].valid) begin // save the PC dpc_d = pc_i; @@ -619,15 +623,17 @@ module csr_regfile #( // -------------------- // Counters // -------------------- - // just increment the cycle count - cycle_d = cycle_q + 1'b1; - // increase instruction retired counter - for (int i = 0; i < NR_COMMIT_PORTS; i++) begin - if (commit_ack_i[i]) begin - instret++; + if (!debug_mode_q) begin + // just increment the cycle count + cycle_d = cycle_q + 1'b1; + // increase instruction retired counter + for (int i = 0; i < NR_COMMIT_PORTS; i++) begin + if (commit_ack_i[i]) begin + instret++; + end end + instret_d = instret; end - instret_d = instret; end // --------------------------- @@ -807,22 +813,6 @@ module csr_regfile #( end end - // calculate the next PC based on the current one - always_comb begin : next_pc_calc - automatic logic [63:0] pc; - automatic logic [63:0] branch_target; - automatic logic branch_taken; - automatic logic is_compressed; - // we only need to check the 0th instruction as in single-step mode we are only retiring one instruction - pc = commit_instr_i[0].pc; - branch_taken = commit_instr_i[0].bp.valid & commit_instr_i[0].bp.predict_taken; - 
is_compressed = commit_instr_i[0].is_compressed; - branch_target = commit_instr_i[0].bp.predict_address; - // TODO(zarubaf) this adder can potentially be saved, the next address has been - // calculated a couple of times down the pipeline - next_pc = (branch_taken ? branch_target : (is_compressed ? pc + 'h2 : pc + 'h4)); - end - // ------------------- // Output Assignments // ------------------- diff --git a/src/debug/dm_csrs.sv b/src/debug/dm_csrs.sv index 3622eb3bad..aee8177abd 100644 --- a/src/debug/dm_csrs.sv +++ b/src/debug/dm_csrs.sv @@ -24,14 +24,11 @@ module dm_csrs #( input logic dmi_rst_ni, // Debug Module Interface reset, active-low input logic dmi_req_valid_i, output logic dmi_req_ready_o, - input logic [ 6:0] dmi_req_bits_addr_i, - input logic [ 1:0] dmi_req_bits_op_i, // 0 = nop, 1 = read, 2 = write - input logic [31:0] dmi_req_bits_data_i, + input dm::dmi_req_t dmi_req_i, // every request needs a response one cycle later output logic dmi_resp_valid_o, input logic dmi_resp_ready_i, - output logic [ 1:0] dmi_resp_bits_resp_o, - output logic [31:0] dmi_resp_bits_data_o, + output dm::dmi_resp_t dmi_resp_o, // global ctrl output logic ndmreset_o, // non-debug module reset, active-high output logic dmactive_o, // 1 -> debug-module is active, 0 -> synchronous re-set @@ -47,9 +44,9 @@ module dm_csrs #( output logic cmd_valid_o, // debugger is writing to the command field output dm::command_t cmd_o, // abstract command - input logic [NrHarts-1:0] cmderror_valid_i, // an error occured - input dm::cmderr_t [NrHarts-1:0] cmderror_i, // this error occured - input logic [NrHarts-1:0] cmdbusy_i, // cmd is currently busy executing + input logic cmderror_valid_i, // an error occurred + input dm::cmderr_t cmderror_i, // this error occurred + input logic cmdbusy_i, // cmd is currently busy executing output logic [dm::ProgBufSize-1:0][31:0] progbuf_o, // to system bus output logic [dm::DataCount-1:0][31:0] data_o, @@ -80,7 +77,7 @@ module dm_csrs #( // the amount of bits 
we need to represent all harts localparam HartSelLen = (NrHarts == 1) ? 1 : $clog2(NrHarts); dm::dtm_op_t dtm_op; - assign dtm_op = dm::dtm_op_t'(dmi_req_bits_op_i); + assign dtm_op = dm::dtm_op_t'(dmi_req_i.op); logic resp_queue_full; logic resp_queue_empty; @@ -121,7 +118,7 @@ module dm_csrs #( logic [NrHarts-1:0] selected_hart; // a successful response returns zero - assign dmi_resp_bits_resp_o = dm::DTM_SUCCESS; + assign dmi_resp_o.resp = dm::DTM_SUCCESS; assign dmi_resp_valid_o = ~resp_queue_empty; assign dmi_req_ready_o = ~resp_queue_full; assign resp_queue_push = dmi_req_valid_i & dmi_req_ready_o; @@ -171,7 +168,7 @@ module dm_csrs #( abstractcs = '0; abstractcs.datacount = dm::DataCount; abstractcs.progbufsize = dm::ProgBufSize; - abstractcs.busy = cmdbusy_i[selected_hart]; + abstractcs.busy = cmdbusy_i; abstractcs.cmderr = cmderr_q; // abstractautoexec @@ -197,14 +194,14 @@ module dm_csrs #( // reads if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_READ) begin - unique case ({1'b0, dmi_req_bits_addr_i}) inside + unique case ({1'b0, dmi_req_i.addr}) inside [(dm::Data0):DataEnd]: begin if (dm::DataCount > 0) begin - resp_queue_data = data_q[dmi_req_bits_addr_i[4:0]]; + resp_queue_data = data_q[dmi_req_i.addr[4:0]]; end if (!cmdbusy_i) begin // check whether we need to re-execute the command (just give a cmd_valid) - cmd_valid_o = abstractauto_q.autoexecdata[dmi_req_bits_addr_i[3:0] - dm::Data0]; + cmd_valid_o = abstractauto_q.autoexecdata[dmi_req_i.addr[3:0] - dm::Data0]; end end dm::DMControl: resp_queue_data = dmcontrol_q; @@ -215,10 +212,10 @@ module dm_csrs #( // command is read-only dm::Command: resp_queue_data = '0; [(dm::ProgBuf0):ProgBufEnd]: begin - resp_queue_data = progbuf_q[dmi_req_bits_addr_i[4:0]]; + resp_queue_data = progbuf_q[dmi_req_i.addr[4:0]]; if (!cmdbusy_i) begin // check whether we need to re-execute the command (just give a cmd_valid) - cmd_valid_o = abstractauto_q.autoexecprogbuf[dmi_req_bits_addr_i[3:0]]; + cmd_valid_o = 
abstractauto_q.autoexecprogbuf[dmi_req_i.addr[3:0]]; end end dm::HaltSum0: resp_queue_data = haltsum0; @@ -269,23 +266,23 @@ module dm_csrs #( // write if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_WRITE) begin - unique case (dm::dm_csr_t'({1'b0, dmi_req_bits_addr_i})) inside + unique case (dm::dm_csr_t'({1'b0, dmi_req_i.addr})) inside [(dm::Data0):DataEnd]: begin // attempts to write them while busy is set does not change their value if (!cmdbusy_i && dm::DataCount > 0) begin - data_d[dmi_req_bits_addr_i[4:0]] = dmi_req_bits_data_i; + data_d[dmi_req_i.addr[4:0]] = dmi_req_i.data; // check whether we need to re-execute the command (just give a cmd_valid) - cmd_valid_o = abstractauto_q.autoexecdata[dmi_req_bits_addr_i[3:0] - dm::Data0]; + cmd_valid_o = abstractauto_q.autoexecdata[dmi_req_i.addr[3:0] - dm::Data0]; end end dm::DMControl: begin automatic dm::dmcontrol_t dmcontrol; - dmcontrol = dm::dmcontrol_t'(dmi_req_bits_data_i); + dmcontrol = dm::dmcontrol_t'(dmi_req_i.data); // clear the havreset of the selected hart if (dmcontrol.ackhavereset) begin havereset_d[selected_hart] = 1'b0; end - dmcontrol_d = dmi_req_bits_data_i; + dmcontrol_d = dmi_req_i.data; end dm::DMStatus:; // write are ignored to R/O register dm::Hartinfo:; // hartinfo is R/O @@ -296,7 +293,7 @@ module dm_csrs #( // them. No abstract command is started until the value is // reset to 0. 
automatic dm::abstractcs_t a_abstractcs; - a_abstractcs = dm::abstractcs_t'(dmi_req_bits_data_i); + a_abstractcs = dm::abstractcs_t'(dmi_req_i.data); // reads during abstract command execution are not allowed if (!cmdbusy_i) begin cmderr_d = dm::cmderr_t'(~a_abstractcs.cmderr & cmderr_q); @@ -309,7 +306,7 @@ module dm_csrs #( // writes are ignored if a command is already busy if (!cmdbusy_i) begin cmd_valid_o = 1'b1; - command_d = dm::command_t'(dmi_req_bits_data_i); + command_d = dm::command_t'(dmi_req_i.data); // if there was an attempted to write during a busy execution // and the cmderror field is zero set the busy error end else if (cmderr_q == dm::CmdErrNone) begin @@ -319,7 +316,7 @@ module dm_csrs #( dm::AbstractAuto: begin // this field can only be written legally when there is no command executing if (!cmdbusy_i) begin - abstractauto_d = {dmi_req_bits_data_i[31:16], 4'b0, dmi_req_bits_data_i[11:0]}; + abstractauto_d = {dmi_req_i.data[31:16], 4'b0, dmi_req_i.data[11:0]}; end else if (cmderr_q == dm::CmdErrNone) begin cmderr_d = dm::CmdErrBusy; end @@ -327,10 +324,10 @@ module dm_csrs #( [(dm::ProgBuf0):ProgBufEnd]: begin // attempts to write them while busy is set does not change their value if (!cmdbusy_i) begin - progbuf_d[dmi_req_bits_addr_i[4:0]] = dmi_req_bits_data_i; + progbuf_d[dmi_req_i.addr[4:0]] = dmi_req_i.data; // check whether we need to re-execute the command (just give a cmd_valid) // this should probably throw an error if executed during another command was busy - cmd_valid_o = abstractauto_q.autoexecprogbuf[dmi_req_bits_addr_i[3:0]]; + cmd_valid_o = abstractauto_q.autoexecprogbuf[dmi_req_i.addr[3:0]]; end end dm::SBCS: begin @@ -338,7 +335,7 @@ module dm_csrs #( if (sbbusy_i) begin sbcs_d.sbbusyerror = 1'b1; end begin - automatic dm::sbcs_t sbcs = dm::sbcs_t'(dmi_req_bits_data_i); + automatic dm::sbcs_t sbcs = dm::sbcs_t'(dmi_req_i.data); sbcs_d = sbcs; // R/W1C sbcs_d.sbbusyerror = sbcs_q.sbbusyerror & (~sbcs.sbbusyerror); @@ -350,7 
+347,7 @@ module dm_csrs #( if (sbbusy_i) begin sbcs_d.sbbusyerror = 1'b1; end begin - sbaddr_d[31:0] = dmi_req_bits_data_i; + sbaddr_d[31:0] = dmi_req_i.data; sbaddress_write_valid_o = (sbcs_q.sberror == '0); end end @@ -359,7 +356,7 @@ module dm_csrs #( if (sbbusy_i) begin sbcs_d.sbbusyerror = 1'b1; end begin - sbaddr_d[63:32] = dmi_req_bits_data_i; + sbaddr_d[63:32] = dmi_req_i.data; end end dm::SBData0: begin @@ -367,7 +364,7 @@ module dm_csrs #( if (sbbusy_i) begin sbcs_d.sbbusyerror = 1'b1; end begin - sbdata_d[31:0] = dmi_req_bits_data_i; + sbdata_d[31:0] = dmi_req_i.data; sbdata_write_valid_o = (sbcs_q.sberror == '0); end end @@ -376,15 +373,15 @@ module dm_csrs #( if (sbbusy_i) begin sbcs_d.sbbusyerror = 1'b1; end begin - sbdata_d[63:32] = dmi_req_bits_data_i; + sbdata_d[63:32] = dmi_req_i.data; end end default:; endcase end // hart threw a command error and has precedence over bus writes - if (cmderror_valid_i[selected_hart]) begin - cmderr_d = cmderror_i[selected_hart]; + if (cmderror_valid_i) begin + cmderr_d = cmderror_i; end // update data registers @@ -448,7 +445,7 @@ module dm_csrs #( assign data_o = data_q; assign resp_queue_pop = dmi_resp_ready_i & ~resp_queue_empty; - + // response FIFO fifo_v2 #( .dtype ( logic [31:0] ), @@ -464,7 +461,7 @@ module dm_csrs #( .alm_empty_o ( ), .data_i ( resp_queue_data ), .push_i ( resp_queue_push ), - .data_o ( dmi_resp_bits_data_o ), + .data_o ( dmi_resp_o.data ), .pop_i ( resp_queue_pop ) ); diff --git a/src/debug/dm_mem.sv b/src/debug/dm_mem.sv index fd56551d71..36d1f63be8 100644 --- a/src/debug/dm_mem.sv +++ b/src/debug/dm_mem.sv @@ -22,7 +22,7 @@ module dm_mem #( input logic clk_i, // Clock input logic dmactive_i, // debug module reset - output logic debug_req_o, + output logic [NrHarts-1:0] debug_req_o, input logic [19:0] hartsel_i, // from Ctrl and Status register input logic [NrHarts-1:0] haltreq_i, diff --git a/src/debug/dm_pkg.sv b/src/debug/dm_pkg.sv index 340e8a2ee2..df7f9ff67e 100644 --- 
a/src/debug/dm_pkg.sv +++ b/src/debug/dm_pkg.sv @@ -203,4 +203,15 @@ package dm; localparam logic[1:0] DTM_SUCCESS = 2'h0; + typedef struct packed { + logic [6:0] addr; + dtm_op_t op; + logic [31:0] data; + } dmi_req_t; + + typedef struct packed { + logic [31:0] data; + logic [1:0] resp; + } dmi_resp_t; + endpackage diff --git a/src/debug/dm_top.sv b/src/debug/dm_top.sv index fc935c7bd9..5e34316775 100644 --- a/src/debug/dm_top.sv +++ b/src/debug/dm_top.sv @@ -38,14 +38,11 @@ module dm_top #( input logic dmi_rst_ni, input logic dmi_req_valid_i, output logic dmi_req_ready_o, - input logic [ 6:0] dmi_req_bits_addr_i, - input logic [ 1:0] dmi_req_bits_op_i, // 0 = nop, 1 = read, 2 = write - input logic [31:0] dmi_req_bits_data_i, + input dm::dmi_req_t dmi_req_i, output logic dmi_resp_valid_o, input logic dmi_resp_ready_i, - output logic [ 1:0] dmi_resp_bits_resp_o, - output logic [31:0] dmi_resp_bits_data_o + output dm::dmi_resp_t dmi_resp_o ); // Debug CSRs @@ -65,9 +62,9 @@ module dm_top #( logic [63:0] wdata; logic [63:0] rdata; - logic [NrHarts-1:0] cmderror_valid; - dm::cmderr_t [NrHarts-1:0] cmderror; - logic [NrHarts-1:0] cmdbusy; + logic cmderror_valid; + dm::cmderr_t cmderror; + logic cmdbusy; logic [dm::ProgBufSize-1:0][31:0] progbuf; logic [dm::DataCount-1:0][31:0] data_csrs_mem; logic [dm::DataCount-1:0][31:0] data_mem_csrs; @@ -102,16 +99,13 @@ module dm_top #( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .testmode_i ( testmode_i ), - .dmi_rst_ni ( dmi_rst_ni ), - .dmi_req_valid_i ( dmi_req_valid_i ), - .dmi_req_ready_o ( dmi_req_ready_o ), - .dmi_req_bits_addr_i ( dmi_req_bits_addr_i ), - .dmi_req_bits_op_i ( dmi_req_bits_op_i ), - .dmi_req_bits_data_i ( dmi_req_bits_data_i ), - .dmi_resp_valid_o ( dmi_resp_valid_o ), - .dmi_resp_ready_i ( dmi_resp_ready_i ), - .dmi_resp_bits_resp_o ( dmi_resp_bits_resp_o ), - .dmi_resp_bits_data_o ( dmi_resp_bits_data_o ), + .dmi_rst_ni, + .dmi_req_valid_i, + .dmi_req_ready_o, + .dmi_req_i, + .dmi_resp_valid_o, + 
.dmi_resp_ready_i, + .dmi_resp_o, .ndmreset_o ( ndmreset_o ), .dmactive_o ( dmactive_o ), .hartsel_o ( hartsel ), diff --git a/src/debug/dmi_cdc.sv b/src/debug/dmi_cdc.sv index 0f21889658..98b15a8604 100644 --- a/src/debug/dmi_cdc.sv +++ b/src/debug/dmi_cdc.sv @@ -18,498 +18,55 @@ */ module dmi_cdc ( // JTAG side (master side) - input logic tck_i, - input logic trst_ni, + input logic tck_i, + input logic trst_ni, - input logic mem_valid_i, - output logic mem_gnt_o, - input logic [6:0] mem_addr_i, - input logic mem_we_i, - input logic [31:0] mem_wdata_i, - output logic [31:0] mem_rdata_o, - output logic mem_rvalid_o, + input dm::dmi_req_t jtag_dmi_req_i, + output logic jtag_dmi_ready_o, + input logic jtag_dmi_valid_i, - // Memory -> Slave side - input logic clk_i, - input logic rst_ni, + output dm::dmi_resp_t jtag_dmi_resp_o, + output logic jtag_dmi_valid_o, + input logic jtag_dmi_ready_i, - output logic dmi_req_valid_o, - input logic dmi_req_ready_i, + // core side (slave side) + input logic clk_i, + input logic rst_ni, - output logic [ 6:0] dmi_req_bits_addr_o, - output logic [ 1:0] dmi_req_bits_op_o, - output logic [31:0] dmi_req_bits_data_o, + output dm::dmi_req_t core_dmi_req_o, + output logic core_dmi_valid_o, + input logic core_dmi_ready_i, - input logic dmi_resp_valid_i, - output logic dmi_resp_ready_o, - input logic [ 1:0] dmi_resp_bits_resp_i, - input logic [31:0] dmi_resp_bits_data_i -); - - logic mem_we; - // we will always be ready to receive the request we made - assign dmi_resp_ready_o = 1'b1; - // very "cheap" protocol conversion - assign dmi_req_bits_op_o = (mem_we) ? 
dm::DTM_WRITE : dm::DTM_READ; - - localparam int unsigned AddrWidth = 7; - localparam int unsigned DataWidth = 32; - - logic cdc_req_a; - logic cdc_ack_a; - logic [AddrWidth-1:0] cdc_addr_a; - logic cdc_we_a; - logic [DataWidth/8-1:0] cdc_be_a; - logic [DataWidth-1:0] cdc_wdata_a; - logic cdc_clear_a; - logic cdc_rreq_a; - logic cdc_rack_a; - logic [DataWidth-1:0] cdc_rdata_a; - logic cdc_rerror_a; - - // lets re-use most of the debug facilities which are already in PULP - dmi_cdc_jtag #( - .ADDR_WIDTH (AddrWidth), - .DATA_WIDTH (DataWidth) - ) i_dmi_cdc_jtag ( - .tck_i, - .trst_ni, - .mem_req_i ( mem_valid_i ), - .mem_gnt_o, - .mem_addr_i, - .mem_we_i, - .mem_be_i ( '1 ), - .mem_wdata_i, - .mem_rdata_o, - .mem_rvalid_o, - // we are not managing any errors here - // a more elaborate implementation should probably handle this more gracefully - .mem_rerror_o ( ), - .mem_clear_i ( 1'b0 ), - .cdc_req_ao ( cdc_req_a ), - .cdc_ack_ai ( cdc_ack_a ), - .cdc_addr_ao ( cdc_addr_a ), - .cdc_we_ao ( cdc_we_a ), - .cdc_be_ao ( cdc_be_a ), - .cdc_wdata_ao ( cdc_wdata_a ), - .cdc_clear_ao ( cdc_clear_a ), - .cdc_rreq_ai ( cdc_rreq_a ), - .cdc_rack_ao ( cdc_rack_a ), - .cdc_rdata_ai ( cdc_rdata_a ), - .cdc_rerror_ai ( cdc_rerror_a ) - ); - - dmi_cdc_mem #( - .ADDR_WIDTH (AddrWidth), - .DATA_WIDTH (DataWidth) - ) i_dmi_cdc_mem ( - .clk_i, - .rst_ni, - .mem_req_o ( dmi_req_valid_o ), - .mem_gnt_i ( dmi_req_ready_i ), - .mem_addr_o ( dmi_req_bits_addr_o ), - .mem_we_o ( mem_we ), - // don't care we always write whole words - .mem_be_o ( ), - .mem_wdata_o ( dmi_req_bits_data_o ), - .mem_rdata_i ( dmi_resp_bits_data_i ), - .mem_rvalid_i ( dmi_resp_valid_i ), - // don't care about clearing an error flag - // that is handled differently in the RISC-V implementation - .mem_rerror_i ( 1'b0 ), - .mem_clear_o ( ), - .cdc_req_ai ( cdc_req_a ), - .cdc_ack_ao ( cdc_ack_a ), - .cdc_addr_ai ( cdc_addr_a ), - .cdc_we_ai ( cdc_we_a ), - .cdc_be_ai ( cdc_be_a ), - .cdc_wdata_ai ( cdc_wdata_a ), - 
.cdc_clear_ai ( cdc_clear_a ), - .cdc_rreq_ao ( cdc_rreq_a ), - .cdc_rack_ai ( cdc_rack_a ), - .cdc_rdata_ao ( cdc_rdata_a ), - .cdc_rerror_ao ( cdc_rerror_a ) - ); -endmodule - -module dmi_cdc_jtag #( - parameter int unsigned ADDR_WIDTH = 32, - parameter int unsigned DATA_WIDTH = 64 -)( - // JTAG side - input logic tck_i, - input logic trst_ni, - - input logic mem_req_i, - output logic mem_gnt_o, - input logic [ADDR_WIDTH-1:0] mem_addr_i, - input logic mem_we_i, - input logic [DATA_WIDTH/8-1:0] mem_be_i, - input logic [DATA_WIDTH-1:0] mem_wdata_i, - output logic [DATA_WIDTH-1:0] mem_rdata_o, - output logic mem_rvalid_o, - output logic mem_rerror_o, - - input logic mem_clear_i, - - // CDC side - output logic cdc_req_ao, - input logic cdc_ack_ai, - output logic [ADDR_WIDTH-1:0] cdc_addr_ao, - output logic cdc_we_ao, - output logic [DATA_WIDTH/8-1:0] cdc_be_ao, - output logic [DATA_WIDTH-1:0] cdc_wdata_ao, - output logic cdc_clear_ao, - input logic cdc_rreq_ai, - output logic cdc_rack_ao, - input logic [DATA_WIDTH-1:0] cdc_rdata_ai, - input logic cdc_rerror_ai - ); - - enum logic [1:0] { IDLE, WAIT_ACK_LOW, WAIT_ACK_HIGH, READY_ACK_LOW } req_state_p, req_state_n; - enum logic [0:0] { RIDLE, WAIT_REQ_LOW } resp_state_p, resp_state_n; - - logic [ADDR_WIDTH-1:0] cdc_addr_p; - logic cdc_we_p; - logic [DATA_WIDTH/8-1:0] cdc_be_p; - logic [DATA_WIDTH-1:0] cdc_wdata_p; - - logic cdc_clear_p; - - logic cdc_ack; - logic cdc_rreq; - - always_comb - begin - req_state_n = req_state_p; - - mem_gnt_o = 1'b0; - cdc_req_ao = 1'b0; - - unique case (req_state_p) - IDLE: begin - if (mem_req_i) begin - req_state_n = WAIT_ACK_HIGH; - - mem_gnt_o = 1'b1; - end - end - - WAIT_ACK_HIGH: begin - cdc_req_ao = 1'b1; - - if (cdc_ack) begin - req_state_n = WAIT_ACK_LOW; - end - end - - WAIT_ACK_LOW: begin - if (mem_req_i) - mem_gnt_o = 1'b1; - - if (~cdc_ack) begin - if (mem_req_i) - req_state_n = WAIT_ACK_HIGH; - else - req_state_n = IDLE; - end else begin - if (mem_req_i) - req_state_n = 
READY_ACK_LOW; - end - end - - READY_ACK_LOW: begin - if (~cdc_ack) begin - req_state_n = WAIT_ACK_HIGH; - end - end - - default:; // make unique case happy during reset - endcase - end - - always_comb - begin - resp_state_n = resp_state_p; - - mem_rvalid_o = 1'b0; - cdc_rack_ao = 1'b0; - - unique case (resp_state_p) - RIDLE: begin - if (cdc_rreq) begin - resp_state_n = WAIT_REQ_LOW; - mem_rvalid_o = 1'b1; - end - end - - WAIT_REQ_LOW: begin - cdc_rack_ao = 1'b1; - - if (~cdc_rreq) begin - resp_state_n = RIDLE; - end - end - - default:; // make unique case happy during reset - endcase - end - - always_ff @(posedge tck_i, negedge trst_ni) - begin - if (~trst_ni) begin - req_state_p <= IDLE; - resp_state_p <= RIDLE; - - cdc_addr_p <= '0; - cdc_we_p <= '0; - cdc_be_p <= '0; - cdc_wdata_p <= '0; - cdc_clear_p <= '0; - end else begin - req_state_p <= req_state_n; - resp_state_p <= resp_state_n; - - if (mem_gnt_o) begin - cdc_addr_p <= mem_addr_i; - cdc_we_p <= mem_we_i; - cdc_be_p <= mem_be_i; - cdc_wdata_p <= mem_wdata_i; - cdc_clear_p <= mem_clear_i; - end - end - end - - assign cdc_addr_ao = cdc_addr_p; - assign cdc_we_ao = cdc_we_p; - assign cdc_be_ao = cdc_be_p; - assign cdc_wdata_ao = cdc_wdata_p; - assign cdc_clear_ao = cdc_clear_p; - - pulp_sync i_sync_ack ( - .clk_i ( tck_i ), - .rstn_i ( trst_ni ) , - .serial_i ( cdc_ack_ai ), - .serial_o ( cdc_ack ) - ); - - pulp_sync i_sync_rreq ( - .clk_i ( tck_i ), - .rstn_i ( trst_ni ) , - .serial_i ( cdc_rreq_ai ), - .serial_o ( cdc_rreq ) - ); - - assign mem_rerror_o = cdc_rerror_ai; - assign mem_rdata_o = cdc_rdata_ai; - -endmodule - -module dmi_cdc_mem #( - parameter int unsigned ADDR_WIDTH = 32, - parameter int unsigned DATA_WIDTH = 64 -)( - // mem side - input logic clk_i, - input logic rst_ni, - - output logic mem_req_o, - input logic mem_gnt_i, - output logic [ADDR_WIDTH-1:0] mem_addr_o, - output logic mem_we_o, - output logic [DATA_WIDTH/8-1:0] mem_be_o, - output logic [DATA_WIDTH-1:0] mem_wdata_o, - input logic 
[DATA_WIDTH-1:0] mem_rdata_i, - input logic mem_rvalid_i, - input logic mem_rerror_i, - output logic mem_clear_o, - - // CDC side - input logic cdc_req_ai, - output logic cdc_ack_ao, - input logic [ADDR_WIDTH-1:0] cdc_addr_ai, - input logic cdc_we_ai, - input logic [DATA_WIDTH/8-1:0] cdc_be_ai, - input logic [DATA_WIDTH-1:0] cdc_wdata_ai, - input logic cdc_clear_ai, - - output logic cdc_rreq_ao, - input logic cdc_rack_ai, - output logic [DATA_WIDTH-1:0] cdc_rdata_ao, - output logic cdc_rerror_ao - ); - - enum logic [1:0] { IDLE, REQUEST, WAIT_REQ_LOW } req_state_p, req_state_n; - enum logic [1:0] { RIDLE, WAIT_ACK_HIGH, WAIT_ACK_LOW } resp_state_p, resp_state_n; - - logic [ADDR_WIDTH-1:0] mem_addr_p; - logic mem_we_p; - logic [DATA_WIDTH/8-1:0] mem_be_p; - logic [DATA_WIDTH-1:0] mem_wdata_p; - logic mem_clear_p; - - logic cdc_req; - logic cdc_clear; - logic cdc_sample; - - logic cdc_rack; - logic [DATA_WIDTH-1:0] cdc_rdata_p; - logic cdc_rerror_p; - - always_comb - begin - req_state_n = req_state_p; - - cdc_ack_ao = 1'b0; - cdc_sample = 1'b0; - - mem_req_o = 1'b0; - - unique case (req_state_p) - IDLE: begin - if (cdc_req) begin - req_state_n = REQUEST; - cdc_sample = 1'b1; - end - end - - REQUEST: begin - mem_req_o = 1'b1; - cdc_ack_ao = 1'b1; - - if (mem_gnt_i) begin - req_state_n = WAIT_REQ_LOW; - end - end - - WAIT_REQ_LOW: begin - cdc_ack_ao = 1'b1; - - if (~cdc_req) begin - req_state_n = IDLE; - end - end - - default:; // make unique case happy during reset - endcase - - if (cdc_clear) - req_state_n = IDLE; - end - - always_comb - begin - resp_state_n = resp_state_p; - cdc_rreq_ao = 1'b0; - - unique case (resp_state_p) - RIDLE: begin - if (mem_rvalid_i) begin - resp_state_n = WAIT_ACK_HIGH; - end - end - - WAIT_ACK_HIGH: begin - cdc_rreq_ao = 1'b1; - - if (cdc_rack) begin - resp_state_n = WAIT_ACK_LOW; - end - end - - WAIT_ACK_LOW: begin - cdc_rreq_ao = 1'b0; - - if (~cdc_rack) begin - resp_state_n = RIDLE; - end - end - - default:; // make unique case happy 
during reset - endcase - - if (cdc_clear) - resp_state_n = RIDLE; - end - - always_ff @(posedge clk_i, negedge rst_ni) - begin - if (~rst_ni) begin - req_state_p <= IDLE; - resp_state_p <= RIDLE; - - mem_addr_p <= '0; - mem_we_p <= '0; - mem_be_p <= '0; - mem_wdata_p <= '0; - mem_clear_p <= '0; - - cdc_rdata_p <= '0; - cdc_rerror_p <= '0; - end else begin - req_state_p <= req_state_n; - resp_state_p <= resp_state_n; - - if (cdc_sample) begin - mem_addr_p <= cdc_addr_ai; - mem_we_p <= cdc_we_ai; - mem_be_p <= cdc_be_ai; - mem_wdata_p <= cdc_wdata_ai; - mem_clear_p <= cdc_clear_ai; - end else begin - mem_clear_p <= '0; - end - - if (mem_rvalid_i) begin - cdc_rdata_p <= mem_rdata_i; - cdc_rerror_p <= mem_rerror_i; - end - end - end - - assign mem_addr_o = mem_addr_p; - assign mem_we_o = mem_we_p; - assign mem_be_o = mem_be_p; - assign mem_wdata_o = mem_wdata_p; - assign mem_clear_o = mem_clear_p; - - assign cdc_rdata_ao = cdc_rdata_p; - assign cdc_rerror_ao = cdc_rerror_p; - - pulp_sync i_sync_req ( - .clk_i ( clk_i ), - .rstn_i ( rst_ni ) , - .serial_i ( cdc_req_ai ), - .serial_o ( cdc_req ) + input dm::dmi_resp_t core_dmi_resp_i, + output logic core_dmi_ready_o, + input logic core_dmi_valid_i ); - pulp_sync i_sync_clear ( - .clk_i ( clk_i ), - .rstn_i ( rst_ni ), - .serial_i ( cdc_clear_ai ), - .serial_o ( cdc_clear ) + cdc_2phase #(.T(dm::dmi_req_t)) i_cdc_req ( + .src_rst_ni ( trst_ni ), + .src_clk_i ( tck_i ), + .src_data_i ( jtag_dmi_req_i ), + .src_valid_i ( jtag_dmi_valid_i ), + .src_ready_o ( jtag_dmi_ready_o ), + + .dst_rst_ni ( rst_ni ), + .dst_clk_i ( clk_i ), + .dst_data_o ( core_dmi_req_o ), + .dst_valid_o ( core_dmi_valid_o ), + .dst_ready_i ( core_dmi_ready_i ) ); - pulp_sync i_sync_rack ( - .clk_i ( clk_i ), - .rstn_i ( rst_ni ) , - .serial_i ( cdc_rack_ai ), - .serial_o ( cdc_rack ) + cdc_2phase #(.T(dm::dmi_resp_t)) i_cdc_resp ( + .src_rst_ni ( rst_ni ), + .src_clk_i ( clk_i ), + .src_data_i ( core_dmi_resp_i ), + .src_valid_i ( core_dmi_valid_i ), 
+ .src_ready_o ( core_dmi_ready_o ), + + .dst_rst_ni ( trst_ni ), + .dst_clk_i ( tck_i ), + .dst_data_o ( jtag_dmi_resp_o ), + .dst_valid_o ( jtag_dmi_valid_o ), + .dst_ready_i ( jtag_dmi_ready_i ) ); - - //---------------------------------------------------------------------------- - // Assertions - //---------------------------------------------------------------------------- - -`ifndef SYNTHESIS -`ifndef verilator - assert property ( - @(posedge clk_i) (mem_req_o) |-> (!$isunknown(mem_addr_o) && !$isunknown(mem_we_o) - && !$isunknown(mem_be_o) && !$isunknown(mem_wdata_o))) - else $warning("mem request data may never be unknown"); - - assert property ( - @(posedge clk_i) (!$isunknown(mem_gnt_i))) else $warning("memory grant may never be unknown"); -`endif -`endif endmodule diff --git a/src/debug/dmi_jtag.sv b/src/debug/dmi_jtag.sv index dc4c9859dc..430ccae51e 100644 --- a/src/debug/dmi_jtag.sv +++ b/src/debug/dmi_jtag.sv @@ -17,27 +17,25 @@ */ module dmi_jtag ( - input logic clk_i, // DMI Clock - input logic rst_ni, // Asynchronous reset active low - - output logic dmi_rst_no, // hard reset - - output logic dmi_req_valid_o, - input logic dmi_req_ready_i, - output logic [ 6:0] dmi_req_bits_addr_o, - output logic [ 1:0] dmi_req_bits_op_o, // 0 = nop, 1 = read, 2 = write - output logic [31:0] dmi_req_bits_data_o, - input logic dmi_resp_valid_i, - output logic dmi_resp_ready_o, - input logic [ 1:0] dmi_resp_bits_resp_i, - input logic [31:0] dmi_resp_bits_data_i, - - input logic tck_i, // JTAG test clock pad - input logic tms_i, // JTAG test mode select pad - input logic trst_ni, // JTAG test reset pad - input logic td_i, // JTAG test data input pad - output logic td_o, // JTAG test data output pad - output logic tdo_oe_o // Data out output enable + input logic clk_i, // DMI Clock + input logic rst_ni, // Asynchronous reset active low + + output logic dmi_rst_no, // hard reset + + output dm::dmi_req_t dmi_req_o, + output logic dmi_req_valid_o, + input logic 
dmi_req_ready_i, + + input dm::dmi_resp_t dmi_resp_i, + output logic dmi_resp_ready_o, + input logic dmi_resp_valid_i, + + input logic tck_i, // JTAG test clock pad + input logic tms_i, // JTAG test mode select pad + input logic trst_ni, // JTAG test reset pad + input logic td_i, // JTAG test data input pad + output logic td_o, // JTAG test data output pad + output logic tdo_oe_o // Data out output enable ); assign dmi_rst_no = 1'b1; @@ -51,13 +49,13 @@ module dmi_jtag ( logic dmi_tdi; logic dmi_tdo; - logic mem_valid; - logic mem_gnt; - logic [6:0] mem_addr; - logic mem_we; - logic [31:0] mem_wdata; - logic [31:0] mem_rdata; - logic mem_rvalid; + dm::dmi_req_t dmi_req; + logic dmi_req_ready; + logic dmi_req_valid; + + dm::dmi_resp_t dmi_resp; + logic dmi_resp_valid; + logic dmi_resp_ready; typedef struct packed { logic [6:0] address; @@ -77,10 +75,12 @@ module dmi_jtag ( logic [31:0] data_d, data_q; dmi_t dmi; - assign dmi = dmi_t'(dr_q); - assign mem_addr = address_q; - assign mem_wdata = data_q; - assign mem_we = (state_q == Write); + assign dmi = dmi_t'(dr_q); + assign dmi_req.addr = address_q; + assign dmi_req.data = data_q; + assign dmi_req.op = (state_q == Write) ? 
dm::DTM_WRITE : dm::DTM_READ; + // we'will always be ready to accept the data we requested + assign dmi_resp_ready = 1'b1; logic error_dmi_busy; dmi_error_t error_d, error_q; @@ -93,7 +93,7 @@ module dmi_jtag ( data_d = data_q; error_d = error_q; - mem_valid = 1'b0; + dmi_req_valid = 1'b0; case (state_q) Idle: begin @@ -112,31 +112,31 @@ module dmi_jtag ( end Read: begin - mem_valid = 1'b1; - if (mem_gnt) begin + dmi_req_valid = 1'b1; + if (dmi_req_ready) begin state_d = WaitReadValid; end end WaitReadValid: begin // load data into register and shift out - if (mem_rvalid) begin - data_d = mem_rdata; + if (dmi_resp_valid) begin + data_d = dmi_resp.data; state_d = Idle; end end Write: begin - mem_valid = 1'b1; + dmi_req_valid = 1'b1; // got a valid answer go back to idle - if (mem_gnt) begin + if (dmi_req_ready) begin state_d = Idle; end end WaitWriteValid: begin // just wait for idle here - if (mem_rvalid) begin + if (dmi_resp_valid) begin state_d = Idle; end end @@ -237,26 +237,21 @@ module dmi_jtag ( // JTAG side (master side) .tck_i, .trst_ni, - - .mem_valid_i ( mem_valid ), - .mem_gnt_o ( mem_gnt ), - .mem_addr_i ( mem_addr ), - .mem_we_i ( mem_we ), - .mem_wdata_i ( mem_wdata ), - .mem_rdata_o ( mem_rdata ), - .mem_rvalid_o ( mem_rvalid ), - + .jtag_dmi_req_i ( dmi_req ), + .jtag_dmi_ready_o ( dmi_req_ready ), + .jtag_dmi_valid_i ( dmi_req_valid ), + .jtag_dmi_resp_o ( dmi_resp ), + .jtag_dmi_valid_o ( dmi_resp_valid ), + .jtag_dmi_ready_i ( dmi_resp_ready ), + // core side .clk_i, .rst_ni, - .dmi_req_valid_o, - .dmi_req_ready_i, - .dmi_req_bits_addr_o, - .dmi_req_bits_op_o, - .dmi_req_bits_data_o, - .dmi_resp_valid_i, - .dmi_resp_ready_o, - .dmi_resp_bits_resp_i, - .dmi_resp_bits_data_i + .core_dmi_req_o ( dmi_req_o ), + .core_dmi_valid_o ( dmi_req_valid_o ), + .core_dmi_ready_i ( dmi_req_ready_i ), + .core_dmi_resp_i ( dmi_resp_i ), + .core_dmi_ready_o ( dmi_resp_ready_o ), + .core_dmi_valid_i ( dmi_resp_valid_i ) ); endmodule diff --git a/src/decoder.sv 
b/src/decoder.sv index 815d10d1b7..efb31c5ad8 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -390,18 +390,22 @@ module decoder ( endcase end - `ifdef ENABLE_ATOMICS riscv::OpcodeAmo: begin // we are going to use the load unit for AMOs - instruction_o.fu = LOAD; - instruction_o.rd[4:0] = instr.stype.imm0; - instruction_o.rs1[4:0] = instr.itype.rs1; + instruction_o.fu = STORE; + instruction_o.rs1[4:0] = instr.atype.rs1; + instruction_o.rs2[4:0] = instr.atype.rs2; + instruction_o.rd[4:0] = instr.atype.rd; + // TODO(zarubaf): Ordering // words if (instr.stype.funct3 == 3'h2) begin unique case (instr.instr[31:27]) 5'h0: instruction_o.op = AMO_ADDW; 5'h1: instruction_o.op = AMO_SWAPW; - 5'h2: instruction_o.op = AMO_LRW; + 5'h2: begin + instruction_o.op = AMO_LRW; + if (instr.atype.rs2 != 0) illegal_instr = 1'b1; + end 5'h3: instruction_o.op = AMO_SCW; 5'h4: instruction_o.op = AMO_XORW; 5'h8: instruction_o.op = AMO_ORW; @@ -417,7 +421,10 @@ module decoder ( unique case (instr.instr[31:27]) 5'h0: instruction_o.op = AMO_ADDD; 5'h1: instruction_o.op = AMO_SWAPD; - 5'h2: instruction_o.op = AMO_LRD; + 5'h2: begin + instruction_o.op = AMO_LRD; + if (instr.atype.rs2 != 0) illegal_instr = 1'b1; + end 5'h3: instruction_o.op = AMO_SCD; 5'h4: instruction_o.op = AMO_XORD; 5'h8: instruction_o.op = AMO_ORD; @@ -432,7 +439,6 @@ module decoder ( illegal_instr = 1'b1; end end - `endif // -------------------------------- // Control Flow Instructions diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 34ed57fa21..ac14f98cad 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -58,6 +58,7 @@ module ex_stage #( output logic lsu_commit_ready_o, // commit queue is ready to accept another commit request output exception_t lsu_exception_o, output logic no_st_pending_o, + input logic amo_valid_commit_i, // CSR output logic csr_ready_o, input logic csr_valid_i, @@ -91,7 +92,8 @@ module ex_stage #( // interface to dcache input dcache_req_o_t [2:0] dcache_req_ports_i, output dcache_req_i_t 
[2:0] dcache_req_ports_o, - + output amo_req_t amo_req_o, // request to cache subsytem + input amo_resp_t amo_resp_i, // response from cache subsystem // Performance counters output logic itlb_miss_o, output logic dtlb_miss_o @@ -131,8 +133,10 @@ module ex_stage #( lsu lsu_i ( .commit_i ( lsu_commit_i ), .commit_ready_o ( lsu_commit_ready_o ), - .dcache_req_ports_i ( dcache_req_ports_i ), - .dcache_req_ports_o ( dcache_req_ports_o ), + .dcache_req_ports_i, + .dcache_req_ports_o, + .amo_req_o, + .amo_resp_i, .* ); diff --git a/src/frontend/bht.sv b/src/frontend/bht.sv new file mode 100644 index 0000000000..a49f007b6f --- /dev/null +++ b/src/frontend/bht.sv @@ -0,0 +1,88 @@ +//Copyright (C) 2018 to present, +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 2.0 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License.// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Migrated: Luis Vitorio Cargnini, IEEE +// Date: 09.06.2018 + +// branch history table - 2 bit saturation counter +module bht #( + parameter int unsigned NR_ENTRIES = 1024 +)( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic debug_mode_i, + + input logic [63:0] vpc_i, + input ariane_pkg::bht_update_t bht_update_i, + output ariane_pkg::bht_prediction_t bht_prediction_o +); + localparam OFFSET = 2; // we are using compressed instructions so do not use the lower 2 bits for prediction + localparam ANTIALIAS_BITS = 8; + // number of bits we should use for prediction + localparam PREDICTION_BITS = $clog2(NR_ENTRIES) + OFFSET; + + struct packed { + logic valid; + logic [1:0] saturation_counter; + } bht_d[NR_ENTRIES-1:0], bht_q[NR_ENTRIES-1:0]; + + logic [$clog2(NR_ENTRIES)-1:0] index, update_pc; + logic [1:0] saturation_counter; + + assign index = vpc_i[PREDICTION_BITS - 1:OFFSET]; + assign update_pc = bht_update_i.pc[PREDICTION_BITS - 1:OFFSET]; + // prediction assignment + assign bht_prediction_o.valid = bht_q[index].valid; + assign bht_prediction_o.taken = bht_q[index].saturation_counter == 2'b10; + assign bht_prediction_o.strongly_taken = (bht_q[index].saturation_counter == 2'b11); + always_comb begin : update_bht + bht_d = bht_q; + saturation_counter = bht_q[update_pc].saturation_counter; + + if (bht_update_i.valid && !debug_mode_i) begin + bht_d[update_pc].valid = 1'b1; + + if (saturation_counter == 2'b11) begin + // we can safely decrease it + if (~bht_update_i.taken) + bht_d[update_pc].saturation_counter = saturation_counter - 1; + // then check if it saturated in the negative regime e.g.: branch not taken + end else if (saturation_counter == 2'b00) begin + // we can safely increase it + if (bht_update_i.taken) + bht_d[update_pc].saturation_counter = saturation_counter 
+ 1; + end else begin // otherwise we are not in any boundaries and can decrease or increase it + if (bht_update_i.taken) + bht_d[update_pc].saturation_counter = saturation_counter + 1; + else + bht_d[update_pc].saturation_counter = saturation_counter - 1; + end + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + for (int unsigned i = 0; i < NR_ENTRIES; i++) + bht_q[i] <= '0; + end else begin + // evict all entries + if (flush_i) begin + for (int i = 0; i < NR_ENTRIES; i++) begin + bht_q[i].valid <= 1'b0; + bht_q[i].saturation_counter <= 2'b10; + end + end else begin + bht_q <= bht_d; + end + end + end +endmodule diff --git a/src/frontend/btb.sv b/src/frontend/btb.sv new file mode 100644 index 0000000000..2169ac961e --- /dev/null +++ b/src/frontend/btb.sv @@ -0,0 +1,87 @@ +//Copyright (C) 2018 to present, +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 2.0 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Migrated: Luis Vitorio Cargnini, IEEE +// Date: 09.06.2018 + +// ------------------------------ +// Branch Prediction +// ------------------------------ + +// branch target buffer +module btb #( + parameter int NR_ENTRIES = 8 +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush the btb + input logic debug_mode_i, + + input logic [63:0] vpc_i, // virtual PC from IF stage + input ariane_pkg::btb_update_t btb_update_i, // update btb with this information + output ariane_pkg::btb_prediction_t btb_prediction_o // prediction from btb +); + // number of bits which are not used for indexing + localparam OFFSET = 1; // we are using compressed instructions so do use the lower 2 bits for prediction + localparam ANTIALIAS_BITS = 8; + // number of bits we should use for prediction + localparam PREDICTION_BITS = $clog2(NR_ENTRIES) + OFFSET; + // typedef for all branch target entries + // we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects + ariane_pkg::btb_prediction_t btb_d [NR_ENTRIES-1:0], btb_q [NR_ENTRIES-1:0]; + logic [$clog2(NR_ENTRIES)-1:0] index, update_pc; + + assign index = vpc_i[PREDICTION_BITS - 1:OFFSET]; + assign update_pc = btb_update_i.pc[PREDICTION_BITS - 1:OFFSET]; + + // output matching prediction + assign btb_prediction_o = btb_q[index]; + + // ------------------------- + // Update Branch Prediction + // ------------------------- + // update on a mis-predict + always_comb begin : update_branch_predict + btb_d = btb_q; + + if (btb_update_i.valid && !debug_mode_i) begin + btb_d[update_pc].valid = 1'b1; + // the target address is simply updated + btb_d[update_pc].target_address = btb_update_i.target_address; + // as is the information whether this was a compressed branch + btb_d[update_pc].is_lower_16 = btb_update_i.is_lower_16; + // check if we should invalidate this entry, 
this happens in case we predicted a branch + // where actually none-is (aliasing) + if (btb_update_i.clear) begin + btb_d[update_pc].valid = 1'b0; + end + end + end + + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + // Bias the branches to be taken upon first arrival + for (int i = 0; i < NR_ENTRIES; i++) + btb_q[i] <= '{default: 0}; + end else begin + // evict all entries + if (flush_i) begin + for (int i = 0; i < NR_ENTRIES; i++) begin + btb_q[i].valid <= 1'b0; + end + end else begin + btb_q <= btb_d; + end + end + end +endmodule diff --git a/src/frontend.sv b/src/frontend/frontend.sv similarity index 63% rename from src/frontend.sv rename to src/frontend/frontend.sv index 119ff5bb61..c8877ef917 100644 --- a/src/frontend.sv +++ b/src/frontend/frontend.sv @@ -20,6 +20,7 @@ module frontend ( input logic rst_ni, // Asynchronous reset active low input logic flush_i, // flush request for PCGEN input logic flush_bp_i, // flush branch prediction + input logic debug_mode_i, // global input input logic [63:0] boot_addr_i, // Set a new PC @@ -88,9 +89,9 @@ module frontend ( logic is_mispredict; // branch-prediction which we inject into the pipeline branchpredict_sbe_t bp_sbe; - + // fetch fifo credit system - logic fifo_valid, fifo_ready, fifo_empty, fifo_pop; + logic fifo_valid, fifo_ready, fifo_empty, fifo_pop; logic s2_eff_kill, issue_req, s2_in_flight_d, s2_in_flight_q; logic [$clog2(FETCH_FIFO_DEPTH):0] fifo_credits_d; logic [$clog2(FETCH_FIFO_DEPTH):0] fifo_credits_q; @@ -207,10 +208,12 @@ module frontend ( end // to take this jump we need a valid prediction target **speculative** - if ((rvi_jalr[i] || rvc_jalr[i]) && btb_prediction.valid) begin - bp_vaddr = btb_prediction.target_address; - taken[i+1] = 1'b1; + if ((rvi_jalr[i] || rvc_jalr[i]) && ~(rvi_call[i] || rvc_call[i])) begin bp_sbe.cf_type = BTB; + if (btb_prediction.valid) begin + bp_vaddr = btb_prediction.target_address; + taken[i+1] = 1'b1; + end end // is 
it a return and the RAS contains a valid prediction? **speculative** @@ -301,11 +304,11 @@ module frontend ( automatic logic [63:0] fetch_address; // check whether we come out of reset - // this is a workaround. some tools have issues - // having boot_addr_i in the asynchronous + // this is a workaround. some tools have issues + // having boot_addr_i in the asynchronous // reset assignment to npc_q, even though // boot_addr_i will be assigned a constant - // on the top-level. + // on the top-level. if (npc_rst_load_q) begin npc_d = boot_addr_i; fetch_address = boot_addr_i; @@ -314,7 +317,7 @@ module frontend ( // keep stable by default npc_d = npc_q; end - + // ------------------------------- // 1. Branch Prediction // ------------------------------- @@ -352,8 +355,8 @@ module frontend ( // On a pipeline flush start fetching from the next address // of the instruction in the commit stage if (set_pc_commit_i) begin - // we came here from a flush request of a CSR instruction, - // as CSR instructions do not exist in a compressed form + // we came here from a flush request of a CSR instruction or AMO, + // as CSR or AMO instructions do not exist in a compressed form // we can unconditionally do PC + 4 here // TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage npc_d = pc_commit_i + 64'h4; @@ -372,36 +375,34 @@ module frontend ( // ------------------- // Credit-based fetch FIFO flow ctrl // ------------------- - - assign fifo_credits_d = (flush_i) ? FETCH_FIFO_DEPTH : - fifo_credits_q + fifo_pop + s2_eff_kill - issue_req; - - // check whether there is a request in flight that is being killed now + assign fifo_credits_d = (flush_i) ? FETCH_FIFO_DEPTH : + fifo_credits_q + fifo_pop + s2_eff_kill - issue_req; + + // check whether there is a request in flight that is being killed now // if this is the case, we need to increment the credit by 1 assign s2_eff_kill = s2_in_flight_q & icache_dreq_o.kill_s2; - assign s2_in_flight_d = (flush_i) ? 
1'b0 : - (issue_req) ? 1'b1 : + assign s2_in_flight_d = (flush_i) ? 1'b0 : + (issue_req) ? 1'b1 : (icache_dreq_i.valid) ? 1'b0 : - s2_in_flight_q; + s2_in_flight_q; // only enable counter if current request is not being killed assign issue_req = if_ready & (~icache_dreq_o.kill_s1); - assign fifo_pop = fetch_ack_i & fetch_entry_valid_o; + assign fifo_pop = fetch_ack_i & fetch_entry_valid_o; assign fifo_ready = (|fifo_credits_q); assign if_ready = icache_dreq_i.ready & fifo_ready; assign icache_dreq_o.req = fifo_ready; assign fetch_entry_valid_o = ~fifo_empty; - //pragma translate_off `ifndef VERILATOR fetch_fifo_credits0 : assert property ( - @(posedge clk_i) disable iff (~rst_ni) (fifo_credits_q <= FETCH_FIFO_DEPTH)) + @(posedge clk_i) disable iff (~rst_ni) (fifo_credits_q <= FETCH_FIFO_DEPTH)) else $fatal("[frontend] fetch fifo credits must be <= FETCH_FIFO_DEPTH!"); initial begin - assert (FETCH_FIFO_DEPTH<=8) else $fatal("[frontend] fetch fifo deeper than 8 not supported"); - assert (FETCH_WIDTH==32) else $fatal("[frontend] fetch width != not supported"); - end + assert (FETCH_FIFO_DEPTH<=8) else $fatal("[frontend] fetch fifo deeper than 8 not supported"); + assert (FETCH_WIDTH==32) else $fatal("[frontend] fetch width != not supported"); + end `endif //pragma translate_on @@ -446,21 +447,25 @@ module frontend ( btb #( .NR_ENTRIES ( BTB_ENTRIES ) ) i_btb ( + .clk_i, + .rst_ni, .flush_i ( flush_bp_i ), + .debug_mode_i, .vpc_i ( icache_vaddr_q ), .btb_update_i ( btb_update ), - .btb_prediction_o ( btb_prediction ), - .* + .btb_prediction_o ( btb_prediction ) ); bht #( .NR_ENTRIES ( BHT_ENTRIES ) ) i_bht ( + .clk_i, + .rst_ni, .flush_i ( flush_bp_i ), + .debug_mode_i, .vpc_i ( icache_vaddr_q ), .bht_update_i ( bht_update ), - .bht_prediction_o ( bht_prediction ), - .* + .bht_prediction_o ( bht_prediction ) ); for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin @@ -483,256 +488,22 @@ module frontend ( ); end -fifo_v2 #( - .DEPTH ( 8 ), - .dtype ( fetch_entry_t )) 
-i_fetch_fifo ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .flush_i ( flush_i ), - .testmode_i ( 1'b0 ), - .full_o ( ), - .empty_o ( fifo_empty ), - .alm_full_o ( ), - .alm_empty_o ( ), - .data_i ( {icache_vaddr_q, icache_data_q, bp_sbe, icache_ex_q} ), - .push_i ( fifo_valid ), - .data_o ( fetch_entry_o ), - .pop_i ( fifo_pop ) -); - - - -endmodule - -// ------------------------------ -// Instruction Scanner -// ------------------------------ -module instr_scan ( - input logic [31:0] instr_i, // expect aligned instruction, compressed or not - output logic is_rvc_o, - output logic rvi_return_o, - output logic rvi_call_o, - output logic rvi_branch_o, - output logic rvi_jalr_o, - output logic rvi_jump_o, - output logic [63:0] rvi_imm_o, - output logic rvc_branch_o, - output logic rvc_jump_o, - output logic rvc_jr_o, - output logic rvc_return_o, - output logic rvc_jalr_o, - output logic rvc_call_o, - output logic [63:0] rvc_imm_o -); - assign is_rvc_o = (instr_i[1:0] != 2'b11); - // check that rs1 is either x1 or x5 and that rs1 is not x1 or x5, TODO: check the fact about bit 7 - assign rvi_return_o = rvi_jalr_o & ~instr_i[7] & ~instr_i[19] & ~instr_i[18] & ~instr_i[16] & instr_i[15]; - assign rvi_call_o = (rvi_jalr_o | rvi_jump_o) & instr_i[7]; // TODO: check that this captures calls - // differentiates between JAL and BRANCH opcode, JALR comes from BHT - assign rvi_imm_o = (instr_i[3]) ? uj_imm(instr_i) : sb_imm(instr_i); - assign rvi_branch_o = (instr_i[6:0] == riscv::OpcodeBranch) ? 1'b1 : 1'b0; - assign rvi_jalr_o = (instr_i[6:0] == riscv::OpcodeJalr) ? 1'b1 : 1'b0; - assign rvi_jump_o = (instr_i[6:0] == riscv::OpcodeJal) ? 
1'b1 : 1'b0; - // opcode JAL - assign rvc_jump_o = (instr_i[15:13] == riscv::OpcodeCJ) & is_rvc_o & (instr_i[1:0] == 2'b01); - assign rvc_jr_o = (instr_i[15:12] == 4'b1000) & (instr_i[6:2] == 5'b00000) & is_rvc_o & (instr_i[1:0] == 2'b10); - assign rvc_branch_o = ((instr_i[15:13] == riscv::OpcodeCBeqz) | (instr_i[15:13] == riscv::OpcodeCBnez)) & is_rvc_o & (instr_i[1:0] == 2'b01); - // check that rs1 is x1 or x5 - assign rvc_return_o = rvc_jr_o & ~instr_i[11] & ~instr_i[10] & ~instr_i[8] & instr_i[7]; - assign rvc_jalr_o = (instr_i[15:12] == 4'b1001) & (instr_i[6:2] == 5'b00000) & is_rvc_o; - assign rvc_call_o = rvc_jalr_o; // TODO: check that this captures calls - - // // differentiates between JAL and BRANCH opcode, JALR comes from BHT - assign rvc_imm_o = (instr_i[14]) ? {{56{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0} - : {{53{instr_i[12]}}, instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0}; -endmodule - -// ------------------------------ -// Branch Prediction -// ------------------------------ - -// branch target buffer -module btb #( - parameter int NR_ENTRIES = 8 -)( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low - input logic flush_i, // flush the btb - - input logic [63:0] vpc_i, // virtual PC from IF stage - input btb_update_t btb_update_i, // update btb with this information - output btb_prediction_t btb_prediction_o // prediction from btb -); - // number of bits which are not used for indexing - localparam OFFSET = 1; // we are using compressed instructions so do use the lower 2 bits for prediction - localparam ANTIALIAS_BITS = 8; - // number of bits we should use for prediction - localparam PREDICTION_BITS = $clog2(NR_ENTRIES) + OFFSET; - // typedef for all branch target entries - // we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects - btb_prediction_t btb_d [NR_ENTRIES-1:0], btb_q 
[NR_ENTRIES-1:0]; - logic [$clog2(NR_ENTRIES)-1:0] index, update_pc; - - assign index = vpc_i[PREDICTION_BITS - 1:OFFSET]; - assign update_pc = btb_update_i.pc[PREDICTION_BITS - 1:OFFSET]; - - // output matching prediction - assign btb_prediction_o = btb_q[index]; - - // ------------------------- - // Update Branch Prediction - // ------------------------- - // update on a mis-predict - always_comb begin : update_branch_predict - btb_d = btb_q; - - if (btb_update_i.valid) begin - btb_d[update_pc].valid = 1'b1; - // the target address is simply updated - btb_d[update_pc].target_address = btb_update_i.target_address; - // as is the information whether this was a compressed branch - btb_d[update_pc].is_lower_16 = btb_update_i.is_lower_16; - // check if we should invalidate this entry, this happens in case we predicted a branch - // where actually none-is (aliasing) - if (btb_update_i.clear) begin - btb_d[update_pc].valid = 1'b0; - end - end - end - - // sequential process - always_ff @(posedge clk_i or negedge rst_ni) begin - if (~rst_ni) begin - // Bias the branches to be taken upon first arrival - for (int i = 0; i < NR_ENTRIES; i++) - btb_q[i] <= '{default: 0}; - end else begin - // evict all entries - if (flush_i) begin - for (int i = 0; i < NR_ENTRIES; i++) begin - btb_q[i].valid <= 1'b0; - end - end else begin - btb_q <= btb_d; - end - end - end -endmodule - -// return address stack -module ras #( - parameter int unsigned DEPTH = 2 -)( - input logic clk_i, - input logic rst_ni, - input logic push_i, - input logic pop_i, - input logic [63:0] data_i, - output ras_t data_o -); - - ras_t [DEPTH-1:0] stack_d, stack_q; - - assign data_o = stack_q[0]; - - always_comb begin - stack_d = stack_q; - - // push on the stack - if (push_i) begin - stack_d[0].ra = data_i; - // mark the new return address as valid - stack_d[0].valid = 1'b1; - stack_d[DEPTH-1:1] = stack_q[DEPTH-2:0]; - end - - if (pop_i) begin - stack_d[DEPTH-2:0] = stack_q[DEPTH-1:1]; - // we popped the value so 
invalidate the end of the stack - stack_d[DEPTH-1].valid = 1'b0; - stack_d[DEPTH-1].ra = 'b0; - end - end - - always_ff @(posedge clk_i or negedge rst_ni) begin - if (~rst_ni) begin - stack_q <= '0; - end else begin - stack_q <= stack_d; - end - end -endmodule - -// branch history table - 2 bit saturation counter -module bht #( - parameter int unsigned NR_ENTRIES = 1024 -)( - input logic clk_i, - input logic rst_ni, - input logic flush_i, - input logic [63:0] vpc_i, - input bht_update_t bht_update_i, - output bht_prediction_t bht_prediction_o -); - localparam OFFSET = 2; // we are using compressed instructions so do not use the lower 2 bits for prediction - localparam ANTIALIAS_BITS = 8; - // number of bits we should use for prediction - localparam PREDICTION_BITS = $clog2(NR_ENTRIES) + OFFSET; - - struct packed { - logic valid; - logic [1:0] saturation_counter; - } bht_d[NR_ENTRIES-1:0], bht_q[NR_ENTRIES-1:0]; - - logic [$clog2(NR_ENTRIES)-1:0] index, update_pc; - logic [1:0] saturation_counter; - - assign index = vpc_i[PREDICTION_BITS - 1:OFFSET]; - assign update_pc = bht_update_i.pc[PREDICTION_BITS - 1:OFFSET]; - // prediction assignment - assign bht_prediction_o.valid = bht_q[index].valid; - assign bht_prediction_o.taken = bht_q[index].saturation_counter == 2'b10; - assign bht_prediction_o.strongly_taken = (bht_q[index].saturation_counter == 2'b11); - always_comb begin : update_bht - bht_d = bht_q; - saturation_counter = bht_q[update_pc].saturation_counter; - - if (bht_update_i.valid) begin - bht_d[update_pc].valid = 1'b1; - - if (saturation_counter == 2'b11) begin - // we can safely decrease it - if (~bht_update_i.taken) - bht_d[update_pc].saturation_counter = saturation_counter - 1; - // then check if it saturated in the negative regime e.g.: branch not taken - end else if (saturation_counter == 2'b00) begin - // we can safely increase it - if (bht_update_i.taken) - bht_d[update_pc].saturation_counter = saturation_counter + 1; - end else begin // otherwise we 
are not in any boundaries and can decrease or increase it - if (bht_update_i.taken) - bht_d[update_pc].saturation_counter = saturation_counter + 1; - else - bht_d[update_pc].saturation_counter = saturation_counter - 1; - end - end - end + fifo_v2 #( + .DEPTH ( 8 ), + .dtype ( fetch_entry_t )) + i_fetch_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( flush_i ), + .testmode_i ( 1'b0 ), + .full_o ( ), + .empty_o ( fifo_empty ), + .alm_full_o ( ), + .alm_empty_o ( ), + .data_i ( {icache_vaddr_q, icache_data_q, bp_sbe, icache_ex_q} ), + .push_i ( fifo_valid ), + .data_o ( fetch_entry_o ), + .pop_i ( fifo_pop ) + ); - always_ff @(posedge clk_i or negedge rst_ni) begin - if (~rst_ni) begin - for (int unsigned i = 0; i < NR_ENTRIES; i++) - bht_q[i] <= '0; - end else begin - // evict all entries - if (flush_i) begin - for (int i = 0; i < NR_ENTRIES; i++) begin - bht_q[i].valid <= 1'b0; - bht_q[i].saturation_counter <= 2'b10; - end - end else begin - bht_q <= bht_d; - end - end - end endmodule diff --git a/src/frontend/instr_scan.sv b/src/frontend/instr_scan.sv new file mode 100644 index 0000000000..766f911cda --- /dev/null +++ b/src/frontend/instr_scan.sv @@ -0,0 +1,66 @@ +//Copyright (C) 2018 to present, +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 2.0 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Migrated: Luis Vitorio Cargnini, IEEE +// Date: 09.06.2018 + +// ------------------------------ +// Instruction Scanner +// ------------------------------ +module instr_scan ( + input logic [31:0] instr_i, // expect aligned instruction, compressed or not + output logic is_rvc_o, + output logic rvi_return_o, + output logic rvi_call_o, + output logic rvi_branch_o, + output logic rvi_jalr_o, + output logic rvi_jump_o, + output logic [63:0] rvi_imm_o, + output logic rvc_branch_o, + output logic rvc_jump_o, + output logic rvc_jr_o, + output logic rvc_return_o, + output logic rvc_jalr_o, + output logic rvc_call_o, + output logic [63:0] rvc_imm_o +); + assign is_rvc_o = (instr_i[1:0] != 2'b11); + // check that rs1 is either x1 or x5 and that rd is not x1 or x5, TODO: check the fact about bit 7 + assign rvi_return_o = rvi_jalr_o & ~instr_i[7] & ~instr_i[19] & ~instr_i[18] & ~instr_i[16] & instr_i[15]; + assign rvi_call_o = (rvi_jalr_o | rvi_jump_o) & instr_i[7]; // TODO: check that this captures calls + // differentiates between JAL and BRANCH opcode, JALR comes from BHT + assign rvi_imm_o = (instr_i[3]) ? ariane_pkg::uj_imm(instr_i) : ariane_pkg::sb_imm(instr_i); + assign rvi_branch_o = (instr_i[6:0] == riscv::OpcodeBranch) ? 1'b1 : 1'b0; + assign rvi_jalr_o = (instr_i[6:0] == riscv::OpcodeJalr) ? 1'b1 : 1'b0; + assign rvi_jump_o = (instr_i[6:0] == riscv::OpcodeJal) ?
1'b1 : 1'b0; + // opcode JAL + assign rvc_jump_o = (instr_i[15:13] == riscv::OpcodeCJ) & is_rvc_o & (instr_i[1:0] == 2'b01); + // always links to register 0 + assign rvc_jr_o = (instr_i[15:13] == riscv::OpcodeC2JalrMvAdd) + & ~instr_i[12] + & (instr_i[6:2] == 5'b00000) + & (instr_i[1:0] == 2'b10) + & is_rvc_o; + assign rvc_branch_o = ((instr_i[15:13] == riscv::OpcodeCBeqz) | (instr_i[15:13] == riscv::OpcodeCBnez)) + & (instr_i[1:0] == 2'b01) + & is_rvc_o ; + // check that rs1 is x1 or x5 + assign rvc_return_o = ~instr_i[11] & ~instr_i[10] & ~instr_i[8] & instr_i[7] & rvc_jr_o ; + // always links to register 1 e.g.: it is a jump + assign rvc_jalr_o = (instr_i[15:13] == riscv::OpcodeC2JalrMvAdd) + & instr_i[12] + & (instr_i[6:2] == 5'b00000) & is_rvc_o; + assign rvc_call_o = rvc_jalr_o; + + // // differentiates between JAL and BRANCH opcode, JALR comes from BHT + assign rvc_imm_o = (instr_i[14]) ? {{56{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0} + : {{53{instr_i[12]}}, instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0}; +endmodule diff --git a/src/frontend/ras.sv b/src/frontend/ras.sv new file mode 100644 index 0000000000..3fef9b803e --- /dev/null +++ b/src/frontend/ras.sv @@ -0,0 +1,58 @@ +//Copyright (C) 2018 to present, +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 2.0 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Migrated: Luis Vitorio Cargnini, IEEE +// Date: 09.06.2018 + +// return address stack +module ras #( + parameter int unsigned DEPTH = 2 +)( + input logic clk_i, + input logic rst_ni, + input logic push_i, + input logic pop_i, + input logic [63:0] data_i, + output ariane_pkg::ras_t data_o +); + + ariane_pkg::ras_t [DEPTH-1:0] stack_d, stack_q; + + assign data_o = stack_q[0]; + + always_comb begin + stack_d = stack_q; + + // push on the stack + if (push_i) begin + stack_d[0].ra = data_i; + // mark the new return address as valid + stack_d[0].valid = 1'b1; + stack_d[DEPTH-1:1] = stack_q[DEPTH-2:0]; + end + + if (pop_i) begin + stack_d[DEPTH-2:0] = stack_q[DEPTH-1:1]; + // we popped the value so invalidate the end of the stack + stack_d[DEPTH-1].valid = 1'b0; + stack_d[DEPTH-1].ra = 'b0; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + stack_q <= '0; + end else begin + stack_q <= stack_d; + end + end +endmodule diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index 8cf5e990cf..e88de864ce 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -20,7 +20,6 @@ module issue_read_operands #( )( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low - input logic test_en_i, // flush input logic flush_i, // coming from rename @@ -230,7 +229,7 @@ module issue_read_operands #( // use the zimm as operand a if (issue_instr_i.use_zimm) begin // zero extend operand a - operand_a_n = {52'b0, issue_instr_i.rs1}; + operand_a_n = {52'b0, issue_instr_i.rs1[4:0]}; end // or is it an immediate (including PC), this is not the case for a store and control flow instructions if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW)) begin @@ -282,7 +281,7 @@ module issue_read_operands #( // Clock and Reset .clk ( clk_i ), .rst_n ( rst_ni ), - .test_en_i ( test_en_i ), + .test_en_i ( 1'b0 
), .raddr_a_i ( issue_instr_i.rs1[4:0] ), .rdata_a_o ( operand_a_regfile ), diff --git a/src/issue_stage.sv b/src/issue_stage.sv index 560c808755..793aedaa52 100644 --- a/src/issue_stage.sv +++ b/src/issue_stage.sv @@ -19,10 +19,9 @@ module issue_stage #( parameter int unsigned NR_ENTRIES = 8, parameter int unsigned NR_WB_PORTS = 4, parameter int unsigned NR_COMMIT_PORTS = 2 - )( +)( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low - input logic test_en_i, // Test Enable input logic flush_unissued_instr_i, input logic flush_i, @@ -61,7 +60,7 @@ module issue_stage #( // write back port input logic [NR_WB_PORTS-1:0][TRANS_ID_BITS-1:0] trans_id_i, - + input branchpredict_t resolved_branch_i, input logic [NR_WB_PORTS-1:0][63:0] wbdata_i, input exception_t [NR_WB_PORTS-1:0] ex_ex_i, // exception from execute stage input logic [NR_WB_PORTS-1:0] wb_valid_i, @@ -99,44 +98,54 @@ module issue_stage #( // 1. Re-name // --------------------------------------------------------- re_name i_re_name ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .flush_i ( flush_i ), - .issue_instr_i ( decoded_instr_i ), - .issue_instr_valid_i ( decoded_instr_valid_i ), - .issue_ack_o ( decoded_instr_ack_o ), - .issue_instr_o ( issue_instr_rename_sb ), - .issue_instr_valid_o ( issue_instr_valid_rename_sb ), - .issue_ack_i ( issue_ack_sb_rename ) + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( flush_i ), + .flush_unissied_instr_i ( flush_unissued_instr_i ), + .issue_instr_i ( decoded_instr_i ), + .issue_instr_valid_i ( decoded_instr_valid_i ), + .issue_ack_o ( decoded_instr_ack_o ), + .issue_instr_o ( issue_instr_rename_sb ), + .issue_instr_valid_o ( issue_instr_valid_rename_sb ), + .issue_ack_i ( issue_ack_sb_rename ) ); // --------------------------------------------------------- // 2. 
Manage instructions in a scoreboard // --------------------------------------------------------- - scoreboard #( - .NR_ENTRIES ( NR_ENTRIES ), - .NR_WB_PORTS ( NR_WB_PORTS ) + scoreboard #( + .NR_ENTRIES (NR_ENTRIES ), + .NR_WB_PORTS(NR_WB_PORTS) ) i_scoreboard ( - .unresolved_branch_i ( 1'b0 ), - .rd_clobber_o ( rd_clobber_sb_iro ), - .rs1_i ( rs1_iro_sb ), - .rs1_o ( rs1_sb_iro ), - .rs1_valid_o ( rs1_valid_sb_iro ), - .rs2_i ( rs2_iro_sb ), - .rs2_o ( rs2_sb_iro ), - .rs2_valid_o ( rs2_valid_iro_sb ), - - .decoded_instr_i ( issue_instr_rename_sb ), - .decoded_instr_valid_i ( issue_instr_valid_rename_sb ), - .decoded_instr_ack_o ( issue_ack_sb_rename ), - .issue_instr_o ( issue_instr_sb_iro ), - .issue_instr_valid_o ( issue_instr_valid_sb_iro ), - .issue_ack_i ( issue_ack_iro_sb ), - - .trans_id_i ( trans_id_i ), - .wbdata_i ( wbdata_i ), - .ex_i ( ex_ex_i ), - .* + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_unissued_instr_i ( flush_unissued_instr_i ), + .flush_i ( flush_i ), + .unresolved_branch_i ( 1'b0 ), + + .rd_clobber_o ( rd_clobber_sb_iro ), + .rs1_i ( rs1_iro_sb ), + .rs1_o ( rs1_sb_iro ), + .rs1_valid_o ( rs1_valid_sb_iro ), + .rs2_i ( rs2_iro_sb ), + .rs2_o ( rs2_sb_iro ), + .rs2_valid_o ( rs2_valid_iro_sb ), + + .commit_instr_o ( commit_instr_o ), + .commit_ack_i ( commit_ack_i ), + + .decoded_instr_i ( issue_instr_rename_sb ), + .decoded_instr_valid_i ( issue_instr_valid_rename_sb ), + .decoded_instr_ack_o ( issue_ack_sb_rename ), + + .issue_instr_o ( issue_instr_sb_iro ), + .issue_instr_valid_o ( issue_instr_valid_sb_iro ), + .issue_ack_i ( issue_ack_iro_sb ), + .resolved_branch_i ( resolved_branch_i ), + .trans_id_i ( trans_id_i ), + .wbdata_i ( wbdata_i ), + .ex_i ( ex_ex_i ), + .wb_valid_i ( wb_valid_i ) ); // --------------------------------------------------------- diff --git a/src/load_unit.sv b/src/load_unit.sv index d4fdd55991..5d299d279c 100644 --- a/src/load_unit.sv +++ b/src/load_unit.sv @@ -41,7 +41,9 @@ module load_unit ( input 
dcache_req_o_t req_port_i, output dcache_req_i_t req_port_o ); - enum logic [2:0] {IDLE, WAIT_GNT, SEND_TAG, WAIT_PAGE_OFFSET, ABORT_TRANSACTION, WAIT_TRANSLATION, WAIT_FLUSH} NS, CS; + enum logic [2:0] { IDLE, WAIT_GNT, SEND_TAG, WAIT_PAGE_OFFSET, + ABORT_TRANSACTION, WAIT_TRANSLATION, WAIT_FLUSH + } state_d, state_q; // in order to decouple the response interface from the request interface we need a // a queue which can hold all outstanding memory requests struct packed { @@ -72,7 +74,7 @@ module load_unit ( // --------------- always_comb begin : load_control // default assignments - NS = CS; + state_d = state_q; load_data_d = load_data_q; translation_req_o = 1'b0; req_port_o.data_req = 1'b0; @@ -83,7 +85,7 @@ module load_unit ( req_port_o.data_size = extract_transfer_size(lsu_ctrl_i.operator); pop_ld_o = 1'b0; - case (CS) + case (state_q) IDLE: begin // we've got a new load request if (valid_i) begin @@ -96,18 +98,18 @@ module load_unit ( req_port_o.data_req = 1'b1; // we got no data grant so wait for the grant before sending the tag if (!req_port_i.data_gnt) begin - NS = WAIT_GNT; + state_d = WAIT_GNT; end else begin if (dtlb_hit_i) begin // we got a grant and a hit on the DTLB so we can send the tag in the next cycle - NS = SEND_TAG; + state_d = SEND_TAG; pop_ld_o = 1'b1; end else - NS = ABORT_TRANSACTION; + state_d = ABORT_TRANSACTION; end end else begin // wait for the store buffer to train and the page offset to not match anymore - NS = WAIT_PAGE_OFFSET; + state_d = WAIT_PAGE_OFFSET; end end end @@ -116,7 +118,7 @@ module load_unit ( WAIT_PAGE_OFFSET: begin // we make a new request as soon as the page offset does not match anymore if (!page_offset_matches_i) begin - NS = WAIT_GNT; + state_d = WAIT_GNT; end end @@ -127,14 +129,14 @@ module load_unit ( req_port_o.kill_req = 1'b1; req_port_o.tag_valid = 1'b1; // redo the request by going back to the wait gnt state - NS = WAIT_TRANSLATION; + state_d = WAIT_TRANSLATION; end WAIT_TRANSLATION: begin 
translation_req_o = 1'b1; // we've got a hit and we can continue with the request process if (dtlb_hit_i) - NS = WAIT_GNT; + state_d = WAIT_GNT; end WAIT_GNT: begin @@ -146,17 +148,17 @@ module load_unit ( if (req_port_i.data_gnt) begin // so we send the tag in the next cycle if (dtlb_hit_i) begin - NS = SEND_TAG; + state_d = SEND_TAG; pop_ld_o = 1'b1; end else // should we not have hit on the TLB abort this transaction an retry later - NS = ABORT_TRANSACTION; + state_d = ABORT_TRANSACTION; end // otherwise we keep waiting on our grant end // we know for sure that the tag we want to send is valid SEND_TAG: begin req_port_o.tag_valid = 1'b1; - NS = IDLE; + state_d = IDLE; // we can make a new request here if we got one if (valid_i) begin // start the translation process even though we do not know if the addresses match @@ -168,19 +170,19 @@ module load_unit ( req_port_o.data_req = 1'b1; // we got no data grant so wait for the grant before sending the tag if (!req_port_i.data_gnt) begin - NS = WAIT_GNT; + state_d = WAIT_GNT; end else begin // we got a grant so we can send the tag in the next cycle if (dtlb_hit_i) begin // we got a grant and a hit on the DTLB so we can send the tag in the next cycle - NS = SEND_TAG; + state_d = SEND_TAG; pop_ld_o = 1'b1; end else // we missed on the TLB -> wait for the translation - NS = ABORT_TRANSACTION; + state_d = ABORT_TRANSACTION; end end else begin // wait for the store buffer to train and the page offset to not match anymore - NS = WAIT_PAGE_OFFSET; + state_d = WAIT_PAGE_OFFSET; end end // ---------- @@ -198,7 +200,7 @@ module load_unit ( req_port_o.kill_req = 1'b1; req_port_o.tag_valid = 1'b1; // we've killed the current request so we can go back to idle - NS = IDLE; + state_d = IDLE; end endcase @@ -206,8 +208,8 @@ module load_unit ( // we got an exception if (ex_i.valid && valid_i) begin // the next state will be the idle state - NS = IDLE; - // pop load - but only if we are not getting an rvalid in here - otherwise we will 
over-wright an incoming transaction + state_d = IDLE; + // pop load - but only if we are not getting an rvalid in here - otherwise we will over-write an incoming transaction if (!req_port_i.data_rvalid) pop_ld_o = 1'b1; end @@ -219,7 +221,7 @@ module load_unit ( // if we just flushed and the queue is not empty or we are getting an rvalid this cycle wait in a extra stage if (flush_i) begin - NS = WAIT_FLUSH; + state_d = WAIT_FLUSH; end end @@ -232,7 +234,7 @@ module load_unit ( // output the queue data directly, the valid signal is set corresponding to the process above trans_id_o = load_data_q.trans_id; // we got an rvalid and are currently not flushing and not aborting the request - if (req_port_i.data_rvalid && CS != WAIT_FLUSH) begin + if (req_port_i.data_rvalid && state_q != WAIT_FLUSH) begin // we killed the request if(!req_port_o.kill_req) valid_o = 1'b1; @@ -249,7 +251,7 @@ module load_unit ( valid_o = 1'b1; trans_id_o = lsu_ctrl_i.trans_id; // if we are waiting for the translation to finish do not give a valid signal yet - end else if (CS == WAIT_TRANSLATION) begin + end else if (state_q == WAIT_TRANSLATION) begin valid_o = 1'b0; end @@ -259,53 +261,17 @@ module load_unit ( // latch physical address for the tag cycle (one cycle after applying the index) always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin - CS <= IDLE; + state_q <= IDLE; load_data_q <= '0; end else begin - CS <= NS; + state_q <= state_d; load_data_q <= load_data_d; end end - // --------------- - // AMO Operation - // --------------- - always_comb begin : amo_op_select - req_port_o.amo_op = AMO_NONE; - - if (lsu_ctrl_i.valid) begin - case (lsu_ctrl_i.operator) - AMO_LRW: req_port_o.amo_op = AMO_LR; - AMO_LRD: req_port_o.amo_op = AMO_LR; - AMO_SCW: req_port_o.amo_op = AMO_SC; - AMO_SCD: req_port_o.amo_op = AMO_SC; - AMO_SWAPW: req_port_o.amo_op = AMO_SWAP; - AMO_ADDW: req_port_o.amo_op = AMO_ADD; - AMO_ANDW: req_port_o.amo_op = AMO_AND; - AMO_ORW: req_port_o.amo_op = AMO_OR; 
- AMO_XORW: req_port_o.amo_op = AMO_XOR; - AMO_MAXW: req_port_o.amo_op = AMO_MAX; - AMO_MAXWU: req_port_o.amo_op = AMO_MAXU; - AMO_MINW: req_port_o.amo_op = AMO_MIN; - AMO_MINWU: req_port_o.amo_op = AMO_MINU; - AMO_SWAPD: req_port_o.amo_op = AMO_SWAP; - AMO_ADDD: req_port_o.amo_op = AMO_ADD; - AMO_ANDD: req_port_o.amo_op = AMO_AND; - AMO_ORD: req_port_o.amo_op = AMO_OR; - AMO_XORD: req_port_o.amo_op = AMO_XOR; - AMO_MAXD: req_port_o.amo_op = AMO_MAX; - AMO_MAXDU: req_port_o.amo_op = AMO_MAXU; - AMO_MIND: req_port_o.amo_op = AMO_MIN; - AMO_MINDU: req_port_o.amo_op = AMO_MINU; - default: req_port_o.amo_op = AMO_NONE; - endcase - end - end - // --------------- // Sign Extend // --------------- - logic [63:0] shifted_data; // realign as needed @@ -362,7 +328,9 @@ module load_unit ( // result mux always_comb begin unique case (load_data_q.operator) - LW, LWU: result_o = {{32{sign_bit}}, shifted_data[31:0]}; + LW, LWU: begin + result_o = {{32{sign_bit}}, shifted_data[31:0]}; + end LH, LHU: result_o = {{48{sign_bit}}, shifted_data[15:0]}; LB, LBU: result_o = {{56{sign_bit}}, shifted_data[7:0]}; default: result_o = shifted_data; @@ -380,7 +348,7 @@ module load_unit ( // end always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs - if(~rst_ni) begin + if (~rst_ni) begin idx_q <= 0; signed_q <= 0; fp_sign_q <= 0; diff --git a/src/lsu.sv b/src/lsu.sv index 2250c6b41b..ef2d8fbfed 100644 --- a/src/lsu.sv +++ b/src/lsu.sv @@ -15,12 +15,13 @@ import ariane_pkg::*; module lsu #( - parameter int unsigned ASID_WIDTH = 1 + parameter int unsigned ASID_WIDTH = 1 )( input logic clk_i, input logic rst_ni, input logic flush_i, output logic no_st_pending_o, + input logic amo_valid_commit_i, input fu_t fu_i, input fu_op operator_i, @@ -57,7 +58,9 @@ module lsu #( // interface to dcache input dcache_req_o_t [2:0] dcache_req_ports_i, output dcache_req_i_t [2:0] dcache_req_ports_o, - + // AMO interface + output amo_req_t amo_req_o, + input amo_resp_t amo_resp_i, output exception_t 
lsu_exception_o // to WB, signal exception status LD/ST exception ); @@ -73,6 +76,7 @@ module lsu #( logic pop_st; logic pop_ld; + // ------------------------------ // Address Generation Unit (AGU) // ------------------------------ // virtual address as calculated by the AGU in the first cycle @@ -108,28 +112,6 @@ module lsu #( exception_t ld_ex; exception_t st_ex; - // ------------ - // NB Dcache - // ------------ - logic [2:0][11:0] address_index_i; - logic [2:0][43:0] address_tag_i; - logic [2:0][63:0] data_wdata_i; - logic [2:0] data_req_i; - logic [2:0] data_we_i; - logic [2:0][1:0] data_size_i; - - logic [2:0] kill_req_i; - logic [2:0] tag_valid_i; - logic [2:0][7:0] data_be_i; - logic [2:0] data_gnt_o; - logic [2:0] data_rvalid_o; - logic [2:0][63:0] data_rdata_o; - amo_t [2:0] amo_op_i; - - // AMO operations always go through the load unit - assign amo_op_i[0] = AMO_NONE; - assign amo_op_i[2] = AMO_NONE; - // ------------------- // MMU e.g.: TLBs/PTW // ------------------- @@ -147,7 +129,7 @@ module lsu #( .lsu_paddr_o ( mmu_paddr ), .lsu_exception_o ( mmu_exception ), .lsu_dtlb_hit_o ( dtlb_hit ), // send in the same cycle as the request - // connecting PTW to D$ IF (aka mem arbiter + // connecting PTW to D$ IF .req_port_i ( dcache_req_ports_i [0] ), .req_port_o ( dcache_req_ports_o [0] ), // icache address translation requests @@ -159,9 +141,17 @@ module lsu #( // Store Unit // ------------------ store_unit i_store_unit ( + .clk_i, + .rst_ni, + .flush_i, + .no_st_pending_o, + .valid_i ( st_valid_i ), .lsu_ctrl_i ( lsu_ctrl ), .pop_st_o ( pop_st ), + .commit_i, + .commit_ready_o, + .amo_valid_commit_i, .valid_o ( st_valid ), .trans_id_o ( st_trans_id ), @@ -176,10 +166,12 @@ module lsu #( // Load Unit .page_offset_i ( page_offset ), .page_offset_matches_o ( page_offset_matches ), + // AMOs + .amo_req_o, + .amo_resp_i, // to memory arbiter .req_port_i ( dcache_req_ports_i [2] ), - .req_port_o ( dcache_req_ports_o [2] ), - .* + .req_port_o ( 
dcache_req_ports_o [2] ) ); // ------------------ @@ -264,49 +256,10 @@ module lsu #( // --------------- // Byte Enable // --------------- - always_comb begin : byte_enable - be_i = 8'b0; - // we can generate the byte enable from the virtual address since the last - // 12 bit are the same anyway - // and we can always generate the byte enable from the address at hand - case (operator_i) - LD, SD: // double word - be_i = 8'b1111_1111; - LW, LWU, SW: // word - case (vaddr_i[2:0]) - 3'b000: be_i = 8'b0000_1111; - 3'b001: be_i = 8'b0001_1110; - 3'b010: be_i = 8'b0011_1100; - 3'b011: be_i = 8'b0111_1000; - 3'b100: be_i = 8'b1111_0000; - default:; - endcase - LH, LHU, SH: // half word - case (vaddr_i[2:0]) - 3'b000: be_i = 8'b0000_0011; - 3'b001: be_i = 8'b0000_0110; - 3'b010: be_i = 8'b0000_1100; - 3'b011: be_i = 8'b0001_1000; - 3'b100: be_i = 8'b0011_0000; - 3'b101: be_i = 8'b0110_0000; - 3'b110: be_i = 8'b1100_0000; - default:; - endcase - LB, LBU, SB: // byte - case (vaddr_i[2:0]) - 3'b000: be_i = 8'b0000_0001; - 3'b001: be_i = 8'b0000_0010; - 3'b010: be_i = 8'b0000_0100; - 3'b011: be_i = 8'b0000_1000; - 3'b100: be_i = 8'b0001_0000; - 3'b101: be_i = 8'b0010_0000; - 3'b110: be_i = 8'b0100_0000; - 3'b111: be_i = 8'b1000_0000; - endcase - default: - be_i = 8'b0; - endcase - end + // we can generate the byte enable from the virtual address since the last + // 12 bit are the same anyway + // and we can always generate the byte enable from the address at hand + assign be_i = be_gen(vaddr_i[2:0], extract_transfer_size(operator_i)); // ------------------------ // Misaligned Exception @@ -324,23 +277,33 @@ module lsu #( data_misaligned = 1'b0; - if(lsu_ctrl.valid) begin + if (lsu_ctrl.valid) begin case (lsu_ctrl.operator) // double word - LD, SD: begin - if (lsu_ctrl.vaddr[2:0] != 3'b000) + LD, SD, + AMO_LRD, AMO_SCD, + AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD, + AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND, + AMO_MINDU: begin + if (lsu_ctrl.vaddr[2:0] != 3'b000) begin 
data_misaligned = 1'b1; + end end // word - LW, LWU, SW: begin - if (lsu_ctrl.vaddr[1:0] != 2'b00) + LW, LWU, SW, + AMO_LRW, AMO_SCW, + AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW, + AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW, + AMO_MINWU: begin + if (lsu_ctrl.vaddr[1:0] != 2'b00) begin data_misaligned = 1'b1; + end end - // half word LH, LHU, SH: begin - if (lsu_ctrl.vaddr[0] != 1'b0) + if (lsu_ctrl.vaddr[0] != 1'b0) begin data_misaligned = 1'b1; + end end // byte -> is always aligned default:; @@ -403,15 +366,6 @@ module lsu #( .ready_o ( lsu_ready_o ), .* ); - // ------------ - // Assertions - // ------------ - - `ifndef SYNTHESIS - `ifndef VERILATOR - // TODO - `endif - `endif endmodule // ------------------ @@ -504,7 +458,7 @@ module lsu_bypass ( // registers always_ff @(posedge clk_i or negedge rst_ni) begin - if(~rst_ni) begin + if (~rst_ni) begin mem_q <= '{default: 0}; status_cnt_q <= '0; write_pointer_q <= '0; diff --git a/src/mult.sv b/src/mult.sv index 4aa711e492..2cbb5fe5c5 100644 --- a/src/mult.sv +++ b/src/mult.sv @@ -437,14 +437,15 @@ module mul ( // Pipeline register logic [TRANS_ID_BITS-1:0] trans_id_q; logic mult_valid_q; - logic [63:0] result_q; + fu_op operator_d, operator_q; + logic [127:0] mult_result_d, mult_result_q; + // control registers logic sign_a, sign_b; logic mult_valid; // control signals assign mult_valid_o = mult_valid_q; - assign result_o = result_q; assign mult_trans_id_o = trans_id_q; assign mult_ready_o = 1'b1; @@ -472,28 +473,38 @@ module mul ( end end + + // single stage version + assign mult_result_d = $signed({operand_a_i[63] & sign_a, operand_a_i}) * + $signed({operand_b_i[63] & sign_b, operand_b_i}); + + + assign operator_d = operator_i; + always_comb begin : p_selmux + unique case (operator_q) + MULH, MULHU, MULHSU: result_o = mult_result_q[127:64]; + MULW: result_o = sext32(mult_result_q[31:0]); + // MUL performs an XLEN-bit×XLEN-bit multiplication and places the lower XLEN bits in the destination register + default: result_o 
= mult_result_q[63:0];// including MUL + endcase + end + // ----------------------- // Output pipeline register // ----------------------- always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin - mult_valid_q <= '0; - trans_id_q <= '0; - result_q <= '0; - end else begin + mult_valid_q <= '0; + trans_id_q <= '0; + operator_q <= MUL; + mult_result_q <= '0; + end else begin // Input silencing trans_id_q <= trans_id_i; // Output Register - mult_valid_q <= mult_valid; - - case (operator_i) - // MUL performs an XLEN-bit×XLEN-bit multiplication and places the lower XLEN bits in the destination register - MUL: result_q <= mult_result[63:0]; - MULH: result_q <= mult_result[127:64]; - MULHU: result_q <= mult_result[127:64]; - MULHSU: result_q <= mult_result[127:64]; - MULW: result_q <= sext32(mult_result[31:0]); - endcase - end + mult_valid_q <= mult_valid; + operator_q <= operator_d; + mult_result_q <= mult_result_d; + end end endmodule diff --git a/src/ptw.sv b/src/ptw.sv index d552f4092e..89a04e2cac 100644 --- a/src/ptw.sv +++ b/src/ptw.sv @@ -31,7 +31,7 @@ module ptw #( input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores input logic lsu_is_store_i, // this translation was triggered by a store - // PTW memory interface + // PTW memory interface input dcache_req_o_t req_port_i, output dcache_req_i_t req_port_o, @@ -61,8 +61,6 @@ module ptw #( ); - assign req_port_o.amo_op = AMO_NONE; - // input registers logic data_rvalid_q; logic [63:0] data_rdata_q; @@ -165,10 +163,10 @@ module ptw #( ptw_pptr_n = ptw_pptr_q; state_d = state_q; global_mapping_n = global_mapping_q; - // input registers + // input registers tlb_update_asid_n = tlb_update_asid_q; vaddr_n = vaddr_q; - + itlb_miss_o = 1'b0; dtlb_miss_o = 1'b0; diff --git a/src/re_name.sv b/src/re_name.sv index f61dfa8ef3..93def90f99 100644 --- a/src/re_name.sv +++ b/src/re_name.sv @@ -24,6 +24,7 @@ module re_name ( input logic clk_i, // Clock input logic rst_ni, // 
Asynchronous reset active low input logic flush_i, // Flush renaming state + input logic flush_unissied_instr_i, // from/to scoreboard input scoreboard_entry_t issue_instr_i, input logic issue_instr_valid_i, @@ -52,7 +53,7 @@ module re_name ( re_name_table_gpr_n = re_name_table_gpr_q; issue_instr_o = issue_instr_i; - if (issue_ack_i) begin + if (issue_ack_i && !flush_unissied_instr_i) begin // if we acknowledge the instruction tic the corresponding destination register re_name_table_gpr_n[issue_instr_i.rd] = re_name_table_gpr_q[issue_instr_i.rd] ^ 1'b1; end diff --git a/src/scoreboard.sv b/src/scoreboard.sv index ec7f6af7f7..5ed3e587ee 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -18,8 +18,7 @@ module scoreboard #( parameter int unsigned NR_ENTRIES = 8, parameter int unsigned NR_WB_PORTS = 1, parameter int unsigned NR_COMMIT_PORTS = 2 - ) - ( +)( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low input logic flush_unissued_instr_i, // flush only un-issued instructions @@ -53,6 +52,7 @@ module scoreboard #( input logic issue_ack_i, // write-back port + input branchpredict_t resolved_branch_i, input logic [NR_WB_PORTS-1:0][TRANS_ID_BITS-1:0] trans_id_i, // transaction ID at which to write the result back input logic [NR_WB_PORTS-1:0][63:0] wbdata_i, // write data in input exception_t [NR_WB_PORTS-1:0] ex_i, // exception from a functional unit (e.g.: ld/st exception) @@ -124,10 +124,13 @@ module scoreboard #( mem_n[trans_id_i[i]].sbe.valid = 1'b1; mem_n[trans_id_i[i]].sbe.result = wbdata_i[i]; // save the target address of a branch (needed for debug in commit stage) - mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address; + if (resolved_branch_i.valid) begin + mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address; + end // write the exception back if it is valid - if (ex_i[i].valid) + if (ex_i[i].valid) begin mem_n[trans_id_i[i]].sbe.ex = ex_i[i]; + end end end diff --git 
a/src/store_buffer.sv b/src/store_buffer.sv index 26f4005cb3..6b07536f8b 100644 --- a/src/store_buffer.sv +++ b/src/store_buffer.sv @@ -40,7 +40,7 @@ module store_buffer ( // D$ interface input dcache_req_o_t req_port_i, - output dcache_req_i_t req_port_o + output dcache_req_i_t req_port_o ); // depth of store-buffers localparam int unsigned DEPTH_SPEC = 4; @@ -71,10 +71,6 @@ module store_buffer ( logic [$clog2(DEPTH_COMMIT)-1:0] commit_read_pointer_n, commit_read_pointer_q; logic [$clog2(DEPTH_COMMIT)-1:0] commit_write_pointer_n, commit_write_pointer_q; - - - assign req_port_o.amo_op = AMO_NONE; - // ---------------------------------------- // Speculative Queue - Core Interface // ---------------------------------------- @@ -257,6 +253,10 @@ module store_buffer ( @(posedge clk_i) rst_ni && (speculative_status_cnt_q == DEPTH_SPEC) |-> !valid_i) else $error ("[Speculative Queue] You are trying to push new data although the buffer is not ready"); + speculative_buffer_underflow: assert property ( + @(posedge clk_i) rst_ni && (speculative_status_cnt_q == 0) |-> !commit_i) + else $error ("[Speculative Queue] You are committing although there are no stores to commit"); + commit_buffer_overflow: assert property ( @(posedge clk_i) rst_ni && (commit_status_cnt_q == DEPTH_SPEC) |-> !commit_i) else $error("[Commit Queue] You are trying to commit a store although the buffer is full"); diff --git a/src/store_unit.sv b/src/store_unit.sv index a7a3ea5905..7c2c8a3634 100644 --- a/src/store_unit.sv +++ b/src/store_unit.sv @@ -10,7 +10,7 @@ // // Author: Florian Zaruba, ETH Zurich // Date: 22.05.2017 -// Description: Store Unit, takes care of all store requests +// Description: Store Unit, takes care of all store requests and atomic memory operations (AMOs) import ariane_pkg::*; @@ -25,7 +25,7 @@ module store_unit ( output logic pop_st_o, input logic commit_i, output logic commit_ready_o, - + input logic amo_valid_commit_i, // store unit output port output logic valid_o, output 
logic [TRANS_ID_BITS-1:0] trans_id_o, @@ -41,22 +41,33 @@ module store_unit ( input logic [11:0] page_offset_i, output logic page_offset_matches_o, // D$ interface + output amo_req_t amo_req_o, + input amo_resp_t amo_resp_i, input dcache_req_o_t req_port_i, - output dcache_req_i_t req_port_o + output dcache_req_i_t req_port_o ); + // it doesn't matter what we are writing back as stores don't return anything assign result_o = 64'b0; - enum logic [1:0] {IDLE, VALID_STORE, WAIT_TRANSLATION, WAIT_STORE_READY} NS, CS; + enum logic [1:0] { + IDLE, + VALID_STORE, + WAIT_TRANSLATION, + WAIT_STORE_READY + } state_d, state_q; // store buffer control signals - logic st_ready; - logic st_valid; - logic st_valid_without_flush; - + logic st_ready; + logic st_valid; + logic st_valid_without_flush; + logic instr_is_amo; + assign instr_is_amo = is_amo(lsu_ctrl_i.operator); // keep the data and the byte enable for the second cycle (after address translation) - logic [63:0] st_data_n, st_data_q; - logic [7:0] st_be_n, st_be_q; - logic [1:0] st_data_size_n, st_data_size_q; + logic [63:0] st_data_n, st_data_q; + logic [7:0] st_be_n, st_be_q; + logic [1:0] st_data_size_n, st_data_size_q; + amo_t amo_op_d, amo_op_q; + logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; // output assignments @@ -71,25 +82,24 @@ module store_unit ( pop_st_o = 1'b0; ex_o = ex_i; trans_id_n = lsu_ctrl_i.trans_id; - NS = CS; + state_d = state_q; - case (CS) + case (state_q) // we got a valid store IDLE: begin if (valid_i) begin - - NS = VALID_STORE; + state_d = VALID_STORE; translation_req_o = 1'b1; pop_st_o = 1'b1; // check if translation was valid and we have space in the store buffer // otherwise simply stall if (!dtlb_hit_i) begin - NS = WAIT_TRANSLATION; + state_d = WAIT_TRANSLATION; pop_st_o = 1'b0; end if (!st_ready) begin - NS = WAIT_STORE_READY; + state_d = WAIT_STORE_READY; pop_st_o = 1'b0; end end @@ -103,25 +113,25 @@ module store_unit ( st_valid_without_flush = 1'b1; - // we have another request - 
if (valid_i) begin + // we have another request and its not an AMO (the AMO buffer only has depth 1) + if (valid_i && !instr_is_amo) begin translation_req_o = 1'b1; - NS = VALID_STORE; - pop_st_o = 1'b1; + state_d = VALID_STORE; + pop_st_o = 1'b1; if (!dtlb_hit_i) begin - NS = WAIT_TRANSLATION; + state_d = WAIT_TRANSLATION; pop_st_o = 1'b0; end if (!st_ready) begin + state_d = WAIT_STORE_READY; pop_st_o = 1'b0; - NS = WAIT_STORE_READY; end // if we do not have another request go back to idle end else begin - NS = IDLE; + state_d = IDLE; end end @@ -131,7 +141,7 @@ module store_unit ( translation_req_o = 1'b1; if (st_ready && dtlb_hit_i) begin - NS = IDLE; + state_d = IDLE; end end @@ -142,7 +152,7 @@ module store_unit ( translation_req_o = 1'b1; if (dtlb_hit_i) begin - NS = IDLE; + state_d = IDLE; end end endcase @@ -151,16 +161,16 @@ module store_unit ( // Access Exception // ----------------- // we got an address translation exception (access rights, misaligned or page fault) - if (ex_i.valid && (CS != IDLE)) begin + if (ex_i.valid && (state_q != IDLE)) begin // the only difference is that we do not want to store this request pop_st_o = 1'b1; st_valid = 1'b0; - NS = IDLE; + state_d = IDLE; valid_o = 1'b1; end if (flush_i) - NS = IDLE; + state_d = IDLE; end // ----------- @@ -168,57 +178,99 @@ module store_unit ( // ----------- // re-align the write data to comply with the address offset always_comb begin - st_be_n = lsu_ctrl_i.be; - st_data_n = lsu_ctrl_i.data; + st_be_n = lsu_ctrl_i.be; + // don't shift the data if we are going to perform an AMO as we still need to operate on this data + st_data_n = instr_is_amo ? 
lsu_ctrl_i.data + : data_align(lsu_ctrl_i.vaddr[2:0], lsu_ctrl_i.data); st_data_size_n = extract_transfer_size(lsu_ctrl_i.operator); - - case (lsu_ctrl_i.vaddr[2:0]) - 3'b000: st_data_n = lsu_ctrl_i.data; - 3'b001: st_data_n = {lsu_ctrl_i.data[55:0], lsu_ctrl_i.data[63:56]}; - 3'b010: st_data_n = {lsu_ctrl_i.data[47:0], lsu_ctrl_i.data[63:48]}; - 3'b011: st_data_n = {lsu_ctrl_i.data[39:0], lsu_ctrl_i.data[63:40]}; - 3'b100: st_data_n = {lsu_ctrl_i.data[31:0], lsu_ctrl_i.data[63:32]}; - 3'b101: st_data_n = {lsu_ctrl_i.data[23:0], lsu_ctrl_i.data[63:24]}; - 3'b110: st_data_n = {lsu_ctrl_i.data[15:0], lsu_ctrl_i.data[63:16]}; - 3'b111: st_data_n = {lsu_ctrl_i.data[7:0], lsu_ctrl_i.data[63:8]}; + // save AMO op for next cycle + case (lsu_ctrl_i.operator) + AMO_LRW, AMO_LRD: amo_op_d = AMO_LR; + AMO_SCW, AMO_SCD: amo_op_d = AMO_SC; + AMO_SWAPW, AMO_SWAPD: amo_op_d = AMO_SWAP; + AMO_ADDW, AMO_ADDD: amo_op_d = AMO_ADD; + AMO_ANDW, AMO_ANDD: amo_op_d = AMO_AND; + AMO_ORW, AMO_ORD: amo_op_d = AMO_OR; + AMO_XORW, AMO_XORD: amo_op_d = AMO_XOR; + AMO_MAXW, AMO_MAXD: amo_op_d = AMO_MAX; + AMO_MAXWU, AMO_MAXDU: amo_op_d = AMO_MAXU; + AMO_MINW, AMO_MIND: amo_op_d = AMO_MIN; + AMO_MINWU, AMO_MINDU: amo_op_d = AMO_MINU; + default: amo_op_d = AMO_NONE; endcase end + + logic store_buffer_valid, amo_buffer_valid; + logic store_buffer_ready, amo_buffer_ready; + + // multiplex between store unit and amo buffer + assign store_buffer_valid = st_valid & (amo_op_q == AMO_NONE); + assign amo_buffer_valid = st_valid & (amo_op_q != AMO_NONE); + + assign st_ready = store_buffer_ready & amo_buffer_ready; + // --------------- // Store Queue // --------------- store_buffer store_buffer_i ( - // store queue write port - .valid_i ( st_valid ), - .valid_without_flush_i ( st_valid_without_flush ), // the flush signal can be critical and we need this valid - // signal to check whether the page_offset matches or not, functionaly it doesn't - // make a difference whether we use the correct valid signal 
or not as we are flushing the whole pipeline anyway + .clk_i, + .rst_ni, + .flush_i, + .no_st_pending_o, + .page_offset_i, + .page_offset_matches_o, + .commit_i, + .commit_ready_o, + .ready_o ( store_buffer_ready ), + .valid_i ( store_buffer_valid ), + // the flush signal can be critical and we need this valid + // signal to check whether the page_offset matches or not, + // functionaly it doesn't make a difference whether we use + // the correct valid signal or not as we are flushing + // the whole pipeline anyway + .valid_without_flush_i ( st_valid_without_flush ), + .paddr_i, .data_i ( st_data_q ), .be_i ( st_be_q ), .data_size_i ( st_data_size_q ), - // store buffer out - .ready_o ( st_ready ), - .req_port_i ( req_port_i ), - .req_port_o ( req_port_o ), + .req_port_o ( req_port_o ) + ); - .* + amo_buffer i_amo_buffer ( + .clk_i, + .rst_ni, + .flush_i, + .valid_i ( amo_buffer_valid ), + .ready_o ( amo_buffer_ready ), + .paddr_i ( paddr_i ), + .amo_op_i ( amo_op_q ), + .data_i ( st_data_q ), + .data_size_i ( st_data_size_q ), + .amo_req_o ( amo_req_o ), + .amo_resp_i ( amo_resp_i ), + .amo_valid_commit_i ( amo_valid_commit_i ), + .no_st_pending_i ( no_st_pending_o ) ); + // --------------- // Registers // --------------- always_ff @(posedge clk_i or negedge rst_ni) begin - if(~rst_ni) begin - CS <= IDLE; + if (~rst_ni) begin + state_q <= IDLE; st_be_q <= '0; st_data_q <= '0; st_data_size_q <= '0; trans_id_q <= '0; + amo_op_q <= AMO_NONE; end else begin - CS <= NS; + state_q <= state_d; st_be_q <= st_be_n; st_data_q <= st_data_n; trans_id_q <= trans_id_n; st_data_size_q <= st_data_size_n; + amo_op_q <= amo_op_d; end end diff --git a/src/util/instruction_trace_item.svh b/src/util/instruction_trace_item.svh index e70d9d064a..144f275dd8 100644 --- a/src/util/instruction_trace_item.svh +++ b/src/util/instruction_trace_item.svh @@ -50,17 +50,17 @@ class instruction_trace_item; endfunction // convert register address to ABI compatible form function string 
regAddrToStr(logic [5:0] addr); - case (addr) + case (addr[4:0]) 0: return "x0"; 1: return "ra"; 2: return "sp"; 3: return "gp"; 4: return "tp"; - 5, 6, 7: return $sformatf("t%0d", (addr - 5)); - 8, 9: return $sformatf("s%0d", (addr - 8)); - 10, 11, 12, 13, 14, 15, 16, 17: return $sformatf("a%0d", (addr - 10)); - 28, 29, 30, 31: return $sformatf("t%0d", (addr - 25)); - default: return $sformatf("s%0d", (addr - 16)); + 5, 6, 7: return $sformatf("t%0d", (addr[4:0] - 5)); + 8, 9: return $sformatf("s%0d", (addr[4:0] - 8)); + 10, 11, 12, 13, 14, 15, 16, 17: return $sformatf("a%0d", (addr[4:0] - 10)); + 28, 29, 30, 31: return $sformatf("t%0d", (addr[4:0] - 25)); + default: return $sformatf("s%0d", (addr[4:0] - 16)); endcase endfunction @@ -203,6 +203,7 @@ class instruction_trace_item; // loads and stores INSTR_LOAD: s = this.printLoadInstr(); INSTR_STORE: s = this.printStoreInstr(); + INSTR_AMO: s = this.printAMOInstr(); default: s = this.printMnemonic("INVALID"); endcase @@ -226,11 +227,11 @@ class instruction_trace_item; s = $sformatf("%s %-4s:%16x", s, regAddrToStr(result_regs[i]), this.result); end - foreach (read_regs[i]) begin if (read_regs[i] != 0) s = $sformatf("%s %-4s:%16x", s, regAddrToStr(read_regs[i]), reg_file[read_regs[i]]); end + casex (instr) // check of the instrction was a load or store INSTR_STORE: begin @@ -363,29 +364,27 @@ class instruction_trace_item; function string printLoadInstr(); string mnemonic; - - case (instr[14:12]) - 3'b000: mnemonic = "lb"; - 3'b001: mnemonic = "lh"; - 3'b010: mnemonic = "lw"; - 3'b100: mnemonic = "lbu"; - 3'b101: mnemonic = "lhu"; - 3'b110: mnemonic = "lwu"; - 3'b011: mnemonic = "ld"; - default: return printMnemonic("INVALID"); - endcase - - result_regs.push_back(sbe.rd); - read_regs.push_back(sbe.rs1); - // save the immediate for calculating the virtual address - this.imm = sbe.result; - - return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); + case 
(instr[14:12]) + 3'b000: mnemonic = "lb"; + 3'b001: mnemonic = "lh"; + 3'b010: mnemonic = "lw"; + 3'b100: mnemonic = "lbu"; + 3'b101: mnemonic = "lhu"; + 3'b110: mnemonic = "lwu"; + 3'b011: mnemonic = "ld"; + default: return printMnemonic("INVALID"); + endcase + + result_regs.push_back(sbe.rd); + read_regs.push_back(sbe.rs1); + // save the immediate for calculating the virtual address + this.imm = sbe.result; + + return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); endfunction function string printStoreInstr(); - string mnemonic; - + string mnemonic; case (instr[14:12]) 3'b000: mnemonic = "sb"; 3'b001: mnemonic = "sh"; @@ -402,6 +401,52 @@ class instruction_trace_item; return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); endfunction // printSInstr + function string printAMOInstr(); + // decode the mnemonic from funct5 (instr[31:27]); funct3 (instr[14:12]) selects word (2) or double (3), anything else prints INVALID + string mnemonic; + // words + if (instr[14:12] == 3'h2) begin + case (instr[31:27]) + 5'h0: mnemonic = "amoadd.w"; + 5'h1: mnemonic = "amoswap.w"; + 5'h2: mnemonic = "lr.w"; + 5'h3: mnemonic = "sc.w"; + 5'h4: mnemonic = "amoxor.w"; + 5'h8: mnemonic = "amoor.w"; + 5'hC: mnemonic = "amoand.w"; + 5'h10: mnemonic = "amomin.w"; + 5'h14: mnemonic = "amomax.w"; + 5'h18: mnemonic = "amominu.w"; + 5'h1C: mnemonic = "amomaxu.w"; + default: return printMnemonic("INVALID"); + endcase + // doubles + end else if (instr[14:12] == 3'h3) begin + case (instr[31:27]) + 5'h0: mnemonic = "amoadd.d"; + 5'h1: mnemonic = "amoswap.d"; + 5'h2: mnemonic = "lr.d"; + 5'h3: mnemonic = "sc.d"; + 5'h4: mnemonic = "amoxor.d"; + 5'h8: mnemonic = "amoor.d"; + 5'hC: mnemonic = "amoand.d"; + 5'h10: mnemonic = "amomin.d"; + 5'h14: mnemonic = "amomax.d"; + 5'h18: mnemonic = "amominu.d"; + 5'h1C: mnemonic = "amomaxu.d"; + default: return printMnemonic("INVALID"); + endcase + end else return printMnemonic("INVALID"); + + result_regs.push_back(sbe.rd); + read_regs.push_back(sbe.rs2); + 
read_regs.push_back(sbe.rs1); + // save the immediate for calculating the virtual address + this.imm = 0; + + return $sformatf("%-16s %s, %s,(%s)", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs2), regAddrToStr(sbe.rs1)); + endfunction + function string printMulInstr(logic is_op32); string s = ""; diff --git a/src/util/instruction_tracer.svh b/src/util/instruction_tracer.svh index 0c66770be7..2b96043c4a 100644 --- a/src/util/instruction_tracer.svh +++ b/src/util/instruction_tracer.svh @@ -29,7 +29,7 @@ class instruction_tracer; logic [63:0] reg_file [32]; // 64 bit clock tick count longint unsigned clk_ticks; - int f; + int f, commit_log; // address mapping // contains mappings of the form vaddr <-> paddr // should it print the instructions to the console @@ -47,11 +47,13 @@ class instruction_tracer; endfunction : new function void create_file(logic [5:0] cluster_id, logic [3:0] core_id); - string fn; + string fn, fn_commit_log; $sformat(fn, "trace_core_%h_%h.log", cluster_id, core_id); + $sformat(fn_commit_log, "trace_core_%h_%h_commit.log", cluster_id, core_id); $display("[TRACER] Output filename is: %s", fn); this.f = $fopen(fn,"w"); + if (ENABLE_SPIKE_COMMIT_LOG) this.commit_log = $fopen(fn_commit_log, "w"); endfunction : create_file task trace(); @@ -125,8 +127,9 @@ class instruction_tracer; // as the most recent version of this register will be there. 
if (tracer_if.pck.we[i]) begin printInstr(issue_sbe, issue_commit_instruction, tracer_if.pck.wdata[i], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction); - end else + end else begin printInstr(issue_sbe, issue_commit_instruction, reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction); + end end end // -------------- @@ -140,10 +143,11 @@ class instruction_tracer; // Commit Registers // ---------------------- // update shadow reg file here - for (int i = 0; i < 2; i++) + for (int i = 0; i < 2; i++) begin if (tracer_if.pck.we[i] && tracer_if.pck.waddr[i] != 5'b0) begin reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i]; end + end // -------------- // Flush Signals @@ -181,6 +185,9 @@ class instruction_tracer; instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, paddr, priv_lvl, debug_mode, bp); // print instruction to console string print_instr = iti.printInstr(); + if (ENABLE_SPIKE_COMMIT_LOG && !debug_mode) begin + $fwrite(this.commit_log, riscv::spikeCommitLog(sbe.pc, priv_lvl, instr, sbe.rd, result)); + end uvm_report_info( "Tracer", print_instr, UVM_HIGH); $fwrite(this.f, {print_instr, "\n"}); endfunction @@ -193,8 +200,8 @@ class instruction_tracer; endfunction function void close(); - if (f) - $fclose(this.f); + if (f) $fclose(this.f); + if (ENABLE_SPIKE_COMMIT_LOG && this.commit_log) $fclose(this.commit_log); endfunction endclass : instruction_tracer diff --git a/src/util/instruction_tracer_defines.svh b/src/util/instruction_tracer_defines.svh index e79d34e85a..4b9756e45c 100644 --- a/src/util/instruction_tracer_defines.svh +++ b/src/util/instruction_tracer_defines.svh @@ -101,6 +101,9 @@ parameter INSTR_DIVU = { 7'b0000001, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp } parameter INSTR_REM = { 7'b0000001, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp }; parameter INSTR_REMU = { 7'b0000001, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp }; +// A 
+parameter INSTR_AMO = {25'b?, riscv::OpcodeAmo }; + // Load/Stores parameter INSTR_LOAD = {25'b?, riscv::OpcodeLoad }; parameter INSTR_STORE = {25'b?, riscv::OpcodeStore }; diff --git a/src/util/instruction_tracer_if.sv b/src/util/instruction_tracer_if.sv index 01f6ff07a9..86e8007e2a 100644 --- a/src/util/instruction_tracer_if.sv +++ b/src/util/instruction_tracer_if.sv @@ -36,7 +36,6 @@ interface instruction_tracer_if ( // commit stage scoreboard_entry_t [1:0] commit_instr; // commit instruction logic [1:0] commit_ack; - // address translation // stores logic st_valid; diff --git a/src_files.yml b/src_files.yml index ef2dca9a6b..7e6ee93705 100644 --- a/src_files.yml +++ b/src_files.yml @@ -20,8 +20,11 @@ ariane: src/csr_regfile.sv, src/decoder.sv, src/ex_stage.sv, - src/fetch_fifo.sv, - src/frontend.sv, + src/frontend/btb.sv, + src/frontend/bht.sv, + src/frontend/ras.sv, + src/frontend/instr_scan.sv, + src/frontend/frontend.sv, src/icache.sv, src/id_stage.sv, src/instr_realigner.sv, diff --git a/tb/ariane_tb.sv b/tb/ariane_tb.sv index 4676edb8b6..65dd9fd0f1 100644 --- a/tb/ariane_tb.sv +++ b/tb/ariane_tb.sv @@ -34,9 +34,9 @@ module ariane_tb; logic [31:0] exit_o; ariane_testharness dut ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .exit_o ( exit_o ) + .clk_i, + .rst_ni, + .exit_o ); // Clock process diff --git a/tb/ariane_testharness.sv b/tb/ariane_testharness.sv index df7d9eedb1..3b451ca10c 100644 --- a/tb/ariane_testharness.sv +++ b/tb/ariane_testharness.sv @@ -14,23 +14,23 @@ // Instantiates an AXI-Bus and memories module ariane_testharness #( - parameter logic [63:0] CACHE_START_ADDR = 64'h8000_0000, // address on which to decide whether the request is cache-able or not - parameter int unsigned AXI_ID_WIDTH = 10, - parameter int unsigned AXI_USER_WIDTH = 1, - parameter int unsigned AXI_ADDRESS_WIDTH = 64, - parameter int unsigned AXI_DATA_WIDTH = 64, - parameter int unsigned NUM_WORDS = 2**24 // memory size - )( - input logic clk_i, - input logic rst_ni, - 
output logic [31:0] exit_o - ); + parameter logic [63:0] CACHE_START_ADDR = 64'h8000_0000, // address on which to decide whether the request is cache-able or not + parameter int unsigned AXI_ID_WIDTH = 10, + parameter int unsigned AXI_USER_WIDTH = 1, + parameter int unsigned AXI_ADDRESS_WIDTH = 64, + parameter int unsigned AXI_DATA_WIDTH = 64, + parameter int unsigned NUM_WORDS = 2**24 // memory size +)( + input logic clk_i, + input logic rst_ni, + output logic [31:0] exit_o +); // disable test-enable logic test_en; logic ndmreset; logic ndmreset_n; - logic debug_req; + logic debug_req_core; int jtag_enable; logic init_done; @@ -45,9 +45,6 @@ module ariane_testharness #( logic debug_req_valid; logic debug_req_ready; - logic [6:0] debug_req_bits_addr; - logic [1:0] debug_req_bits_op; - logic [31:0] debug_req_bits_data; logic debug_resp_valid; logic debug_resp_ready; logic [1:0] debug_resp_bits_resp; @@ -67,11 +64,15 @@ module ariane_testharness #( logic dmi_resp_ready; logic dmi_resp_valid; + logic rtc_i; + assign rtc_i = 1'b0; + assign test_en = 1'b0; assign ndmreset_n = ~ndmreset ; localparam NB_SLAVE = 4; - localparam NB_MASTER = 3; + localparam NB_MASTER = 4; + localparam AXI_ID_WIDTH_SLAVES = AXI_ID_WIDTH + $clog2(NB_SLAVE); AXI_BUS #( @@ -97,12 +98,16 @@ module ariane_testharness #( if (!$value$plusargs("jtag_rbb_enable=%b", jtag_enable)) jtag_enable = 'h0; end + dm::dmi_req_t jtag_dmi_req; + dm::dmi_req_t dmi_req; + + dm::dmi_req_t debug_req; + dm::dmi_resp_t debug_resp; + // debug if MUX assign debug_req_valid = (jtag_enable[0]) ? jtag_req_valid : dmi_req_valid; - assign debug_req_bits_addr = (jtag_enable[0]) ? jtag_req_bits_addr : dmi_req_bits_addr; - assign debug_req_bits_op = (jtag_enable[0]) ? jtag_req_bits_op : dmi_req_bits_op; - assign debug_req_bits_data = (jtag_enable[0]) ? jtag_req_bits_data : dmi_req_bits_data; assign debug_resp_ready = (jtag_enable[0]) ? jtag_resp_ready : dmi_resp_ready; + assign debug_req = (jtag_enable[0]) ? 
jtag_dmi_req : dmi_req; assign exit_o = (jtag_enable[0]) ? jtag_exit : dmi_exit; assign jtag_resp_valid = (jtag_enable[0]) ? debug_resp_valid : 1'b0; assign dmi_resp_valid = (jtag_enable[0]) ? 1'b0 : debug_resp_valid; @@ -124,42 +129,39 @@ module ariane_testharness #( ); dmi_jtag i_dmi_jtag ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - - .dmi_rst_no ( ), - .dmi_req_valid_o ( jtag_req_valid ), - .dmi_req_ready_i ( debug_req_ready ), - .dmi_req_bits_addr_o ( jtag_req_bits_addr ), - .dmi_req_bits_op_o ( jtag_req_bits_op ), - .dmi_req_bits_data_o ( jtag_req_bits_data ), - .dmi_resp_valid_i ( jtag_resp_valid ), - .dmi_resp_ready_o ( jtag_resp_ready ), - .dmi_resp_bits_resp_i ( debug_resp_bits_resp ), - .dmi_resp_bits_data_i ( debug_resp_bits_data ), - - .tck_i ( jtag_TCK ), - .tms_i ( jtag_TMS ), - .trst_ni ( jtag_TRSTn ), - .td_i ( jtag_TDI ), - .td_o ( jtag_TDO_data ), - .tdo_oe_o ( jtag_TDO_driven ) + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .dmi_req_o ( jtag_dmi_req ), + .dmi_req_valid_o ( jtag_req_valid ), + .dmi_req_ready_i ( debug_req_ready ), + .dmi_resp_i ( debug_resp ), + .dmi_resp_ready_o ( jtag_resp_ready ), + .dmi_resp_valid_i ( jtag_resp_valid ), + .dmi_rst_no ( ), // not connected + .tck_i ( jtag_TCK ), + .tms_i ( jtag_TMS ), + .trst_ni ( jtag_TRSTn ), + .td_i ( jtag_TDI ), + .td_o ( jtag_TDO_data ), + .tdo_oe_o ( jtag_TDO_driven ) ); // SiFive's SimDTM Module // Converts to DPI calls + logic [1:0] debug_req_bits_op; + assign dmi_req.op = dm::dtm_op_t'(debug_req_bits_op); SimDTM i_SimDTM ( .clk ( clk_i ), .reset ( ~rst_ni ), .debug_req_valid ( dmi_req_valid ), .debug_req_ready ( debug_req_ready ), - .debug_req_bits_addr ( dmi_req_bits_addr ), - .debug_req_bits_op ( dmi_req_bits_op ), - .debug_req_bits_data ( dmi_req_bits_data ), + .debug_req_bits_addr ( dmi_req.addr ), + .debug_req_bits_op ( debug_req_bits_op ), + .debug_req_bits_data ( dmi_req.data ), .debug_resp_valid ( dmi_resp_valid ), .debug_resp_ready ( dmi_resp_ready ), - .debug_resp_bits_resp ( 
debug_resp_bits_resp ), - .debug_resp_bits_data ( debug_resp_bits_data ), + .debug_resp_bits_resp ( debug_resp.resp ), + .debug_resp_bits_data ( debug_resp.data ), .exit ( dmi_exit ) ); @@ -177,20 +179,17 @@ module ariane_testharness #( .testmode_i ( test_en ), .ndmreset_o ( ndmreset ), .dmactive_o ( ), // active debug session - .debug_req_o ( debug_req ), + .debug_req_o ( debug_req_core ), .unavailable_i ( '0 ), .axi_master ( slave[3] ), - .axi_slave ( master[2] ), + .axi_slave ( master[3] ), .dmi_rst_ni ( rst_ni ), .dmi_req_valid_i ( debug_req_valid ), .dmi_req_ready_o ( debug_req_ready ), - .dmi_req_bits_addr_i ( debug_req_bits_addr ), - .dmi_req_bits_op_i ( debug_req_bits_op ), - .dmi_req_bits_data_i ( debug_req_bits_data ), + .dmi_req_i ( debug_req ), .dmi_resp_valid_o ( debug_resp_valid ), .dmi_resp_ready_i ( debug_resp_ready ), - .dmi_resp_bits_resp_o ( debug_resp_bits_resp ), - .dmi_resp_bits_data_o ( debug_resp_bits_data ) + .dmi_resp_o ( debug_resp ) ); // --------------- @@ -208,7 +207,7 @@ module ariane_testharness #( ) i_axi2rom ( .clk_i ( clk_i ), .rst_ni ( ndmreset_n ), - .slave ( master[1] ), + .slave ( master[2] ), .req_o ( rom_req ), .we_o ( ), .addr_o ( rom_addr ), @@ -234,7 +233,7 @@ module ariane_testharness #( logic [AXI_DATA_WIDTH-1:0] wdata; logic [AXI_DATA_WIDTH-1:0] rdata; - + axi2mem #( .AXI_ID_WIDTH ( AXI_ID_WIDTH_SLAVES ), .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), @@ -278,13 +277,33 @@ module ariane_testharness #( .AXI_USER_WIDTH ( AXI_USER_WIDTH ), .AXI_ID_WIDTH ( AXI_ID_WIDTH ) ) i_axi_xbar ( - .clk ( clk_i ), - .rst_n ( ndmreset_n ), - .test_en_i ( test_en ), - .slave ( slave ), - .master ( master ), - .start_addr_i ( {64'h0, 64'h10000, CACHE_START_ADDR} ), - .end_addr_i ( {64'hFFF, 64'h1FFFF, CACHE_START_ADDR + 2**24} ) + .clk ( clk_i ), + .rst_n ( ndmreset_n ), + .test_en_i ( test_en ), + .slave ( slave ), + .master ( master ), + .start_addr_i ( {64'h0, 64'h10000, 64'h2000000, CACHE_START_ADDR} ), + .end_addr_i ( {64'hFFF, 64'h1FFFF, 
64'h2FFFFFF, CACHE_START_ADDR + 2**24} ) + ); + + // --------------- + // CLINT + // --------------- + logic ipi; + logic timer_irq; + + clint #( + .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH_SLAVES ), + .NR_CORES ( 1 ) + ) i_clint ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .slave ( master[1] ), + .rtc_i ( rtc_i ), + .timer_irq_o ( timer_irq ), + .ipi_o ( ipi ) ); // --------------- @@ -297,14 +316,13 @@ module ariane_testharness #( ) i_ariane ( .clk_i ( clk_i ), .rst_ni ( ndmreset_n ), - .test_en_i ( test_en ), .boot_addr_i ( 64'h10000 ), // start fetching from ROM .core_id_i ( '0 ), .cluster_id_i ( '0 ), - .irq_i ( '0 ), - .ipi_i ( '0 ), - .time_irq_i ( '0 ), - .debug_req_i ( debug_req ), + .irq_i ( '0 ), // we do not specify other interrupts in this TB + .ipi_i ( ipi ), + .time_irq_i ( timer_irq ), + .debug_req_i ( debug_req_core ), .data_if ( slave[2] ), .bypass_if ( slave[1] ), .instr_if ( slave[0] )