diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 474b1af00..f4f5902a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,13 +21,13 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v2 with: submodules: recursive - name: Cache Toolchain Directory id: cache-toolchain - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: tools key: ${{ runner.os }}-toolchain-v0.1 @@ -36,7 +36,7 @@ jobs: - name: Cache Third Party Directory id: cache-thirdparty - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: third_party key: ${{ runner.os }}-thirdparty-v0.1 @@ -46,7 +46,7 @@ jobs: - name: Install Dependencies if: steps.cache-toolchain.outputs.cache-hit != 'true' || steps.cache-thirdparty.outputs.cache-hit != 'true' run: | - sudo bash ./ci/system_updates.sh + sudo bash ./ci/install_dependencies.sh - name: Setup Toolchain if: steps.cache-toolchain.outputs.cache-hit != 'true' @@ -62,111 +62,7 @@ jobs: run: | make -C third_party > /dev/null - # build: - # runs-on: ubuntu-20.04 - # needs: setup - # strategy: - # matrix: - # xlen: [32, 64] - - # steps: - # - name: Checkout code - # uses: actions/checkout@v2 - - # - name: Install Dependencies - # run: | - # sudo bash ./ci/system_updates.sh - - # - name: Cache Toolchain Directory - # id: cache-toolchain - # uses: actions/cache@v2 - # with: - # path: tools - # key: ${{ runner.os }}-toolchain-v0.1 - # restore-keys: | - # ${{ runner.os }}-toolchain- - - # - name: Cache Third Party Directory - # id: cache-thirdparty - # uses: actions/cache@v2 - # with: - # path: third_party - # key: ${{ runner.os }}-thirdparty-v0.1 - # restore-keys: | - # ${{ runner.os }}-thirdparty- - - # - name: Run Build - # run: | - # TOOLDIR=$PWD/tools - # mkdir -p build${{ matrix.xlen }} - # cd build${{ matrix.xlen }} - # ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} - # source ci/toolchain_env.sh - # make software -s > /dev/null - # make tests -s > 
/dev/null - - # - name: Upload Build Artifact - # uses: actions/upload-artifact@v2 - # with: - # name: build-${{ matrix.xlen }} - # path: build${{ matrix.xlen }} - - # tests: - # runs-on: ubuntu-20.04 - # needs: build - # strategy: - # matrix: - # name: [regression, opencl, config1, config2, debug, stress] - # xlen: [32, 64] - - # steps: - # - name: Checkout code - # uses: actions/checkout@v2 - - # - name: Install Dependencies - # run: | - # sudo bash ./ci/system_updates.sh - - # - name: Cache Toolchain Directory - # id: cache-toolchain - # uses: actions/cache@v2 - # with: - # path: tools - # key: ${{ runner.os }}-toolchain-v0.1 - # restore-keys: | - # ${{ runner.os }}-toolchain- - - # - name: Cache Third Party Directory - # id: cache-thirdparty - # uses: actions/cache@v2 - # with: - # path: third_party - # key: ${{ runner.os }}-thirdparty-v0.1 - # restore-keys: | - # ${{ runner.os }}-thirdparty- - - # - name: Download Build Artifact - # uses: actions/download-artifact@v2 - # with: - # name: build-${{ matrix.xlen }} - # path: build${{ matrix.xlen }} - - # - name: Run tests - # run: | - # cd build${{ matrix.xlen }} - # source ci/toolchain_env.sh - # chmod -R +x . 
# Ensure all files have executable permissions - # if [ "${{ matrix.name }}" == "regression" ]; then - # ./ci/regression.sh --unittest - # ./ci/regression.sh --isa - # ./ci/regression.sh --kernel - # ./ci/regression.sh --synthesis - # ./ci/regression.sh --regression - # else - # ./ci/regression.sh --${{ matrix.name }} - # fi - - build_vm: + build: runs-on: ubuntu-20.04 needs: setup strategy: @@ -175,15 +71,15 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v2 - name: Install Dependencies run: | - sudo bash ./ci/system_updates.sh + sudo bash ./ci/install_dependencies.sh - name: Cache Toolchain Directory id: cache-toolchain - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: tools key: ${{ runner.os }}-toolchain-v0.1 @@ -192,7 +88,7 @@ jobs: - name: Cache Third Party Directory id: cache-thirdparty - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: third_party key: ${{ runner.os }}-thirdparty-v0.1 @@ -202,39 +98,39 @@ jobs: - name: Run Build run: | TOOLDIR=$PWD/tools - mkdir -p build${{ matrix.xlen }}-vm - cd build${{ matrix.xlen }}-vm - ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} --vm_enable=1 + mkdir -p build${{ matrix.xlen }} + cd build${{ matrix.xlen }} + ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} source ci/toolchain_env.sh make software -s > /dev/null make tests -s > /dev/null - name: Upload Build Artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: - name: build-${{ matrix.xlen }}-vm - path: build${{ matrix.xlen }}-vm + name: build-${{ matrix.xlen }} + path: build${{ matrix.xlen }} - test_vm: + tests: runs-on: ubuntu-20.04 - needs: build_vm + needs: build strategy: fail-fast: false matrix: - name: [regression, opencl, cache, config1, config2, debug, stress, vm] + name: [regression, opencl, cache, config1, config2, debug, scope, stress, synthesis, vm] xlen: [32, 64] steps: - name: Checkout code - uses: actions/checkout@v4 + uses: 
actions/checkout@v2 - name: Install Dependencies run: | - sudo bash ./ci/system_updates.sh + sudo bash ./ci/install_dependencies.sh - name: Cache Toolchain Directory id: cache-toolchain - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: tools key: ${{ runner.os }}-toolchain-v0.1 @@ -243,7 +139,7 @@ jobs: - name: Cache Third Party Directory id: cache-thirdparty - uses: actions/cache@v4 + uses: actions/cache@v2 with: path: third_party key: ${{ runner.os }}-thirdparty-v0.1 @@ -251,21 +147,28 @@ jobs: ${{ runner.os }}-thirdparty- - name: Download Build Artifact - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v3 with: - name: build-${{ matrix.xlen }}-vm - path: build${{ matrix.xlen }}-vm + name: build-${{ matrix.xlen }} + path: build${{ matrix.xlen }} - name: Run tests run: | - cd build${{ matrix.xlen }}-vm + cd build${{ matrix.xlen }} source ci/toolchain_env.sh chmod -R +x . # Ensure all files have executable permissions - ./ci/regression.sh --vm + if [ "${{ matrix.name }}" == "regression" ]; then + ./ci/regression.sh --unittest + ./ci/regression.sh --isa + ./ci/regression.sh --kernel + ./ci/regression.sh --regression + else + ./ci/regression.sh --${{ matrix.name }} + fi complete: runs-on: ubuntu-20.04 - needs: test_vm + needs: tests steps: - name: Check Completion diff --git a/.gitmodules b/.gitmodules index df3ca47e2..32abfe9cb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,9 @@ -[submodule "third_party/fpnew"] - path = third_party/fpnew - url = https://github.com/pulp-platform/fpnew.git [submodule "third_party/softfloat"] path = third_party/softfloat url = https://github.com/ucb-bar/berkeley-softfloat-3.git [submodule "third_party/ramulator"] path = third_party/ramulator url = https://github.com/CMU-SAFARI/ramulator2.git +[submodule "third_party/cvfpu"] + path = third_party/cvfpu + url = https://github.com/openhwgroup/cvfpu.git diff --git a/Makefile.in b/Makefile.in index bfe944998..264738aca 100644 --- a/Makefile.in +++ 
b/Makefile.in @@ -2,14 +2,6 @@ include config.mk .PHONY: build software tests -vm: - $(MAKE) -C $(VORTEX_HOME)/third_party - $(MAKE) -C hw - $(MAKE) -C sim simx - $(MAKE) -C kernel - $(MAKE) -C runtime vm - $(MAKE) -C tests - all: $(MAKE) -C $(VORTEX_HOME)/third_party $(MAKE) -C hw diff --git a/README.md b/README.md index 83a81a421..ed4c89d88 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ Vortex news can be found on its [website](https://vortex.cc.gatech.edu/) ## Specifications - Support RISC-V RV32IMAF and RV64IMAFD + - Microarchitecture: - configurable number of cores, warps, and threads. - configurable number of ALU, FPU, LSU, and SFU units per core. @@ -36,7 +37,7 @@ Vortex news can be found on its [website](https://vortex.cc.gatech.edu/) If you are interested in a stable release of Vortex, you can download the latest release [here](https://github.com/vortexgpgpu/vortex/releases/latest). Otherwise, you can pull the most recent, but (potentially) unstable version as shown below. The following steps demonstrate how to build and run Vortex with the default driver: SimX. If you are interested in a different backend, look [here](docs/simulation.md). ### Supported OS Platforms -- Ubuntu 18.04, 20.04 +- Ubuntu 18.04, 20.04, 22.04, 24.04 - Centos 7 ### Toolchain Dependencies The following dependencies will be fetched prebuilt by `toolchain_install.sh`. @@ -44,24 +45,21 @@ The following dependencies will be fetched prebuilt by `toolchain_install.sh`. 
- [LLVM](https://llvm.org/) - [RISCV-GNU-TOOLCHAIN](https://github.com/riscv-collab/riscv-gnu-toolchain) - [Verilator](https://www.veripool.org/verilator) -- [FpNew](https://github.com/pulp-platform/fpnew.git) +- [cvfpu](https://github.com/openhwgroup/cvfpu.git) - [SoftFloat](https://github.com/ucb-bar/berkeley-softfloat-3.git) - [Ramulator](https://github.com/CMU-SAFARI/ramulator.git) - [Yosys](https://github.com/YosysHQ/yosys) - [Sv2v](https://github.com/zachjs/sv2v) -### Install development tools -```sh -sudo apt-get install build-essential -sudo apt-get install binutils -sudo apt-get install python -sudo apt-get install uuid-dev -sudo apt-get install git -``` ### Install Vortex codebase ```sh git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git cd vortex ``` +### Install system dependencies +```sh +# ensure dependent libraries are present +sudo ./ci/install_dependencies.sh +``` ### Configure your build folder ```sh mkdir build @@ -96,19 +94,19 @@ make -s make -s make install ``` -- Building Vortex 64-bit simply requires using --xlen=64 configure option. +- Building Vortex 64-bit requires setting --xlen=64 configure option. ```sh -../configure --xlen=32 --tooldir=$HOME/tools +../configure --xlen=64 --tooldir=$HOME/tools ``` - Sourcing "./ci/toolchain_env.sh" is required everytime you start a new terminal. we recommend adding "source /ci/toolchain_env.sh" to your ~/.bashrc file to automate the process at login. ```sh echo "source /ci/toolchain_env.sh" >> ~/.bashrc ``` -- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again to get it propagated into your build folder. +- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again without any options to get changes propagated to your build folder. ```sh ../configure ``` -- To debug the GPU, you can generate a "run.log" trace. 
see /docs/debugging.md for more information. +- To debug the GPU, the simulation can generate a runtime trace for analysis. See /docs/debugging.md for more information. ```sh ./ci/blackbox.sh --app=demo --debug=3 ``` diff --git a/ci/blackbox.sh b/ci/blackbox.sh index fe94677aa..27a43781b 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +SCRIPT_DIR=$(dirname "$0") +ROOT_DIR=$SCRIPT_DIR/.. + show_usage() { echo "Vortex BlackBox Test Driver v1.0" @@ -29,302 +32,174 @@ show_help() echo "--rebuild: 0=disable, 1=force, 2=auto, 3=temp" } -SCRIPT_DIR=$(dirname "$0") -ROOT_DIR=$SCRIPT_DIR/.. - -DRIVER=simx -APP=sgemm -CLUSTERS=1 -CORES=1 -WARPS=4 -THREADS=4 -L2= -L3= -DEBUG=0 -DEBUG_LEVEL=0 -SCOPE=0 -HAS_ARGS=0 -PERF_CLASS=0 -REBUILD=2 -TEMPBUILD=0 -LOGFILE=run.log - -for i in "$@" -do -case $i in - --driver=*) - DRIVER=${i#*=} - shift - ;; - --app=*) - APP=${i#*=} - shift - ;; - --clusters=*) - CLUSTERS=${i#*=} - shift - ;; - --cores=*) - CORES=${i#*=} - shift - ;; - --warps=*) - WARPS=${i#*=} - shift - ;; - --threads=*) - THREADS=${i#*=} - shift - ;; - --l2cache) - L2=-DL2_ENABLE - shift - ;; - --l3cache) - L3=-DL3_ENABLE - shift - ;; - --debug=*) - DEBUG_LEVEL=${i#*=} - DEBUG=1 - shift - ;; - --scope) - SCOPE=1 - CORES=1 - shift - ;; - --perf=*) - PERF_FLAG=-DPERF_ENABLE - PERF_CLASS=${i#*=} - shift - ;; - --args=*) - ARGS=${i#*=} - HAS_ARGS=1 - shift - ;; - --rebuild=*) - REBUILD=${i#*=} - shift - ;; - --log=*) - LOGFILE=${i#*=} - shift - ;; - --help) - show_help - exit 0 - ;; - *) - show_usage - exit -1 - ;; -esac -done - -if [ $REBUILD -eq 3 ]; -then - REBUILD=1 - TEMPBUILD=1 -fi - -case $DRIVER in - gpu) - DRIVER_PATH= - ;; - simx) - DRIVER_PATH=$ROOT_DIR/runtime/simx - ;; - rtlsim) - DRIVER_PATH=$ROOT_DIR/runtime/rtlsim - ;; - opae) - DRIVER_PATH=$ROOT_DIR/runtime/opae - ;; - xrt) - DRIVER_PATH=$ROOT_DIR/runtime/xrt - ;; - *) - echo "invalid 
driver: $DRIVER" - exit -1 - ;; -esac - -if [ -d "$ROOT_DIR/tests/opencl/$APP" ]; -then - APP_PATH=$ROOT_DIR/tests/opencl/$APP -elif [ -d "$ROOT_DIR/tests/regression/$APP" ]; -then - APP_PATH=$ROOT_DIR/tests/regression/$APP -else - echo "Application folder not found: $APP" - exit -1 -fi - -if [ "$DRIVER" = "gpu" ]; -then - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER" - OPTS=$ARGS make -C $APP_PATH run-$DRIVER - status=$? +add_option() { + if [ -n "$1" ]; then + echo "$1 $2" else - echo "running: make -C $APP_PATH run-$DRIVER" - make -C $APP_PATH run-$DRIVER - status=$? + echo "$2" fi +} - exit $status -fi - -CONFIGS="-DNUM_CLUSTERS=$CLUSTERS -DNUM_CORES=$CORES -DNUM_WARPS=$WARPS -DNUM_THREADS=$THREADS $L2 $L3 $PERF_FLAG $CONFIGS" - -echo "CONFIGS=$CONFIGS" - -if [ $REBUILD -ne 0 ] -then - BLACKBOX_CACHE=blackbox.$DRIVER.cache - if [ -f "$BLACKBOX_CACHE" ] - then - LAST_CONFIGS=`cat $BLACKBOX_CACHE` - fi +DEFAULTS() { + DRIVER=simx + APP=sgemm + DEBUG=0 + DEBUG_LEVEL=0 + SCOPE=0 + HAS_ARGS=0 + PERF_CLASS=0 + CONFIGS="$CONFIGS" + REBUILD=2 + TEMPBUILD=0 + LOGFILE=run.log +} - if [ $REBUILD -eq 1 ] || [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ]; +parse_args() { + DEFAULTS + for i in "$@"; do + case $i in + --driver=*) DRIVER=${i#*=} ;; + --app=*) APP=${i#*=} ;; + --clusters=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CLUSTERS=${i#*=}") ;; + --cores=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_CORES=${i#*=}") ;; + --warps=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_WARPS=${i#*=}") ;; + --threads=*) CONFIGS=$(add_option "$CONFIGS" "-DNUM_THREADS=${i#*=}") ;; + --l2cache) CONFIGS=$(add_option "$CONFIGS" "-DL2_ENABLE") ;; + --l3cache) CONFIGS=$(add_option "$CONFIGS" "-DL3_ENABLE") ;; + --perf=*) CONFIGS=$(add_option "$CONFIGS" "-DPERF_ENABLE"); PERF_CLASS=${i#*=} ;; + --debug=*) DEBUG=1; DEBUG_LEVEL=${i#*=} ;; + --scope) SCOPE=1; ;; + --args=*) HAS_ARGS=1; ARGS=${i#*=} ;; + --rebuild=*) REBUILD=${i#*=} ;; + 
--log=*) LOGFILE=${i#*=} ;; + --help) show_help; exit 0 ;; + *) show_usage; exit 1 ;; + esac + done + + if [ $REBUILD -eq 3 ]; then - make -C $DRIVER_PATH clean-driver > /dev/null - echo "$CONFIGS+$DEBUG+$SCOPE" > $BLACKBOX_CACHE + REBUILD=1 + TEMPBUILD=1 fi -fi - -# export performance monitor class identifier -export VORTEX_PROFILING=$PERF_CLASS +} -status=0 +set_driver_path() { + case $DRIVER in + gpu) DRIVER_PATH="" ;; + simx|rtlsim|opae|xrt) DRIVER_PATH="$ROOT_DIR/runtime/$DRIVER" ;; + *) echo "Invalid driver: $DRIVER"; exit 1 ;; + esac +} -# ensure config update -make -C $ROOT_DIR/hw config > /dev/null +set_app_path() { + if [ -d "$ROOT_DIR/tests/opencl/$APP" ]; then + APP_PATH="$ROOT_DIR/tests/opencl/$APP" + elif [ -d "$ROOT_DIR/tests/regression/$APP" ]; then + APP_PATH="$ROOT_DIR/tests/regression/$APP" + else + echo "Application folder not found: $APP" + exit 1 + fi +} -# ensure the stub driver is present -make -C $ROOT_DIR/runtime/stub > /dev/null +build_driver() { + local cmd_opts="" + [ $DEBUG -ne 0 ] && cmd_opts=$(add_option "$cmd_opts" "DEBUG=$DEBUG_LEVEL") + [ $SCOPE -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "SCOPE=1") + [ $TEMPBUILD -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "DESTDIR=\"$TEMPDIR\"") + [ -n "$CONFIGS" ] && cmd_opts=$(add_option "$cmd_opts" "CONFIGS=\"$CONFIGS\"") -if [ $DEBUG -ne 0 ] -then - # running application - if [ $TEMPBUILD -eq 1 ] - then - # setup temp directory - TEMPDIR=$(mktemp -d) - mkdir -p "$TEMPDIR/$DRIVER" + if [ -n "$cmd_opts" ]; then + echo "Running: $cmd_opts make -C $DRIVER_PATH > /dev/null" + eval "$cmd_opts make -C $DRIVER_PATH > /dev/null" + else + echo "Running: make -C $DRIVER_PATH > /dev/null" + make -C $DRIVER_PATH > /dev/null + fi +} - # driver initialization - if [ $SCOPE -eq 1 ] - then - echo "running: DESTDIR=$TEMPDIR/$DRIVER DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DESTDIR="$TEMPDIR/$DRIVER" DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - 
else - echo "running: DESTDIR=$TEMPDIR/$DRIVER DEBUG=$DEBUG_LEVEL CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DESTDIR="$TEMPDIR/$DRIVER" DEBUG=$DEBUG_LEVEL CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - fi +run_app() { + local cmd_opts="" + [ $DEBUG -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "DEBUG=1") + [ $TEMPBUILD -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "VORTEX_RT_PATH=\"$TEMPDIR\"") + [ $HAS_ARGS -eq 1 ] && cmd_opts=$(add_option "$cmd_opts" "OPTS=\"$ARGS\"") - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" - DEBUG=1 VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 - status=$? + if [ $DEBUG -ne 0 ]; then + if [ -n "$cmd_opts" ]; then + echo "Running: $cmd_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" + eval "$cmd_opts make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" else - echo "running: VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" - DEBUG=1 VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 - status=$? + echo "Running: make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" + make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 fi - - # cleanup temp directory - trap "rm -rf $TEMPDIR" EXIT else - # driver initialization - if [ $SCOPE -eq 1 ] - then - echo "running: DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null + if [ -n "$cmd_opts" ]; then + echo "Running: $cmd_opts make -C $APP_PATH run-$DRIVER" + eval "$cmd_opts make -C $APP_PATH run-$DRIVER" else - echo "running: DEBUG=$DEBUG_LEVEL CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DEBUG=$DEBUG_LEVEL CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null + echo "Running: make -C $APP_PATH run-$DRIVER" + make -C $APP_PATH run-$DRIVER fi + fi + status=$? 
+ return $status +} - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" - DEBUG=1 OPTS=$ARGS make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 - status=$? - else - echo "running: make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1" - DEBUG=1 make -C $APP_PATH run-$DRIVER > $LOGFILE 2>&1 - status=$? - fi +main() { + parse_args "$@" + set_driver_path + set_app_path + + # execute on default installed GPU + if [ "$DRIVER" = "gpu" ]; then + run_app + exit $? fi - if [ -f "$APP_PATH/trace.vcd" ] - then - mv -f $APP_PATH/trace.vcd . + if [ -n "$CONFIGS" ]; then + echo "CONFIGS=$CONFIGS" fi -else - if [ $TEMPBUILD -eq 1 ] - then - # setup temp directory - TEMPDIR=$(mktemp -d) - mkdir -p "$TEMPDIR/$DRIVER" - # driver initialization - if [ $SCOPE -eq 1 ] - then - echo "running: DESTDIR=$TEMPDIR/$DRIVER SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DESTDIR="$TEMPDIR/$DRIVER" SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - else - echo "running: DESTDIR=$TEMPDIR/$DRIVER CONFIGS=$CONFIGS make -C $DRIVER_PATH" - DESTDIR="$TEMPDIR/$DRIVER" CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - fi + if [ $REBUILD -ne 0 ]; then + BLACKBOX_CACHE=blackbox.$DRIVER.cache + LAST_CONFIGS=$(cat "$BLACKBOX_CACHE" 2>/dev/null || echo "") - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER" - VORTEX_RT_PATH=$TEMPDIR OPTS=$ARGS make -C $APP_PATH run-$DRIVER - status=$? - else - echo "running: VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER" - VORTEX_RT_PATH=$TEMPDIR make -C $APP_PATH run-$DRIVER - status=$? 
+ if [ $REBUILD -eq 1 ] || [ "$CONFIGS+$DEBUG+$SCOPE" != "$LAST_CONFIGS" ]; then + make -C $DRIVER_PATH clean-driver > /dev/null + echo "$CONFIGS+$DEBUG+$SCOPE" > "$BLACKBOX_CACHE" fi + fi - # cleanup temp directory + export VORTEX_PROFILING=$PERF_CLASS + + make -C "$ROOT_DIR/hw" config > /dev/null + make -C "$ROOT_DIR/runtime/stub" > /dev/null + + if [ $TEMPBUILD -eq 1 ]; then + # setup temp directory + TEMPDIR=$(mktemp -d) + mkdir -p "$TEMPDIR" + # build stub driver + echo "running: DESTDIR=$TEMPDIR make -C $ROOT_DIR/runtime/stub" + DESTDIR="$TEMPDIR" make -C $ROOT_DIR/runtime/stub > /dev/null + # register tempdir cleanup on exit trap "rm -rf $TEMPDIR" EXIT - else + fi - # driver initialization - if [ $SCOPE -eq 1 ] - then - echo "running: SCOPE=1 CONFIGS=$CONFIGS make -C $DRIVER_PATH" - SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - else - echo "running: CONFIGS=$CONFIGS make -C $DRIVER_PATH" - CONFIGS="$CONFIGS" make -C $DRIVER_PATH > /dev/null - fi + build_driver + run_app + status=$? - # running application - if [ $HAS_ARGS -eq 1 ] - then - echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER" - OPTS=$ARGS make -C $APP_PATH run-$DRIVER - status=$? - else - echo "running: make -C $APP_PATH run-$DRIVER" - make -C $APP_PATH run-$DRIVER - status=$? - fi + if [ $DEBUG -eq 1 ] && [ -f "$APP_PATH/trace.vcd" ]; then + mv -f $APP_PATH/trace.vcd . + fi + + if [ $SCOPE -eq 1 ] && [ -f "$APP_PATH/scope.vcd" ]; then + mv -f $APP_PATH/scope.vcd . fi -fi -exit $status + exit $status +} + +main "$@" \ No newline at end of file diff --git a/ci/install_dependencies.sh b/ci/install_dependencies.sh new file mode 100755 index 000000000..a62ed253b --- /dev/null +++ b/ci/install_dependencies.sh @@ -0,0 +1,46 @@ +#!/bin/sh + +# Copyright 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +# Function to check if GCC version is less than 11 +check_gcc_version() { + local gcc_version + gcc_version=$(gcc -dumpversion) + if dpkg --compare-versions "$gcc_version" lt 11; then + return 0 # GCC version is less than 11 + else + return 1 # GCC version is 11 or greater + fi +} + +# Update package list +apt-get update -y + +# install system dependencies +apt-get install -y build-essential valgrind libstdc++6 binutils python3 uuid-dev ccache + +# Check and install GCC 11 if necessary +if check_gcc_version; then + echo "GCC version is less than 11. Installing GCC 11..." + add-apt-repository -y ppa:ubuntu-toolchain-r/test + apt-get update + apt-get install -y g++-11 gcc-11 + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100 + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 +else + echo "GCC version is 11 or greater. No need to install GCC 11." 
+fi diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 9ba65cfee..4297eee8d 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -43,31 +43,23 @@ isa() make -C tests/riscv/isa run-simx make -C tests/riscv/isa run-rtlsim - make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-32f + make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f - make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-32f + make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f - make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-32f + make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f if [ "$XLEN" == "64" ] then - make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64d + make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64d - make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64d + make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64d - make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64f + make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64f - make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64f + make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C 
sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64f - make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-64fx + make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64fx fi # clean build @@ -102,10 +94,18 @@ regression() # test global barrier CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2 CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tgbar" --cores=2 + CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-n1 -tgbar" --cores=2 # test local barrier ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-n1 -tbar" + ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-n1 -tbar" + + # test temp driver mode for + ./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3 + + # test for matmul + CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1" echo "regression tests done!" } @@ -129,23 +129,15 @@ opencl() vm(){ echo "begin vm tests..." 
- make -C sim/simx - make -C runtime/simx - - make -C tests/kernel run-simx - - # Regression tests + make -C sim/simx clean && CONFIGS="-DVM_ENABLE" make -C sim/simx + make -C runtime/simx clean && CONFIGS="-DVM_ENABLE" make -C runtime/simx + make -C tests/opencl run-simx make -C tests/regression run-simx - # test global barrier - CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2 - - # test local barrier - ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar" - - # OpenCL tests + make -C sim/simx clean && CONFIGS="-DVM_ENABLE -DVM_ADDR_MODE=BARE" make -C sim/simx + make -C runtime/simx clean && CONFIGS="-DVM_ENABLE -DVM_ADDR_MODE=BARE" make -C runtime/simx make -C tests/opencl run-simx - ./ci/blackbox.sh --driver=simx --app=lbm --warps=8 + make -C tests/regression run-simx echo "vm tests done!" } @@ -261,37 +253,39 @@ config2() # test opaesim ./ci/blackbox.sh --driver=opae --app=printf ./ci/blackbox.sh --driver=opae --app=diverge + ./ci/blackbox.sh --driver=xrt --app=diverge # disable DPI - CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood - CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood + if [ "$XLEN" == "64" ]; then + # need to disable trig on 64-bit due to a bug inside fpnew's sqrt core. 
+ CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-xtrig -xbar -xgbar" + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood --args="-xtrig -xbar -xgbar" + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood --args="-xtrig -xbar -xgbar" + else + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --app=dogfood + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=opae --app=dogfood + CONFIGS="-DDPI_DISABLE -DFPU_FPNEW" ./ci/blackbox.sh --driver=xrt --app=dogfood + fi # custom program startup address make -C tests/regression/dogfood clean-kernel - if [ "$XLEN" == "64" ]; then - STARTUP_ADDR=0x180000000 make -C tests/regression/dogfood - else - STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood - fi + STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood ./ci/blackbox.sh --driver=simx --app=dogfood ./ci/blackbox.sh --driver=rtlsim --app=dogfood make -C tests/regression/dogfood clean-kernel # disabling M & F extensions - make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null - make -C tests/riscv/isa run-rtlsim-32i + make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32i make -C sim/rtlsim clean # disabling ZICOND extension CONFIGS="-DEXT_ZICOND_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=demo - # test AXI bus - AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --app=mstress - - # test 128-bit MEM block + # test 128-bit memory block CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=xrt --app=mstress - # test XLEN-bit MEM block + # test XLEN-bit memory block CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=opae --app=mstress CONFIGS="-DMEM_BLOCK_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=mstress @@ -299,11 
+293,27 @@ config2() CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8 CONFIGS="-DMEM_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8 - # test single-bank DRAM - CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=opae --app=mstress + # test single-bank memory + if [ "$XLEN" == "64" ]; then + CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=48" ./ci/blackbox.sh --driver=xrt --app=mstress + else + CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32" ./ci/blackbox.sh --driver=xrt --app=mstress + fi + + # test larger memory address + if [ "$XLEN" == "64" ]; then + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=49" ./ci/blackbox.sh --driver=xrt --app=mstress + else + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_ADDR_WIDTH=33" ./ci/blackbox.sh --driver=xrt --app=mstress + fi - # test 27-bit DRAM address - CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=opae --app=mstress + # test memory banks interleaving + CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress + CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress echo "configuration-2 tests done!" 
} @@ -329,20 +339,32 @@ debug() test_csv_trace + CONFIGS="-O0" ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" + CONFIGS="-O0" ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" + CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=xrt --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" CONFIGS="-DSOCKET_SIZE=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --debug=1 --perf=1 --app=demo --args="-n1" - ./ci/blackbox.sh --driver=opae --cores=1 --scope --app=demo --args="-n1" echo "debugging tests done!" } +scope() +{ + echo "begin scope tests..." + + SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope + SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope + + echo "debugging scope done!" +} + stress() { echo "begin stress tests..." # test verilator reset values CONFIGS="-DVERILATOR_RESET_VALUE=1 -DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=opae --cores=2 --clusters=2 --l2cache --l3cache --app=dogfood - CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --args="-n128" --l2cache + CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=xrt --app=sgemmx --args="-n128" --l2cache echo "stress tests done!" 
} @@ -360,7 +382,7 @@ synthesis() show_usage() { echo "Vortex Regression Test" - echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--stress] [--synthesis] [--all] [--h|--help]" + echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--all] [--h|--help]" } declare -a tests=() @@ -401,6 +423,9 @@ while [ "$1" != "" ]; do --debug ) tests+=("debug") ;; + --scope ) + tests+=("scope") + ;; --stress ) tests+=("stress") ;; @@ -415,9 +440,11 @@ while [ "$1" != "" ]; do tests+=("regression") tests+=("opencl") tests+=("cache") + tests+=("vm") tests+=("config1") tests+=("config2") tests+=("debug") + tests+=("scope") tests+=("stress") tests+=("synthesis") ;; diff --git a/ci/system_updates.sh b/ci/system_updates.sh deleted file mode 100755 index 43abbe5ab..000000000 --- a/ci/system_updates.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/sh - -# Copyright 2019-2023 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -set -e - -apt-get update -y - -add-apt-repository -y ppa:ubuntu-toolchain-r/test -apt-get update -apt-get install -y g++-11 gcc-11 -update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 100 -update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 - -apt-get install -y build-essential valgrind libstdc++6 binutils python uuid-dev ccache diff --git a/ci/toolchain_env.sh.in b/ci/toolchain_env.sh.in index be140d28d..9c3387c13 100755 --- a/ci/toolchain_env.sh.in +++ b/ci/toolchain_env.sh.in @@ -15,9 +15,7 @@ # limitations under the License. TOOLDIR=${TOOLDIR:=@TOOLDIR@} - -# export VERILATOR_ROOT=$TOOLDIR/verilator -# export PATH=$VERILATOR_ROOT/bin:$PATH +export PATH=$TOOLDIR/verilator/bin:$PATH export SV2V_PATH=$TOOLDIR/sv2v export PATH=$SV2V_PATH/bin:$PATH diff --git a/ci/toolchain_install.sh.in b/ci/toolchain_install.sh.in index 935568ff0..01ebe889b 100755 --- a/ci/toolchain_install.sh.in +++ b/ci/toolchain_install.sh.in @@ -23,9 +23,9 @@ OSVERSION=${OSVERSION:=@OSVERSION@} riscv32() { case $OSVERSION in - "centos/7") parts=$(eval echo {a..h}) ;; - "ubuntu/focal") parts=$(eval echo {a..k}) ;; - *) parts=$(eval echo {a..j}) ;; + "centos/7") parts=$(eval echo {a..l}) ;; + "ubuntu/bionic") parts=$(eval echo {a..j}) ;; + *) parts=$(eval echo {a..k}) ;; esac rm -f riscv32-gnu-toolchain.tar.bz2.parta* for x in $parts @@ -41,7 +41,7 @@ riscv32() riscv64() { case $OSVERSION in - "centos/7") parts=$(eval echo {a..h}) ;; + "centos/7") parts=$(eval echo {a..l}) ;; *) parts=$(eval echo {a..j}) ;; esac rm -f riscv64-gnu-toolchain.tar.bz2.parta* diff --git a/ci/trace_csv.py b/ci/trace_csv.py index 4a36f5f6a..077f8027e 100755 --- a/ci/trace_csv.py +++ b/ci/trace_csv.py @@ -44,7 +44,8 @@ def load_config(filename): 'num_barriers': int(config_match.group(7)), } return config - return None + print("Error: missing CONFIGS: header") + sys.exit(1) def parse_simx(log_lines): pc_pattern = r"PC=(0x[0-9a-fA-F]+)" @@ -274,6 +275,8 @@ def split_log_file(log_filename): if 
current_sublog is not None: sublogs.append(current_sublog) + else: + sublogs.append(log_lines) return sublogs diff --git a/ci/travis_run.py b/ci/travis_run.py index 907cf5ce4..70459cbee 100755 --- a/ci/travis_run.py +++ b/ci/travis_run.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright 2019-2023 # diff --git a/config.mk.in b/config.mk.in index 8ec052094..57f77059e 100644 --- a/config.mk.in +++ b/config.mk.in @@ -31,9 +31,4 @@ RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv$(XLEN)-gnu-toolchain RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX) -VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime -VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel - THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party - -VM_ENABLE ?= @VM_ENABLE@ \ No newline at end of file diff --git a/configure b/configure index f2e4781ef..d2483a796 100755 --- a/configure +++ b/configure @@ -26,6 +26,8 @@ detect_osversion() { case "$VERSION_CODENAME" in bionic) osversion="ubuntu/bionic";; focal) osversion="ubuntu/focal";; + jammy) osversion="ubuntu/focal";; + noble) osversion="ubuntu/focal";; # Add new versions as needed esac ;; @@ -63,7 +65,7 @@ copy_files() { filename_no_ext="${filename%.in}" dest_file="$dest_dir/$filename_no_ext" mkdir -p "$dest_dir" - sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" "$file" > "$dest_file" + sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@CURRENTDIR@|$CURRENT_DIR|g" "$file" > "$dest_file" # apply permissions to bash scripts read -r firstline < "$dest_file" if [[ "$firstline" =~ ^#!.*bash ]]; then @@ -114,7 +116,6 @@ default_xlen=32 default_tooldir=$HOME/tools default_osversion=$(detect_osversion) default_prefix=$CURRENT_DIR -default_vm=0 # load default configuration parameters from existing config.mk if [ -f "config.mk" ]; then @@ 
-127,7 +128,6 @@ if [ -f "config.mk" ]; then TOOLDIR\ ?*) default_tooldir=${value//\?=/} ;; OSVERSION\ ?*) default_osversion=${value//\?=/} ;; PREFIX\ ?*) default_prefix=${value//\?=/} ;; - VM_ENABLE\ ?*) default_vm=${value//\?=/} ;; esac done < config.mk fi @@ -137,7 +137,6 @@ XLEN=${XLEN:=$default_xlen} TOOLDIR=${TOOLDIR:=$default_tooldir} OSVERSION=${OSVERSION:=$default_osversion} PREFIX=${PREFIX:=$default_prefix} -VM_ENABLE=${VM_ENABLE:=$default_vm} # parse command line arguments usage() { @@ -146,7 +145,6 @@ usage() { echo " --tooldir= Set the TOOLDIR path (default: $HOME/tools)" echo " --osversion= Set the OS Version (default: $(detect_osversion))" echo " --prefix= Set installation directory" - echo " --vm_enable= Enable Virtual Memory support (default: 0)" exit 1 } while [[ "$#" -gt 0 ]]; do @@ -155,7 +153,6 @@ while [[ "$#" -gt 0 ]]; do --tooldir=*) TOOLDIR="${1#*=}" ;; --osversion=*) OSVERSION="${1#*=}" ;; --prefix=*) PREFIX="${1#*=}" ;; - --vm_enable=*) VM_ENABLE="${1#*=}" ;; -h|--help) usage ;; *) echo "Unknown parameter passed: $1"; usage ;; esac diff --git a/hw/dpi/util_dpi.cpp b/hw/dpi/util_dpi.cpp index 020816b0b..d804d4885 100644 --- a/hw/dpi/util_dpi.cpp +++ b/hw/dpi/util_dpi.cpp @@ -47,8 +47,6 @@ extern "C" { void dpi_trace(int level, const char* format, ...); void dpi_trace_start(); void dpi_trace_stop(); - - uint64_t dpi_uuid_gen(bool reset, int wid); } bool sim_trace_enabled(); @@ -204,17 +202,3 @@ void dpi_trace_start() { void dpi_trace_stop() { sim_trace_enable(false); } - -/////////////////////////////////////////////////////////////////////////////// - -std::unordered_map g_uuid_gens; - -uint64_t dpi_uuid_gen(bool reset, int wid) { - if (reset) { - g_uuid_gens.clear(); - return 0; - } - uint32_t instr_uuid = g_uuid_gens[wid]++; - uint64_t uuid = (uint64_t(wid) << 32) | instr_uuid; - return uuid; -} \ No newline at end of file diff --git a/hw/dpi/util_dpi.vh b/hw/dpi/util_dpi.vh index 0da62b041..74b095af1 100644 --- a/hw/dpi/util_dpi.vh +++ 
b/hw/dpi/util_dpi.vh @@ -30,6 +30,4 @@ import "DPI-C" function void dpi_trace(input int level, input string format /*ve import "DPI-C" function void dpi_trace_start(); import "DPI-C" function void dpi_trace_stop(); -import "DPI-C" function longint dpi_uuid_gen(input logic reset, input int wid); - `endif diff --git a/hw/rtl/VX_cluster.sv b/hw/rtl/VX_cluster.sv index 714e69dd4..73d9b34ab 100644 --- a/hw/rtl/VX_cluster.sv +++ b/hw/rtl/VX_cluster.sv @@ -56,14 +56,12 @@ module VX_cluster import VX_gpu_pkg::*; #( VX_gbar_bus_if per_socket_gbar_bus_if[`NUM_SOCKETS](); VX_gbar_bus_if gbar_bus_if(); - `RESET_RELAY (gbar_reset, reset); - VX_gbar_arb #( .NUM_REQS (`NUM_SOCKETS), .OUT_BUF ((`NUM_SOCKETS > 2) ? 1 : 0) // bgar_unit has no backpressure ) gbar_arb ( .clk (clk), - .reset (gbar_reset), + .reset (reset), .bus_in_if (per_socket_gbar_bus_if), .bus_out_if (gbar_bus_if) ); @@ -72,7 +70,7 @@ module VX_cluster import VX_gpu_pkg::*; #( .INSTANCE_ID ($sformatf("gbar%0d", CLUSTER_ID)) ) gbar_unit ( .clk (clk), - .reset (gbar_reset), + .reset (reset), .gbar_bus_if (gbar_bus_if) ); @@ -102,8 +100,8 @@ module VX_cluster import VX_gpu_pkg::*; #( .WRITEBACK (`L2_WRITEBACK), .DIRTY_BYTES (`L2_WRITEBACK), .UUID_WIDTH (`UUID_WIDTH), - .CORE_OUT_BUF (2), - .MEM_OUT_BUF (2), + .CORE_OUT_BUF (3), + .MEM_OUT_BUF (3), .NC_ENABLE (1), .PASSTHRU (!`L2_ENABLED) ) l2cache ( @@ -118,21 +116,17 @@ module VX_cluster import VX_gpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// - VX_dcr_bus_if socket_dcr_bus_tmp_if(); - assign socket_dcr_bus_tmp_if.write_valid = dcr_bus_if.write_valid && (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END); - assign socket_dcr_bus_tmp_if.write_addr = dcr_bus_if.write_addr; - assign socket_dcr_bus_tmp_if.write_data = dcr_bus_if.write_data; - wire [`NUM_SOCKETS-1:0] per_socket_busy; - VX_dcr_bus_if socket_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (socket_dcr_bus_if, 
socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1)); - // Generate all sockets - for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : sockets + for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : g_sockets `RESET_RELAY (socket_reset, reset); + VX_dcr_bus_if socket_dcr_bus_if(); + wire is_base_dcr_addr = (dcr_bus_if.write_addr >= `VX_DCR_BASE_STATE_BEGIN && dcr_bus_if.write_addr < `VX_DCR_BASE_STATE_END); + `BUFFER_DCR_BUS_IF (socket_dcr_bus_if, dcr_bus_if, is_base_dcr_addr, (`NUM_SOCKETS > 1)) + VX_socket #( .SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id), .INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id)) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 3826918f4..c349f367a 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -111,6 +111,24 @@ `define SOCKET_SIZE `MIN(4, `NUM_CORES) `endif +// Size of Tensor Core +`ifndef TC_SIZE +`define TC_SIZE 8 +`endif + +// Number of TCs per Warp +`ifndef TC_NUM +`define TC_NUM 4 +`endif + +`ifndef NUM_TCU_LANES +`define NUM_TCU_LANES `TC_NUM +`endif + +`ifndef NUM_TCU_BLOCKS +`define NUM_TCU_BLOCKS `ISSUE_WIDTH +`endif + `ifdef L2_ENABLE `define L2_ENABLED 1 `else @@ -159,7 +177,7 @@ `endif `ifndef STARTUP_ADDR -`define STARTUP_ADDR 64'h180000000 +`define STARTUP_ADDR 64'h080000000 `endif `ifndef USER_BASE_ADDR @@ -172,7 +190,7 @@ `ifdef VM_ENABLE `ifndef PAGE_TABLE_BASE_ADDR -`define PAGE_TABLE_BASE_ADDR 64'h1F0000000 +`define PAGE_TABLE_BASE_ADDR 64'h0F0000000 `endif `endif @@ -229,15 +247,17 @@ `endif `define STACK_SIZE (1 << `STACK_LOG2_SIZE) -`define RESET_DELAY 8 +`define RESET_DELAY 8 `ifndef STALL_TIMEOUT `define STALL_TIMEOUT (100000 * (1 ** (`L2_ENABLED + `L3_ENABLED))) `endif `ifndef SV_DPI +`ifndef DPI_DISABLE `define DPI_DISABLE `endif +`endif `ifndef FPU_FPNEW `ifndef FPU_DSP @@ -719,7 +739,7 @@ `endif `ifndef MEMORY_BANKS -`define MEMORY_BANKS 8 +`define MEMORY_BANKS 2 `endif // Number of Memory Ports from LLC diff --git 
a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 686124c16..4ccb00880 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -50,10 +50,16 @@ `define PERF_CTR_BITS 44 `ifndef NDEBUG +`define UUID_ENABLE +`define UUID_WIDTH 44 +`else +`ifdef SCOPE +`define UUID_ENABLE `define UUID_WIDTH 44 `else `define UUID_WIDTH 1 `endif +`endif `define PC_BITS (`XLEN-1) `define OFFSET_BITS 12 @@ -227,22 +233,19 @@ `define INST_FENCE_D 1'h0 `define INST_FENCE_I 1'h1 -`define INST_FPU_ADD 4'b0000 -`define INST_FPU_SUB 4'b0001 -`define INST_FPU_MUL 4'b0010 -`define INST_FPU_DIV 4'b0011 -`define INST_FPU_SQRT 4'b0100 -`define INST_FPU_CMP 4'b0101 // frm: LE=0, LT=1, EQ=2 -`define INST_FPU_F2F 4'b0110 -`define INST_FPU_MISC 4'b0111 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7 -`define INST_FPU_F2I 4'b1000 -`define INST_FPU_F2U 4'b1001 -`define INST_FPU_I2F 4'b1010 -`define INST_FPU_U2F 4'b1011 -`define INST_FPU_MADD 4'b1100 -`define INST_FPU_MSUB 4'b1101 -`define INST_FPU_NMSUB 4'b1110 -`define INST_FPU_NMADD 4'b1111 +`define INST_FPU_ADD 4'b0000 // SUB=fmt[1] +`define INST_FPU_MUL 4'b0001 +`define INST_FPU_MADD 4'b0010 // SUB=fmt[1] +`define INST_FPU_NMADD 4'b0011 // SUB=fmt[1] +`define INST_FPU_DIV 4'b0100 +`define INST_FPU_SQRT 4'b0101 +`define INST_FPU_F2I 4'b1000 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1 +`define INST_FPU_F2U 4'b1001 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1 +`define INST_FPU_I2F 4'b1010 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1 +`define INST_FPU_U2F 4'b1011 // fmt[0]: F32=0, F64=1, fmt[1]: I32=0, I64=1 +`define INST_FPU_CMP 4'b1100 // frm: LE=0, LT=1, EQ=2 +`define INST_FPU_F2F 4'b1101 // fmt[0]: F32=0, F64=1 +`define INST_FPU_MISC 4'b1110 // frm: SGNJ=0, SGNJN=1, SGNJX=2, CLASS=3, MVXW=4, MVWX=5, FMIN=6, FMAX=7 `define INST_FPU_BITS 4 `define INST_FPU_IS_CLASS(op, frm) (op == `INST_FPU_MISC && frm == 3) `define INST_FPU_IS_MVXW(op, frm) (op == `INST_FPU_MISC && frm == 4) @@ -267,14 +270,14 @@ 
/////////////////////////////////////////////////////////////////////////////// -`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks) \ - (`CLOG2(mshr_size) + `CLOG2(num_banks)) +`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width) \ + (uuid_width + `CLOG2(mshr_size) + `CLOG2(num_banks)) `define CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width) \ (`CLOG2(num_reqs) + `CLOG2(line_size / word_size) + tag_width) -`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width) \ - (`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), `CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width)) + 1) +`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, uuid_width) \ + (`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width), `CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width)) + 1) /////////////////////////////////////////////////////////////////////////////// @@ -284,14 +287,14 @@ `define CACHE_CLUSTER_MEM_ARB_TAG(tag_width, num_caches) \ (tag_width + `ARB_SEL_BITS(`UP(num_caches), 1)) -`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, num_caches) \ - `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), num_caches) +`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, num_caches, uuid_width) \ + `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width), num_caches) `define CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \ `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches) -`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \ - `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, 
`CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches) +`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, num_inputs, num_caches, uuid_width) \ + `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches), uuid_width), num_caches) /////////////////////////////////////////////////////////////////////////////// @@ -303,10 +306,10 @@ `define L1_ENABLE `endif -`define ADDR_TYPE_FLUSH 0 -`define ADDR_TYPE_IO 1 -`define ADDR_TYPE_LOCAL 2 // shoud be last since optional -`define ADDR_TYPE_WIDTH (`ADDR_TYPE_LOCAL + `LMEM_ENABLED) +`define MEM_REQ_FLAG_FLUSH 0 +`define MEM_REQ_FLAG_IO 1 +`define MEM_REQ_FLAG_LOCAL 2 // shoud be last since optional +`define MEM_REQ_FLAGS_WIDTH (`MEM_REQ_FLAG_LOCAL + `LMEM_ENABLED) `define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE `define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE)) @@ -320,6 +323,18 @@ /////////////////////////////////////////////////////////////////////////////// +`define NEG_EDGE(dst, src) \ + wire dst; \ + VX_edge_trigger #( \ + .POS (0), \ + .INIT (0) \ + ) __``dst``__ ( \ + .clk (clk), \ + .reset (1'b0), \ + .data_in (src), \ + .data_out (dst) \ + ) + `define BUFFER_EX(dst, src, ena, latency) \ VX_pipe_register #( \ .DATAW ($bits(dst)), \ @@ -359,43 +374,60 @@ assign src.rsp_data = dst.rsp_data; \ assign dst.rsp_ready = src.rsp_ready +`define ASSIGN_VX_MEM_BUS_RO_IF(dst, src) \ + assign dst.req_valid = src.req_valid; \ + assign dst.req_data.rw = 0; \ + assign dst.req_data.addr = src.req_data.addr; \ + assign dst.req_data.data = '0; \ + assign dst.req_data.byteen = '1; \ + assign dst.req_data.flags = src.req_data.flags; \ + assign dst.req_data.tag = src.req_data.tag; \ + assign src.req_ready = dst.req_ready; \ + assign src.rsp_valid = dst.rsp_valid; \ + assign src.rsp_data.data = dst.rsp_data.data; \ + assign src.rsp_data.tag = 
dst.rsp_data.tag; \ + assign dst.rsp_ready = src.rsp_ready + `define ASSIGN_VX_MEM_BUS_IF_X(dst, src, TD, TS) \ assign dst.req_valid = src.req_valid; \ assign dst.req_data.rw = src.req_data.rw; \ - assign dst.req_data.byteen = src.req_data.byteen; \ assign dst.req_data.addr = src.req_data.addr; \ - assign dst.req_data.atype = src.req_data.atype; \ assign dst.req_data.data = src.req_data.data; \ - if (TD != TS) \ + assign dst.req_data.byteen = src.req_data.byteen; \ + assign dst.req_data.flags = src.req_data.flags; \ + /* verilator lint_off GENUNNAMED */ \ + if (TD != TS) begin \ assign dst.req_data.tag = {src.req_data.tag, {(TD-TS){1'b0}}}; \ - else \ + end else begin \ assign dst.req_data.tag = src.req_data.tag; \ + end \ + /* verilator lint_on GENUNNAMED */ \ assign src.req_ready = dst.req_ready; \ assign src.rsp_valid = dst.rsp_valid; \ assign src.rsp_data.data = dst.rsp_data.data; \ assign src.rsp_data.tag = dst.rsp_data.tag[TD-1 -: TS]; \ assign dst.rsp_ready = src.rsp_ready -`define ASSIGN_VX_LSU_MEM_IF(dst, src) \ - assign dst.req_valid = src.req_valid; \ - assign dst.req_data = src.req_data; \ - assign src.req_ready = dst.req_ready; \ - assign src.rsp_valid = dst.rsp_valid; \ - assign src.rsp_data = dst.rsp_data; \ - assign dst.rsp_ready = src.rsp_ready - -`define BUFFER_DCR_BUS_IF(dst, src, enable) \ - if (enable) begin \ - reg [(1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH)-1:0] __dst; \ - always @(posedge clk) begin \ - __dst <= {src.write_valid, src.write_addr, src.write_data}; \ - end \ - assign {dst.write_valid, dst.write_addr, dst.write_data} = __dst; \ +`define BUFFER_DCR_BUS_IF(dst, src, ena, latency) \ + /* verilator lint_off GENUNNAMED */ \ + if (latency != 0) begin \ + VX_pipe_register #( \ + .DATAW (1 + `VX_DCR_ADDR_WIDTH + `VX_DCR_DATA_WIDTH), \ + .DEPTH (latency) \ + ) pipe_reg ( \ + .clk (clk), \ + .reset (1'b0), \ + .enable (1'b1), \ + .data_in ({src.write_valid && ena, src.write_addr, src.write_data}), \ + .data_out ({dst.write_valid, 
dst.write_addr, dst.write_data}) \ + ); \ end else begin \ - assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid, src.write_addr, src.write_data}; \ - end + assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid && ena, src.write_addr, src.write_data}; \ + end \ + /* verilator lint_on GENUNNAMED */ `define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \ + /* verilator lint_off GENUNNAMED */ \ if (count > 1) begin \ wire [count-1:0][width-1:0] __reduce_add_i_field; \ wire [width-1:0] __reduce_add_o_field; \ @@ -421,9 +453,11 @@ end \ end else begin \ assign dst.``field = src[0].``field; \ - end + end \ + /* verilator lint_on GENUNNAMED */ `define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \ + /* verilator lint_off GENUNNAMED */ \ if (block_size != 1) begin \ if (block_size != `NUM_WARPS) begin \ assign dst = {src[`NW_WIDTH-1:`CLOG2(block_size)], `CLOG2(block_size)'(block_idx)}; \ @@ -432,6 +466,7 @@ end \ end else begin \ assign dst = src; \ - end + end \ + /* verilator lint_on GENUNNAMED */ `endif // VX_DEFINE_VH diff --git a/hw/rtl/VX_gpu_pkg.sv b/hw/rtl/VX_gpu_pkg.sv index 393f2a66f..fe35fb391 100644 --- a/hw/rtl/VX_gpu_pkg.sv +++ b/hw/rtl/VX_gpu_pkg.sv @@ -166,7 +166,7 @@ package VX_gpu_pkg; // Memory request tag bits `ifdef ICACHE_ENABLE - localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES); + localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES, `UUID_WIDTH); `else localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES); `endif @@ -197,7 +197,7 @@ package VX_gpu_pkg; // Memory request tag bits `ifdef DCACHE_ENABLE - localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, 
`NUM_DCACHES); + localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES, `UUID_WIDTH); `else localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES); `endif @@ -226,7 +226,7 @@ package VX_gpu_pkg; // Memory request tag bits `ifdef L2_ENABLE - localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH); + localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH, `UUID_WIDTH); `else localparam L2_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH); `endif @@ -247,7 +247,7 @@ package VX_gpu_pkg; // Memory request tag bits `ifdef L3_ENABLE - localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH); + localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH, `UUID_WIDTH); `else localparam L3_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH); `endif @@ -308,6 +308,430 @@ package VX_gpu_pkg; `IGNORE_UNUSED_END +////////////////////////////////// Tracing //////////////////////////////////// + +`ifdef SIMULATION + +`ifdef SV_DPI + import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/); +`endif + + task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type); + case (ex_type) + `EX_ALU: `TRACE(level, ("ALU")) + `EX_LSU: `TRACE(level, ("LSU")) + `EX_SFU: `TRACE(level, ("SFU")) + `ifdef EXT_F_ENABLE + `EX_FPU: `TRACE(level, ("FPU")) + `endif + default: 
`TRACE(level, ("?")) + endcase + endtask + + task trace_ex_op(input int level, + input [`EX_BITS-1:0] ex_type, + input [`INST_OP_BITS-1:0] op_type, + input VX_gpu_pkg::op_args_t op_args + ); + case (ex_type) + `EX_ALU: begin + case (op_args.alu.xtype) + `ALU_TYPE_ARITH: begin + if (op_args.alu.is_w) begin + if (op_args.alu.use_imm) begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDIW")) + `INST_ALU_SLL: `TRACE(level, ("SLLIW")) + `INST_ALU_SRL: `TRACE(level, ("SRLIW")) + `INST_ALU_SRA: `TRACE(level, ("SRAIW")) + default: `TRACE(level, ("?")) + endcase + end else begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDW")) + `INST_ALU_SUB: `TRACE(level, ("SUBW")) + `INST_ALU_SLL: `TRACE(level, ("SLLW")) + `INST_ALU_SRL: `TRACE(level, ("SRLW")) + `INST_ALU_SRA: `TRACE(level, ("SRAW")) + default: `TRACE(level, ("?")) + endcase + end + end else begin + if (op_args.alu.use_imm) begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADDI")) + `INST_ALU_SLL: `TRACE(level, ("SLLI")) + `INST_ALU_SRL: `TRACE(level, ("SRLI")) + `INST_ALU_SRA: `TRACE(level, ("SRAI")) + `INST_ALU_SLT: `TRACE(level, ("SLTI")) + `INST_ALU_SLTU: `TRACE(level, ("SLTIU")) + `INST_ALU_XOR: `TRACE(level, ("XORI")) + `INST_ALU_OR: `TRACE(level, ("ORI")) + `INST_ALU_AND: `TRACE(level, ("ANDI")) + `INST_ALU_LUI: `TRACE(level, ("LUI")) + `INST_ALU_AUIPC: `TRACE(level, ("AUIPC")) + default: `TRACE(level, ("?")) + endcase + end else begin + case (`INST_ALU_BITS'(op_type)) + `INST_ALU_ADD: `TRACE(level, ("ADD")) + `INST_ALU_SUB: `TRACE(level, ("SUB")) + `INST_ALU_SLL: `TRACE(level, ("SLL")) + `INST_ALU_SRL: `TRACE(level, ("SRL")) + `INST_ALU_SRA: `TRACE(level, ("SRA")) + `INST_ALU_SLT: `TRACE(level, ("SLT")) + `INST_ALU_SLTU: `TRACE(level, ("SLTU")) + `INST_ALU_XOR: `TRACE(level, ("XOR")) + `INST_ALU_OR: `TRACE(level, ("OR")) + `INST_ALU_AND: `TRACE(level, ("AND")) + `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ")) + `INST_ALU_CZNE: `TRACE(level, 
("CZERO.NEZ")) + default: `TRACE(level, ("?")) + endcase + end + end + end + `ALU_TYPE_BRANCH: begin + case (`INST_BR_BITS'(op_type)) + `INST_BR_EQ: `TRACE(level, ("BEQ")) + `INST_BR_NE: `TRACE(level, ("BNE")) + `INST_BR_LT: `TRACE(level, ("BLT")) + `INST_BR_GE: `TRACE(level, ("BGE")) + `INST_BR_LTU: `TRACE(level, ("BLTU")) + `INST_BR_GEU: `TRACE(level, ("BGEU")) + `INST_BR_JAL: `TRACE(level, ("JAL")) + `INST_BR_JALR: `TRACE(level, ("JALR")) + `INST_BR_ECALL: `TRACE(level, ("ECALL")) + `INST_BR_EBREAK:`TRACE(level, ("EBREAK")) + `INST_BR_URET: `TRACE(level, ("URET")) + `INST_BR_SRET: `TRACE(level, ("SRET")) + `INST_BR_MRET: `TRACE(level, ("MRET")) + default: `TRACE(level, ("?")) + endcase + end + `ALU_TYPE_MULDIV: begin + if (op_args.alu.is_w) begin + case (`INST_M_BITS'(op_type)) + `INST_M_MUL: `TRACE(level, ("MULW")) + `INST_M_DIV: `TRACE(level, ("DIVW")) + `INST_M_DIVU: `TRACE(level, ("DIVUW")) + `INST_M_REM: `TRACE(level, ("REMW")) + `INST_M_REMU: `TRACE(level, ("REMUW")) + default: `TRACE(level, ("?")) + endcase + end else begin + case (`INST_M_BITS'(op_type)) + `INST_M_MUL: `TRACE(level, ("MUL")) + `INST_M_MULH: `TRACE(level, ("MULH")) + `INST_M_MULHSU:`TRACE(level, ("MULHSU")) + `INST_M_MULHU: `TRACE(level, ("MULHU")) + `INST_M_DIV: `TRACE(level, ("DIV")) + `INST_M_DIVU: `TRACE(level, ("DIVU")) + `INST_M_REM: `TRACE(level, ("REM")) + `INST_M_REMU: `TRACE(level, ("REMU")) + default: `TRACE(level, ("?")) + endcase + end + end + default: `TRACE(level, ("?")) + endcase + end + `EX_LSU: begin + if (op_args.lsu.is_float) begin + case (`INST_LSU_BITS'(op_type)) + `INST_LSU_LW: `TRACE(level, ("FLW")) + `INST_LSU_LD: `TRACE(level, ("FLD")) + `INST_LSU_SW: `TRACE(level, ("FSW")) + `INST_LSU_SD: `TRACE(level, ("FSD")) + default: `TRACE(level, ("?")) + endcase + end else begin + case (`INST_LSU_BITS'(op_type)) + `INST_LSU_LB: `TRACE(level, ("LB")) + `INST_LSU_LH: `TRACE(level, ("LH")) + `INST_LSU_LW: `TRACE(level, ("LW")) + `INST_LSU_LD: `TRACE(level, ("LD")) + 
`INST_LSU_LBU:`TRACE(level, ("LBU")) + `INST_LSU_LHU:`TRACE(level, ("LHU")) + `INST_LSU_LWU:`TRACE(level, ("LWU")) + `INST_LSU_SB: `TRACE(level, ("SB")) + `INST_LSU_SH: `TRACE(level, ("SH")) + `INST_LSU_SW: `TRACE(level, ("SW")) + `INST_LSU_SD: `TRACE(level, ("SD")) + `INST_LSU_FENCE:`TRACE(level,("FENCE")) + default: `TRACE(level, ("?")) + endcase + end + end + `EX_SFU: begin + case (`INST_SFU_BITS'(op_type)) + `INST_SFU_TMC: `TRACE(level, ("TMC")) + `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")) + `INST_SFU_SPLIT: begin + if (op_args.wctl.is_neg) begin + `TRACE(level, ("SPLIT.N")) + end else begin + `TRACE(level, ("SPLIT")) + end + end + `INST_SFU_JOIN: `TRACE(level, ("JOIN")) + `INST_SFU_BAR: `TRACE(level, ("BAR")) + `INST_SFU_PRED: begin + if (op_args.wctl.is_neg) begin + `TRACE(level, ("PRED.N")) + end else begin + `TRACE(level, ("PRED")) + end + end + `INST_SFU_CSRRW: begin + if (op_args.csr.use_imm) begin + `TRACE(level, ("CSRRWI")) + end else begin + `TRACE(level, ("CSRRW")) + end + end + `INST_SFU_CSRRS: begin + if (op_args.csr.use_imm) begin + `TRACE(level, ("CSRRSI")) + end else begin + `TRACE(level, ("CSRRS")) + end + end + `INST_SFU_CSRRC: begin + if (op_args.csr.use_imm) begin + `TRACE(level, ("CSRRCI")) + end else begin + `TRACE(level, ("CSRRC")) + end + end + default: `TRACE(level, ("?")) + endcase + end + `ifdef EXT_F_ENABLE + `EX_FPU: begin + case (`INST_FPU_BITS'(op_type)) + `INST_FPU_ADD: begin + if (op_args.fpu.fmt[1]) begin + if (op_args.fpu.fmt[0]) begin + `TRACE(level, ("FSUB.D")) + end else begin + `TRACE(level, ("FSUB.S")) + end + end else begin + if (op_args.fpu.fmt[0]) begin + `TRACE(level, ("FADD.D")) + end else begin + `TRACE(level, ("FADD.S")) + end + end + end + `INST_FPU_MADD: begin + if (op_args.fpu.fmt[1]) begin + if (op_args.fpu.fmt[0]) begin + `TRACE(level, ("FMSUB.D")) + end else begin + `TRACE(level, ("FMSUB.S")) + end + end else begin + if (op_args.fpu.fmt[0]) begin + `TRACE(level, ("FMADD.D")) + end else begin + `TRACE(level, 
("FMADD.S")) + end + end + end + `INST_FPU_NMADD: begin + if (op_args.fpu.fmt[1]) begin + if (op_args.fpu.fmt[0]) begin + `TRACE(level, ("FNMSUB.D")) + end else begin + `TRACE(level, ("FNMSUB.S")) + end + end else begin + if (op_args.fpu.fmt[0]) begin + `TRACE(level, ("FNMADD.D")) + end else begin + `TRACE(level, ("FNMADD.S")) + end + end + end + `INST_FPU_MUL: begin + if (op_args.fpu.fmt[0]) begin + `TRACE(level, ("FMUL.D")) + end else begin + `TRACE(level, ("FMUL.S")) + end + end + `INST_FPU_DIV: begin + if (op_args.fpu.fmt[0]) begin + `TRACE(level, ("FDIV.D")) + end else begin + `TRACE(level, ("FDIV.S")) + end + end + `INST_FPU_SQRT: begin + if (op_args.fpu.fmt[0]) begin + `TRACE(level, ("FSQRT.D")) + end else begin + `TRACE(level, ("FSQRT.S")) + end + end + `INST_FPU_CMP: begin + if (op_args.fpu.fmt[0]) begin + case (op_args.fpu.frm[1:0]) + 0: `TRACE(level, ("FLE.D")) + 1: `TRACE(level, ("FLT.D")) + 2: `TRACE(level, ("FEQ.D")) + default: `TRACE(level, ("?")) + endcase + end else begin + case (op_args.fpu.frm[1:0]) + 0: `TRACE(level, ("FLE.S")) + 1: `TRACE(level, ("FLT.S")) + 2: `TRACE(level, ("FEQ.S")) + default: `TRACE(level, ("?")) + endcase + end + end + `INST_FPU_F2F: begin + if (op_args.fpu.fmt[0]) begin + `TRACE(level, ("FCVT.D.S")) + end else begin + `TRACE(level, ("FCVT.S.D")) + end + end + `INST_FPU_F2I: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.L.D")) + end else begin + `TRACE(level, ("FCVT.W.D")) + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.L.S")) + end else begin + `TRACE(level, ("FCVT.W.S")) + end + end + end + `INST_FPU_F2U: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.LU.D")) + end else begin + `TRACE(level, ("FCVT.WU.D")) + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.LU.S")) + end else begin + `TRACE(level, ("FCVT.WU.S")) + end + end + end + `INST_FPU_I2F: begin + if 
(op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.D.L")) + end else begin + `TRACE(level, ("FCVT.D.W")) + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.S.L")) + end else begin + `TRACE(level, ("FCVT.S.W")) + end + end + end + `INST_FPU_U2F: begin + if (op_args.fpu.fmt[0]) begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.D.LU")) + end else begin + `TRACE(level, ("FCVT.D.WU")) + end + end else begin + if (op_args.fpu.fmt[1]) begin + `TRACE(level, ("FCVT.S.LU")) + end else begin + `TRACE(level, ("FCVT.S.WU")) + end + end + end + `INST_FPU_MISC: begin + if (op_args.fpu.fmt[0]) begin + case (op_args.fpu.frm) + 0: `TRACE(level, ("FSGNJ.D")) + 1: `TRACE(level, ("FSGNJN.D")) + 2: `TRACE(level, ("FSGNJX.D")) + 3: `TRACE(level, ("FCLASS.D")) + 4: `TRACE(level, ("FMV.X.D")) + 5: `TRACE(level, ("FMV.D.X")) + 6: `TRACE(level, ("FMIN.D")) + 7: `TRACE(level, ("FMAX.D")) + endcase + end else begin + case (op_args.fpu.frm) + 0: `TRACE(level, ("FSGNJ.S")) + 1: `TRACE(level, ("FSGNJN.S")) + 2: `TRACE(level, ("FSGNJX.S")) + 3: `TRACE(level, ("FCLASS.S")) + 4: `TRACE(level, ("FMV.X.S")) + 5: `TRACE(level, ("FMV.S.X")) + 6: `TRACE(level, ("FMIN.S")) + 7: `TRACE(level, ("FMAX.S")) + endcase + end + end + default: `TRACE(level, ("?")) + endcase + end + `endif + default: `TRACE(level, ("?")) + endcase + endtask + + task trace_op_args(input int level, + input [`EX_BITS-1:0] ex_type, + input [`INST_OP_BITS-1:0] op_type, + input VX_gpu_pkg::op_args_t op_args + ); + case (ex_type) + `EX_ALU: begin + `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm)) + end + `EX_LSU: begin + `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)) + end + `EX_SFU: begin + if (`INST_SFU_IS_CSR(op_type)) begin + `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)) + end + end + `ifdef EXT_F_ENABLE + `EX_FPU: begin + 
`TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)) + end + `endif + default:; + endcase + endtask + + task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr); + case (addr) + `VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0")) + `VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1")) + `VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0")) + `VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1")) + `VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS")) + default: `TRACE(level, ("?")) + endcase + endtask + +`endif + endpackage `endif // VX_GPU_PKG_VH diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 59f5ef0f5..3e9042737 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -22,36 +22,39 @@ /////////////////////////////////////////////////////////////////////////////// -`ifdef VIVADO -`define STRING -`else -`define STRING string -`endif +`ifdef SIMULATION + +`define STATIC_ASSERT(cond, msg) \ +generate \ + /* verilator lint_off GENUNNAMED */ \ + if (!(cond)) $error msg; \ + /* verilator lint_on GENUNNAMED */ \ +endgenerate + +`define ERROR(msg) \ + $error msg + +`define ASSERT(cond, msg) \ + assert(cond) else $error msg -`ifdef SYNTHESIS +`define RUNTIME_ASSERT(cond, msg) \ + always @(posedge clk) begin \ + assert(cond) else $error msg; \ + end + +`define __SCOPE +`define __SCOPE_X +`define __SCOPE_ON +`define __SCOPE_OFF + +`ifndef TRACING_ALL +`define TRACING_ON /* verilator tracing_on */ +`define TRACING_OFF /* verilator tracing_off */ +`else `define TRACING_ON `define TRACING_OFF -`ifndef NDEBUG - `define DEBUG_BLOCK(x) x -`else - `define DEBUG_BLOCK(x) `endif -`define IGNORE_UNOPTFLAT_BEGIN -`define IGNORE_UNOPTFLAT_END -`define IGNORE_UNUSED_BEGIN -`define IGNORE_UNUSED_END -`define IGNORE_WARNINGS_BEGIN -`define IGNORE_WARNINGS_END -`define UNUSED_PARAM(x) -`define UNUSED_SPARAM(x) -`define UNUSED_VAR(x) -`define UNUSED_PIN(x) . 
x () -`define UNUSED_ARG(x) x -`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args -`else -`ifdef VERILATOR -`define TRACING_ON /* verilator tracing_on */ -`define TRACING_OFF /* verilator tracing_off */ + `ifndef NDEBUG `define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \ x \ @@ -100,49 +103,68 @@ localparam `STRING __``x = x; \ /* verilator lint_on UNUSED */ -`define UNUSED_VAR(x) if (1) begin \ +`define UNUSED_VAR(x) /* verilator lint_off GENUNNAMED */ \ + if (1) begin \ /* verilator lint_off UNUSED */ \ wire [$bits(x)-1:0] __x = x; \ /* verilator lint_on UNUSED */ \ - end + end \ + /* verilator lint_on GENUNNAMED */ `define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \ . x () \ /* verilator lint_on PINCONNECTEMPTY */ + `define UNUSED_ARG(x) /* verilator lint_off UNUSED */ \ x \ /* verilator lint_on UNUSED */ -`endif `ifdef SV_DPI -`define TRACE(level, args) dpi_trace(level, $sformatf args) +`define TRACE(level, args) dpi_trace(level, $sformatf args); `else -`define TRACE(level, args) if (level <= `DEBUG_LEVEL) $write args +`define TRACE(level, args) \ + if (level <= `DEBUG_LEVEL) begin \ + $write args; \ + end `endif -`endif +`else // SYNTHESIS -`ifdef SIMULATION - `define STATIC_ASSERT(cond, msg) \ - generate \ - if (!(cond)) $error msg; \ - endgenerate +`define STATIC_ASSERT(cond, msg) +`define ERROR(msg) // +`define ASSERT(cond, msg) // +`define RUNTIME_ASSERT(cond, msg) - `define ERROR(msg) \ - $error msg +`define DEBUG_BLOCK(x) +`define TRACE(level, args) - `define ASSERT(cond, msg) \ - assert(cond) else $error msg +`define TRACING_ON +`define TRACING_OFF + +`define IGNORE_UNOPTFLAT_BEGIN +`define IGNORE_UNOPTFLAT_END +`define IGNORE_UNUSED_BEGIN +`define IGNORE_UNUSED_END +`define IGNORE_WARNINGS_BEGIN +`define IGNORE_WARNINGS_END +`define UNUSED_PARAM(x) +`define UNUSED_SPARAM(x) +`define UNUSED_VAR(x) +`define UNUSED_PIN(x) . 
x () +`define UNUSED_ARG(x) x + +`define __SCOPE (* mark_debug="true" *) + +`define __SCOPE_X + +`define __SCOPE_ON \ + `undef __SCOPE_X \ + `define __SCOPE_X `__SCOPE + +`define __SCOPE_OFF \ + `undef __SCOPE_X \ + `define __SCOPE_X - `define RUNTIME_ASSERT(cond, msg) \ - always @(posedge clk) begin \ - assert(cond) else $error msg; \ - end -`else - `define STATIC_ASSERT(cond, msg) - `define ERROR(msg) // - `define ASSERT(cond, msg) // - `define RUNTIME_ASSERT(cond, msg) `endif /////////////////////////////////////////////////////////////////////////////// @@ -154,6 +176,7 @@ `define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *) `define DISABLE_BRAM (* ramstyle = "logic" *) `define PRESERVE_NET (* preserve *) +`define STRING string `elsif VIVADO `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) $bits(x.data) @@ -161,6 +184,7 @@ `define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *) `define DISABLE_BRAM (* ram_style = "registers" *) `define PRESERVE_NET (* keep = "true" *) +`define STRING `else `define MAX_FANOUT 8 `define IF_DATA_SIZE(x) x.DATA_WIDTH @@ -168,6 +192,7 @@ `define NO_RW_RAM_CHECK `define DISABLE_BRAM `define PRESERVE_NET +`define STRING string `endif /////////////////////////////////////////////////////////////////////////////// @@ -204,23 +229,23 @@ `define SEXT(len, x) {{(len-$bits(x)+1){x[$bits(x)-1]}}, x[$bits(x)-2:0]} `define TRACE_ARRAY1D(lvl, fmt, arr, n) \ - `TRACE(lvl, ("{")); \ + `TRACE(lvl, ("{")) \ for (integer __i = (n-1); __i >= 0; --__i) begin \ - if (__i != (n-1)) `TRACE(lvl, (", ")); \ - `TRACE(lvl, (fmt, arr[__i])); \ + if (__i != (n-1)) `TRACE(lvl, (", ")) \ + `TRACE(lvl, (fmt, arr[__i])) \ end \ - `TRACE(lvl, ("}")); + `TRACE(lvl, ("}")) `define TRACE_ARRAY2D(lvl, fmt, arr, m, n) \ - `TRACE(lvl, ("{")); \ + `TRACE(lvl, ("{")) \ for (integer __i = n-1; __i >= 0; --__i) begin \ - if (__i != (n-1)) `TRACE(lvl, (", ")); \ - `TRACE(lvl, ("{")); \ + if (__i != (n-1)) `TRACE(lvl, (", ")) \ + 
`TRACE(lvl, ("{")) \ for (integer __j = (m-1); __j >= 0; --__j) begin \ - if (__j != (m-1)) `TRACE(lvl, (", "));\ - `TRACE(lvl, (fmt, arr[__i][__j])); \ + if (__j != (m-1)) `TRACE(lvl, (", "))\ + `TRACE(lvl, (fmt, arr[__i][__j])) \ end \ - `TRACE(lvl, ("}")); \ + `TRACE(lvl, ("}")) \ end \ `TRACE(lvl, ("}")) @@ -239,10 +264,13 @@ `RESET_RELAY_EX (dst, src, 1, 0) // size(x): 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4-> 2, 5 -> 2 -`define TO_OUT_BUF_SIZE(s) `MIN(s, 2) +`define TO_OUT_BUF_SIZE(s) `MIN(s & 7, 2) // reg(x): 0 -> 0, 1 -> 1, 2 -> 0, 3 -> 1, 4 -> 2, 5 > 3 -`define TO_OUT_BUF_REG(s) ((s < 2) ? s : (s - 2)) +`define TO_OUT_BUF_REG(s) (((s & 7) < 2) ? (s & 7) : ((s & 7) - 2)) + +// lut(x): (x & 8) != 0 +`define TO_OUT_BUF_LUTRAM(s) ((s & 8) != 0) `define REPEAT(n,f,s) `_REPEAT_``n(f,s) `define _REPEAT_0(f,s) diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index a74770640..b3d427ede 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -21,48 +21,67 @@ input wire scope_bus_in, \ output wire scope_bus_out, -`define SCOPE_IO_SWITCH(__count) \ - wire scope_bus_in_w [__count]; \ - wire scope_bus_out_w [__count]; \ - `RESET_RELAY_EX(scope_reset_w, scope_reset, __count, 4); \ - VX_scope_switch #( \ - .N (__count) \ - ) scope_switch ( \ - .clk (clk), \ - .reset (scope_reset), \ - .req_in (scope_bus_in), \ - .rsp_out (scope_bus_out), \ - .req_out (scope_bus_in_w), \ - .rsp_in (scope_bus_out_w) \ - ); - `define SCOPE_IO_BIND(__i) \ .scope_reset (scope_reset_w[__i]), \ .scope_bus_in (scope_bus_in_w[__i]), \ .scope_bus_out (scope_bus_out_w[__i]), -`define SCOPE_IO_UNUSED() \ - `UNUSED_VAR (scope_reset); \ - `UNUSED_VAR (scope_bus_in); \ - assign scope_bus_out = 0; - -`define SCOPE_IO_UNUSED_W(__i) \ +`define SCOPE_IO_UNUSED(__i) \ `UNUSED_VAR (scope_reset_w[__i]); \ `UNUSED_VAR (scope_bus_in_w[__i]); \ assign scope_bus_out_w[__i] = 0; +`define SCOPE_IO_SWITCH(__count) \ + wire [__count-1:0] scope_bus_in_w; \ + wire [__count-1:0] scope_bus_out_w; \ + wire [__count-1:0] scope_reset_w = {__count{scope_reset}}; \ + VX_scope_switch #( \ + .N (__count) \ + ) scope_switch ( \ + .clk (clk), \ + .reset (scope_reset), \ + .req_in (scope_bus_in), \ + .rsp_out (scope_bus_out), \ + .req_out (scope_bus_in_w), \ + .rsp_in (scope_bus_out_w) \ + ) + +`define SCOPE_TAP_EX(__idx, __id, __xtriggers_w, __htriggers_w, __probes_w, __xtriggers, __htriggers, __probes, __start, __stop, __depth) \ + VX_scope_tap #( \ + .SCOPE_ID (__id), \ + .XTRIGGERW(__xtriggers_w), \ + .HTRIGGERW(__htriggers_w), \ + .PROBEW (__probes_w), \ + .DEPTH (__depth) \ + ) scope_tap_``idx ( \ + .clk (clk), \ + .reset (scope_reset_w[__idx]), \ + .start (__start), \ + .stop (__stop), \ + .xtriggers(__xtriggers), \ + .htriggers(__htriggers), \ + .probes (__probes), \ + .bus_in (scope_bus_in_w[__idx]), \ + .bus_out(scope_bus_out_w[__idx]) \ + ) + +`define SCOPE_TAP(__idx, __id, __xtriggers, __htriggers, __probes, __start, __stop, __depth) \ + 
`SCOPE_TAP_EX(__idx, __id, $bits(__xtriggers), $bits(__htriggers), $bits(__probes), __xtriggers, __htriggers, __probes, __start, __stop, __depth) + `else `define SCOPE_IO_DECL -`define SCOPE_IO_SWITCH(__count) - `define SCOPE_IO_BIND(__i) -`define SCOPE_IO_UNUSED_W(__i) - `define SCOPE_IO_UNUSED(__i) +`define SCOPE_IO_SWITCH(__count) + +`define SCOPE_TAP(__idx, __id, __xtriggers, __probes, __depth) + +`define SCOPE_TAP_EX(__idx, __id, __xtriggers_w, __probes_w, __xtriggers, __probes, __depth) + `endif `endif // VX_SCOPE_VH diff --git a/hw/rtl/VX_socket.sv b/hw/rtl/VX_socket.sv index 694edfe9c..69ff88a2c 100644 --- a/hw/rtl/VX_socket.sv +++ b/hw/rtl/VX_socket.sv @@ -49,14 +49,12 @@ module VX_socket import VX_gpu_pkg::*; #( `ifdef GBAR_ENABLE VX_gbar_bus_if per_core_gbar_bus_if[`SOCKET_SIZE](); - `RESET_RELAY (gbar_arb_reset, reset); - VX_gbar_arb #( .NUM_REQS (`SOCKET_SIZE), .OUT_BUF ((`SOCKET_SIZE > 1) ? 2 : 0) ) gbar_arb ( .clk (clk), - .reset (gbar_arb_reset), + .reset (reset), .bus_in_if (per_core_gbar_bus_if), .bus_out_if (gbar_bus_if) ); @@ -105,7 +103,7 @@ module VX_socket import VX_gpu_pkg::*; #( .UUID_WIDTH (`UUID_WIDTH), .WRITE_ENABLE (0), .NC_ENABLE (0), - .CORE_OUT_BUF (2), + .CORE_OUT_BUF (3), .MEM_OUT_BUF (2) ) icache ( `ifdef PERF_ENABLE @@ -152,7 +150,7 @@ module VX_socket import VX_gpu_pkg::*; #( .WRITEBACK (`DCACHE_WRITEBACK), .DIRTY_BYTES (`DCACHE_WRITEBACK), .NC_ENABLE (1), - .CORE_OUT_BUF (2), + .CORE_OUT_BUF (3), .MEM_OUT_BUF (2) ) dcache ( `ifdef PERF_ENABLE @@ -180,13 +178,13 @@ module VX_socket import VX_gpu_pkg::*; #( `ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[1], dcache_mem_bus_if, L1_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH); VX_mem_arb #( - .NUM_INPUTS (2), - .DATA_SIZE (`L1_LINE_SIZE), - .TAG_WIDTH (L1_MEM_TAG_WIDTH), - .TAG_SEL_IDX (0), - .ARBITER ("R"), - .REQ_OUT_BUF (2), - .RSP_OUT_BUF (2) + .NUM_INPUTS (2), + .DATA_SIZE (`L1_LINE_SIZE), + .TAG_WIDTH (L1_MEM_TAG_WIDTH), + .TAG_SEL_IDX(0), + .ARBITER ("P"), // prioritize the icache + 
.REQ_OUT_BUF(3), + .RSP_OUT_BUF(3) ) mem_arb ( .clk (clk), .reset (reset), @@ -200,14 +198,14 @@ module VX_socket import VX_gpu_pkg::*; #( wire [`SOCKET_SIZE-1:0] per_core_busy; - VX_dcr_bus_if core_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1)); - // Generate all cores - for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : cores + for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : g_cores `RESET_RELAY (core_reset, reset); + VX_dcr_bus_if core_dcr_bus_if(); + `BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, 1'b1, (`SOCKET_SIZE > 1)) + VX_core #( .CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id), .INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id)) diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index 2eac22a5a..048ba0a5c 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -201,4 +201,10 @@ `define VX_CSR_NUM_CORES 12'hFC2 `define VX_CSR_LOCAL_MEM_BASE 12'hFC3 +`define VX_MAT_MUL_SIZE 12'hFC4 // VX_MAT_MUL_SIZE = Matrix Size / TC Size +`define VX_TC_NUM 12'hFC5 +`define VX_TC_SIZE 12'hFC6 + + + `endif // VX_TYPES_VH diff --git a/hw/rtl/Vortex.sv b/hw/rtl/Vortex.sv index 978259101..e07aaae4d 100644 --- a/hw/rtl/Vortex.sv +++ b/hw/rtl/Vortex.sv @@ -86,8 +86,8 @@ module Vortex import VX_gpu_pkg::*; ( .WRITEBACK (`L3_WRITEBACK), .DIRTY_BYTES (`L3_WRITEBACK), .UUID_WIDTH (`UUID_WIDTH), - .CORE_OUT_BUF (2), - .MEM_OUT_BUF (2), + .CORE_OUT_BUF (3), + .MEM_OUT_BUF (3), .NC_ENABLE (1), .PASSTHRU (!`L3_ENABLED) ) l3cache ( @@ -109,7 +109,7 @@ module Vortex import VX_gpu_pkg::*; ( assign mem_req_data = mem_bus_if.req_data.data; assign mem_req_tag = mem_bus_if.req_data.tag; assign mem_bus_if.req_ready = mem_req_ready; - `UNUSED_VAR (mem_bus_if.req_data.atype) + `UNUSED_VAR (mem_bus_if.req_data.flags) assign mem_bus_if.rsp_valid = mem_rsp_valid; assign mem_bus_if.rsp_data.data = mem_rsp_data; @@ -129,12 +129,12 @@ module Vortex import VX_gpu_pkg::*; ( wire [`NUM_CLUSTERS-1:0] 
per_cluster_busy; // Generate all clusters - for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters + for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : g_clusters `RESET_RELAY (cluster_reset, reset); VX_dcr_bus_if cluster_dcr_bus_if(); - `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1)); + `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, 1'b1, (`NUM_CLUSTERS > 1)) VX_cluster #( .CLUSTER_ID (cluster_id), @@ -189,16 +189,26 @@ module Vortex import VX_gpu_pkg::*; ( `endif + // dump device configuration + initial begin + `TRACE(0, ("CONFIGS: num_threads=%0d, num_warps=%0d, num_cores=%0d, num_clusters=%0d, socket_size=%0d, local_mem_base=0x%0h, num_barriers=%0d\n", + `NUM_THREADS, `NUM_WARPS, `NUM_CORES, `NUM_CLUSTERS, `SOCKET_SIZE, `LMEM_BASE_ADDR, `NUM_BARRIERS)) + end + `ifdef DBG_TRACE_MEM + wire [`UUID_WIDTH-1:0] mem_req_uuid = mem_req_tag[`VX_MEM_TAG_WIDTH-1 -: `UUID_WIDTH]; + wire [`UUID_WIDTH-1:0] mem_rsp_uuid = mem_rsp_tag[`VX_MEM_TAG_WIDTH-1 -: `UUID_WIDTH]; + always @(posedge clk) begin if (mem_req_fire) begin - if (mem_req_rw) - `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data)); - else - `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen)); + if (mem_req_rw) begin + `TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data, mem_req_uuid)) + end else begin + `TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_uuid)) + end end if (mem_rsp_fire) begin - `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data)); + `TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h 
(#%0d)\n", $time, mem_rsp_tag, mem_rsp_data, mem_rsp_uuid)) end end `endif diff --git a/hw/rtl/Vortex_axi.sv b/hw/rtl/Vortex_axi.sv index 5d2f5b0a7..418a2aa5c 100644 --- a/hw/rtl/Vortex_axi.sv +++ b/hw/rtl/Vortex_axi.sv @@ -15,7 +15,7 @@ module Vortex_axi import VX_gpu_pkg::*; #( parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH, - parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH, + parameter AXI_ADDR_WIDTH = `MEM_ADDR_WIDTH + (`VX_MEM_DATA_WIDTH/8), parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH, parameter AXI_NUM_BANKS = 1 )( @@ -82,9 +82,11 @@ module Vortex_axi import VX_gpu_pkg::*; #( // Status output wire busy ); - `STATIC_ASSERT((AXI_DATA_WIDTH == `VX_MEM_DATA_WIDTH), ("invalid memory data size: current=%0d, expected=%0d", AXI_DATA_WIDTH, `VX_MEM_DATA_WIDTH)) - `STATIC_ASSERT((AXI_ADDR_WIDTH >= `MEM_ADDR_WIDTH), ("invalid memory address size: current=%0d, expected=%0d", AXI_ADDR_WIDTH, `VX_MEM_ADDR_WIDTH)) - //`STATIC_ASSERT((AXI_TID_WIDTH >= `VX_MEM_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, `VX_MEM_TAG_WIDTH)) + localparam DST_LDATAW = `CLOG2(AXI_DATA_WIDTH); + localparam SRC_LDATAW = `CLOG2(`VX_MEM_DATA_WIDTH); + localparam SUB_LDATAW = DST_LDATAW - SRC_LDATAW; + localparam VX_MEM_TAG_A_WIDTH = `VX_MEM_TAG_WIDTH + `MAX(SUB_LDATAW, 0); + localparam VX_MEM_ADDR_A_WIDTH = `VX_MEM_ADDR_WIDTH - SUB_LDATAW; wire mem_req_valid; wire mem_req_rw; @@ -99,33 +101,11 @@ module Vortex_axi import VX_gpu_pkg::*; #( wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag; wire mem_rsp_ready; - wire [`MEM_ADDR_WIDTH-1:0] m_axi_awaddr_unqual [AXI_NUM_BANKS]; - wire [`MEM_ADDR_WIDTH-1:0] m_axi_araddr_unqual [AXI_NUM_BANKS]; + `SCOPE_IO_SWITCH (1); - wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_awid_unqual [AXI_NUM_BANKS]; - wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_arid_unqual [AXI_NUM_BANKS]; - - wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_bid_unqual [AXI_NUM_BANKS]; - wire [`VX_MEM_TAG_WIDTH-1:0] m_axi_rid_unqual [AXI_NUM_BANKS]; - - for (genvar i = 0; i < AXI_NUM_BANKS; ++i) begin - 
assign m_axi_awaddr[i] = `MEM_ADDR_WIDTH'(m_axi_awaddr_unqual[i]); - assign m_axi_araddr[i] = `MEM_ADDR_WIDTH'(m_axi_araddr_unqual[i]); - - assign m_axi_awid[i] = AXI_TID_WIDTH'(m_axi_awid_unqual[i]); - assign m_axi_arid[i] = AXI_TID_WIDTH'(m_axi_arid_unqual[i]); - - assign m_axi_rid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_rid[i]); - assign m_axi_bid_unqual[i] = `VX_MEM_TAG_WIDTH'(m_axi_bid[i]); - end + Vortex vortex ( + `SCOPE_IO_BIND (0) - VX_axi_adapter #( - .DATA_WIDTH (`VX_MEM_DATA_WIDTH), - .ADDR_WIDTH (`MEM_ADDR_WIDTH), - .TAG_WIDTH (`VX_MEM_TAG_WIDTH), - .NUM_BANKS (AXI_NUM_BANKS), - .RSP_OUT_BUF((AXI_NUM_BANKS > 1) ? 2 : 0) - ) axi_adapter ( .clk (clk), .reset (reset), @@ -142,10 +122,96 @@ module Vortex_axi import VX_gpu_pkg::*; #( .mem_rsp_tag (mem_rsp_tag), .mem_rsp_ready (mem_rsp_ready), + .dcr_wr_valid (dcr_wr_valid), + .dcr_wr_addr (dcr_wr_addr), + .dcr_wr_data (dcr_wr_data), + + .busy (busy) + ); + + wire mem_req_valid_a; + wire mem_req_rw_a; + wire [(AXI_DATA_WIDTH/8)-1:0] mem_req_byteen_a; + wire [VX_MEM_ADDR_A_WIDTH-1:0] mem_req_addr_a; + wire [AXI_DATA_WIDTH-1:0] mem_req_data_a; + wire [VX_MEM_TAG_A_WIDTH-1:0] mem_req_tag_a; + wire mem_req_ready_a; + + wire mem_rsp_valid_a; + wire [AXI_DATA_WIDTH-1:0] mem_rsp_data_a; + wire [VX_MEM_TAG_A_WIDTH-1:0] mem_rsp_tag_a; + wire mem_rsp_ready_a; + + VX_mem_adapter #( + .SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH), + .DST_DATA_WIDTH (AXI_DATA_WIDTH), + .SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH), + .DST_ADDR_WIDTH (VX_MEM_ADDR_A_WIDTH), + .SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH), + .DST_TAG_WIDTH (VX_MEM_TAG_A_WIDTH), + .REQ_OUT_BUF (0), + .RSP_OUT_BUF (0) + ) mem_adapter ( + .clk (clk), + .reset (reset), + + .mem_req_valid_in (mem_req_valid), + .mem_req_addr_in (mem_req_addr), + .mem_req_rw_in (mem_req_rw), + .mem_req_byteen_in (mem_req_byteen), + .mem_req_data_in (mem_req_data), + .mem_req_tag_in (mem_req_tag), + .mem_req_ready_in (mem_req_ready), + + .mem_rsp_valid_in (mem_rsp_valid), + .mem_rsp_data_in (mem_rsp_data), + 
.mem_rsp_tag_in (mem_rsp_tag), + .mem_rsp_ready_in (mem_rsp_ready), + + .mem_req_valid_out (mem_req_valid_a), + .mem_req_addr_out (mem_req_addr_a), + .mem_req_rw_out (mem_req_rw_a), + .mem_req_byteen_out (mem_req_byteen_a), + .mem_req_data_out (mem_req_data_a), + .mem_req_tag_out (mem_req_tag_a), + .mem_req_ready_out (mem_req_ready_a), + + .mem_rsp_valid_out (mem_rsp_valid_a), + .mem_rsp_data_out (mem_rsp_data_a), + .mem_rsp_tag_out (mem_rsp_tag_a), + .mem_rsp_ready_out (mem_rsp_ready_a) + ); + + VX_axi_adapter #( + .DATA_WIDTH (AXI_DATA_WIDTH), + .ADDR_WIDTH_IN (VX_MEM_ADDR_A_WIDTH), + .ADDR_WIDTH_OUT (AXI_ADDR_WIDTH), + .TAG_WIDTH_IN (VX_MEM_TAG_A_WIDTH), + .TAG_WIDTH_OUT (AXI_TID_WIDTH), + .NUM_BANKS (AXI_NUM_BANKS), + .BANK_INTERLEAVE(0), + .RSP_OUT_BUF ((AXI_NUM_BANKS > 1) ? 2 : 0) + ) axi_adapter ( + .clk (clk), + .reset (reset), + + .mem_req_valid (mem_req_valid_a), + .mem_req_rw (mem_req_rw_a), + .mem_req_byteen (mem_req_byteen_a), + .mem_req_addr (mem_req_addr_a), + .mem_req_data (mem_req_data_a), + .mem_req_tag (mem_req_tag_a), + .mem_req_ready (mem_req_ready_a), + + .mem_rsp_valid (mem_rsp_valid_a), + .mem_rsp_data (mem_rsp_data_a), + .mem_rsp_tag (mem_rsp_tag_a), + .mem_rsp_ready (mem_rsp_ready_a), + .m_axi_awvalid (m_axi_awvalid), .m_axi_awready (m_axi_awready), - .m_axi_awaddr (m_axi_awaddr_unqual), - .m_axi_awid (m_axi_awid_unqual), + .m_axi_awaddr (m_axi_awaddr), + .m_axi_awid (m_axi_awid), .m_axi_awlen (m_axi_awlen), .m_axi_awsize (m_axi_awsize), .m_axi_awburst (m_axi_awburst), @@ -163,13 +229,13 @@ module Vortex_axi import VX_gpu_pkg::*; #( .m_axi_bvalid (m_axi_bvalid), .m_axi_bready (m_axi_bready), - .m_axi_bid (m_axi_bid_unqual), + .m_axi_bid (m_axi_bid), .m_axi_bresp (m_axi_bresp), .m_axi_arvalid (m_axi_arvalid), .m_axi_arready (m_axi_arready), - .m_axi_araddr (m_axi_araddr_unqual), - .m_axi_arid (m_axi_arid_unqual), + .m_axi_araddr (m_axi_araddr), + .m_axi_arid (m_axi_arid), .m_axi_arlen (m_axi_arlen), .m_axi_arsize (m_axi_arsize), 
.m_axi_arburst (m_axi_arburst), @@ -182,37 +248,9 @@ module Vortex_axi import VX_gpu_pkg::*; #( .m_axi_rvalid (m_axi_rvalid), .m_axi_rready (m_axi_rready), .m_axi_rdata (m_axi_rdata), - .m_axi_rlast (m_axi_rlast) , - .m_axi_rid (m_axi_rid_unqual), + .m_axi_rlast (m_axi_rlast), + .m_axi_rid (m_axi_rid), .m_axi_rresp (m_axi_rresp) ); - `SCOPE_IO_SWITCH (1) - - Vortex vortex ( - `SCOPE_IO_BIND (0) - - .clk (clk), - .reset (reset), - - .mem_req_valid (mem_req_valid), - .mem_req_rw (mem_req_rw), - .mem_req_byteen (mem_req_byteen), - .mem_req_addr (mem_req_addr), - .mem_req_data (mem_req_data), - .mem_req_tag (mem_req_tag), - .mem_req_ready (mem_req_ready), - - .mem_rsp_valid (mem_rsp_valid), - .mem_rsp_data (mem_rsp_data), - .mem_rsp_tag (mem_rsp_tag), - .mem_rsp_ready (mem_rsp_ready), - - .dcr_wr_valid (dcr_wr_valid), - .dcr_wr_addr (dcr_wr_addr), - .dcr_wr_data (dcr_wr_data), - - .busy (busy) - ); - endmodule diff --git a/hw/rtl/afu/opae/local_mem_cfg_pkg.sv b/hw/rtl/afu/opae/local_mem_cfg_pkg.sv index ef9fae28a..c63825548 100644 --- a/hw/rtl/afu/opae/local_mem_cfg_pkg.sv +++ b/hw/rtl/afu/opae/local_mem_cfg_pkg.sv @@ -30,7 +30,17 @@ //`include "platform_afu_top_config.vh" -`ifdef PLATFORM_PROVIDES_LOCAL_MEMORY +`ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH +`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH (`PLATFORM_MEMORY_ADDR_WIDTH - $clog2(`PLATFORM_MEMORY_DATA_WIDTH/8)) +`endif + +`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH +`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH `PLATFORM_MEMORY_DATA_WIDTH +`endif + +`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH +`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH `PLATFORM_MEMORY_BURST_CNT_WIDTH +`endif package local_mem_cfg_pkg; @@ -57,5 +67,3 @@ package local_mem_cfg_pkg; typedef logic [LOCAL_MEM_DATA_N_BYTES-1:0] t_local_mem_byte_mask; endpackage // local_mem_cfg_pkg - -`endif // PLATFORM_PROVIDES_LOCAL_MEMORY diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index 93f63c48d..7e0bcfaed 
100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -18,6 +18,10 @@ `endif `include "VX_define.vh" +`ifndef PLATFORM_MEMORY_INTERLEAVE +`define PLATFORM_MEMORY_INTERLEAVE 1 +`endif + module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_gpu_pkg::*; #( parameter NUM_LOCAL_MEM_BANKS = 2 ) ( @@ -40,16 +44,17 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ output t_local_mem_burst_cnt avs_burstcount [NUM_LOCAL_MEM_BANKS], input wire avs_readdatavalid [NUM_LOCAL_MEM_BANKS] ); - localparam LMEM_DATA_WIDTH = $bits(t_local_mem_data); localparam LMEM_DATA_SIZE = LMEM_DATA_WIDTH / 8; - localparam LMEM_ADDR_WIDTH = $bits(t_local_mem_addr); + localparam LMEM_ADDR_WIDTH = `VX_MEM_ADDR_WIDTH + ($clog2(`VX_MEM_DATA_WIDTH) - $clog2(LMEM_DATA_WIDTH)); localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt); localparam CCI_DATA_WIDTH = $bits(t_ccip_clData); localparam CCI_DATA_SIZE = CCI_DATA_WIDTH / 8; localparam CCI_ADDR_WIDTH = $bits(t_ccip_clAddr); + localparam RESET_CTR_WIDTH = `CLOG2(`RESET_DELAY+1); + localparam AVS_RD_QUEUE_SIZE = 32; localparam _VX_MEM_TAG_WIDTH = `VX_MEM_TAG_WIDTH; localparam _AVS_REQ_TAGW_VX = _VX_MEM_TAG_WIDTH + `CLOG2(LMEM_DATA_WIDTH) - `CLOG2(`VX_MEM_DATA_WIDTH); @@ -64,6 +69,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ localparam AFU_ID_L = 16'h0002; // AFU ID Lower localparam AFU_ID_H = 16'h0004; // AFU ID Higher + localparam CMD_IDLE = 0; localparam CMD_MEM_READ = `AFU_IMAGE_CMD_MEM_READ; localparam CMD_MEM_WRITE = `AFU_IMAGE_CMD_MEM_WRITE; localparam CMD_DCR_WRITE = `AFU_IMAGE_CMD_DCR_WRITE; @@ -78,7 +84,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ localparam COUT_TID_WIDTH = `CLOG2(`VX_MEM_BYTEEN_WIDTH); localparam COUT_QUEUE_DATAW = COUT_TID_WIDTH + 8; - localparam COUT_QUEUE_SIZE = 64; + localparam COUT_QUEUE_SIZE = 1024; localparam MMIO_DEV_CAPS = 
`AFU_IMAGE_MMIO_DEV_CAPS; localparam MMIO_ISA_CAPS = `AFU_IMAGE_MMIO_ISA_CAPS; @@ -96,7 +102,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire [127:0] afu_id = `AFU_ACCEL_UUID; - wire [63:0] dev_caps = {16'b0, + wire [63:0] dev_caps = {8'b0, + 5'(`PLATFORM_MEMORY_ADDR_WIDTH-20), + 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), 8'(`LMEM_ENABLED ? `LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), 8'(`NUM_WARPS), @@ -139,14 +147,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // MMIO controller //////////////////////////////////////////////////////////// - t_ccip_c0_ReqMmioHdr mmio_hdr; - assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); - `UNUSED_VAR (mmio_hdr) + t_ccip_c0_ReqMmioHdr mmio_req_hdr; + assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr[$bits(t_ccip_c0_ReqMmioHdr)-1:0]); + `UNUSED_VAR (mmio_req_hdr) - `STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, ("Oops!")) - - t_if_ccip_c2_Tx mmio_tx; - assign af2cp_sTxPort.c2 = mmio_tx; + t_if_ccip_c2_Tx mmio_rsp; + assign af2cp_sTxPort.c2 = mmio_rsp; `ifdef SCOPE @@ -170,33 +176,35 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ if (reset) begin cmd_scope_reading <= 0; cmd_scope_writing <= 0; - scope_bus_in <= 0; + scope_bus_in <= 0; end else begin + scope_bus_in <= 0; if (scope_bus_out) begin cmd_scope_reading <= 1; scope_bus_ctr <= 63; end - scope_bus_in <= 0; if (cp2af_sRxPort.c0.mmioWrValid - && (MMIO_SCOPE_WRITE == mmio_hdr.address)) begin + && (MMIO_SCOPE_WRITE == mmio_req_hdr.address)) begin cmd_scope_wdata <= 64'(cp2af_sRxPort.c0.data); cmd_scope_writing <= 1; scope_bus_ctr <= 63; scope_bus_in <= 1; end - end - if (cmd_scope_writing) begin - scope_bus_in <= 1'(cmd_scope_wdata >> scope_bus_ctr); - scope_bus_ctr <= scope_bus_ctr - 1; - if (scope_bus_ctr == 0) begin - cmd_scope_writing <= 0; + if (cmd_scope_writing) begin + scope_bus_in <= 
cmd_scope_wdata[scope_bus_ctr]; + scope_bus_ctr <= scope_bus_ctr - 6'd1; + if (scope_bus_ctr == 0) begin + cmd_scope_writing <= 0; + scope_bus_ctr <= 0; + end end - end - if (cmd_scope_reading) begin - cmd_scope_rdata <= {cmd_scope_rdata[62:0], scope_bus_out}; - scope_bus_ctr <= scope_bus_ctr - 1; - if (scope_bus_ctr == 0) begin - cmd_scope_reading <= 0; + if (cmd_scope_reading) begin + cmd_scope_rdata <= {cmd_scope_rdata[62:0], scope_bus_out}; + scope_bus_ctr <= scope_bus_ctr - 6'd1; + if (scope_bus_ctr == 0) begin + cmd_scope_reading <= 0; + scope_bus_ctr <= 0; + end end end end @@ -206,6 +214,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire [COUT_QUEUE_DATAW-1:0] cout_q_dout; wire cout_q_full, cout_q_empty; + wire [COUT_QUEUE_DATAW-1:0] cout_q_dout_s = cout_q_dout & {COUT_QUEUE_DATAW{!cout_q_empty}}; + `ifdef SIMULATION `ifndef VERILATOR // disable assertions until full reset @@ -226,60 +236,22 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ `endif `endif + // MMIO controller //////////////////////////////////////////////////////////// + + // Handle MMIO read requests always @(posedge clk) begin if (reset) begin - mmio_tx.mmioRdValid <= 0; - mmio_tx.hdr <= '0; + mmio_rsp.mmioRdValid <= 0; end else begin - mmio_tx.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid; - mmio_tx.hdr.tid <= mmio_hdr.tid; - end - // serve MMIO write request - if (cp2af_sRxPort.c0.mmioWrValid) begin - case (mmio_hdr.address) - MMIO_CMD_ARG0: begin - cmd_args[0] <= 64'(cp2af_sRxPort.c0.data); - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))); - `endif - end - MMIO_CMD_ARG1: begin - cmd_args[1] <= 64'(cp2af_sRxPort.c0.data); - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))); - `endif - end - MMIO_CMD_ARG2: begin - cmd_args[2] <= 64'(cp2af_sRxPort.c0.data); - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: 
MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))); - `endif - end - MMIO_CMD_TYPE: begin - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_CMD_TYPE: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))); - `endif - end - `ifdef SCOPE - MMIO_SCOPE_WRITE: begin - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_SCOPE_WRITE: data=0x%h\n", $time, cmd_scope_wdata)); - `endif - end - `endif - default: begin - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data))); - `endif - end - endcase + mmio_rsp.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid; end - // serve MMIO read requests + mmio_rsp.hdr.tid <= mmio_req_hdr.tid; + if (cp2af_sRxPort.c0.mmioRdValid) begin - case (mmio_hdr.address) + case (mmio_req_hdr.address) // AFU header - 16'h0000: mmio_tx.data <= { + 16'h0000: mmio_rsp.data <= { 4'b0001, // Feature type = AFU 8'b0, // reserved 4'b0, // afu minor revision = 0 @@ -289,46 +261,89 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ 4'b0, // afu major revision = 0 12'b0 // feature ID = 0 }; - AFU_ID_L: mmio_tx.data <= afu_id[63:0]; // afu id low - AFU_ID_H: mmio_tx.data <= afu_id[127:64]; // afu id hi - 16'h0006: mmio_tx.data <= 64'h0; // next AFU - 16'h0008: mmio_tx.data <= 64'h0; // reserved + AFU_ID_L: mmio_rsp.data <= afu_id[63:0]; // afu id low + AFU_ID_H: mmio_rsp.data <= afu_id[127:64]; // afu id hi + 16'h0006: mmio_rsp.data <= 64'h0; // next AFU + 16'h0008: mmio_rsp.data <= 64'h0; // reserved MMIO_STATUS: begin - mmio_tx.data <= 64'({cout_q_dout, !cout_q_empty, 8'(state)}); + mmio_rsp.data <= 64'({cout_q_dout_s, !cout_q_empty, 8'(state)}); `ifdef DBG_TRACE_AFU - if (state != STATE_WIDTH'(mmio_tx.data)) begin - `TRACE(2, ("%d: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_hdr.address, state)); + if (state != STATE_WIDTH'(mmio_rsp.data)) begin + `TRACE(2, ("%t: AFU: MMIO_STATUS: addr=0x%0h, state=%0d\n", $time, mmio_req_hdr.address, state)) end 
`endif end `ifdef SCOPE MMIO_SCOPE_READ: begin - mmio_tx.data <= cmd_scope_rdata; + mmio_rsp.data <= cmd_scope_rdata; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)); + `TRACE(2, ("%t: AFU: MMIO_SCOPE_READ: data=0x%h\n", $time, cmd_scope_rdata)) `endif end `endif MMIO_DEV_CAPS: begin - mmio_tx.data <= dev_caps; + mmio_rsp.data <= dev_caps; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)); + `TRACE(2, ("%t: AFU: MMIO_DEV_CAPS: data=0x%h\n", $time, dev_caps)) `endif end MMIO_ISA_CAPS: begin - mmio_tx.data <= isa_caps; + mmio_rsp.data <= isa_caps; `ifdef DBG_TRACE_AFU - if (state != STATE_WIDTH'(mmio_tx.data)) begin - `TRACE(2, ("%d: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)); + if (state != STATE_WIDTH'(mmio_rsp.data)) begin + `TRACE(2, ("%t: AFU: MMIO_ISA_CAPS: data=%0d\n", $time, isa_caps)) end `endif end default: begin - mmio_tx.data <= 64'h0; + mmio_rsp.data <= 64'h0; + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%t: AFU: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_req_hdr.address)) + `endif + end + endcase + end + end + + // Handle MMIO write requests + always @(posedge clk) begin + if (cp2af_sRxPort.c0.mmioWrValid) begin + case (mmio_req_hdr.address) + MMIO_CMD_ARG0: begin + cmd_args[0] <= 64'(cp2af_sRxPort.c0.data); + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%t: AFU: MMIO_CMD_ARG0: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) + `endif + end + MMIO_CMD_ARG1: begin + cmd_args[1] <= 64'(cp2af_sRxPort.c0.data); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: Unknown MMIO Rd: addr=0x%0h\n", $time, mmio_hdr.address)); + `TRACE(2, ("%t: AFU: MMIO_CMD_ARG1: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) `endif end + MMIO_CMD_ARG2: begin + cmd_args[2] <= 64'(cp2af_sRxPort.c0.data); + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%t: AFU: MMIO_CMD_ARG2: data=%0d\n", $time, 64'(cp2af_sRxPort.c0.data))) + `endif + end + MMIO_CMD_TYPE: begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%t: AFU: MMIO_CMD_TYPE: data=%0d\n", 
$time, 64'(cp2af_sRxPort.c0.data))) + `endif + end + `ifdef SCOPE + MMIO_SCOPE_WRITE: begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%t: AFU: MMIO_SCOPE_WRITE: data=0x%h\n", $time, 64'(cp2af_sRxPort.c0.data))) + `endif + end + `endif + default: begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%t: Unknown MMIO Wr: addr=0x%0h, data=0x%h\n", $time, mmio_req_hdr.address, 64'(cp2af_sRxPort.c0.data))) + `endif + end endcase end end @@ -338,56 +353,48 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire cmd_mem_rd_done; reg cmd_mem_wr_done; + reg [RESET_CTR_WIDTH-1:0] vx_reset_ctr; reg vx_busy_wait; - reg vx_running; + reg vx_reset = 1; // asserted at initialization wire vx_busy; - reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; - always @(posedge clk) begin - if (state == STATE_RUN) begin - vx_reset_ctr <= vx_reset_ctr + $bits(vx_reset_ctr)'(1); - end else begin - vx_reset_ctr <= '0; - end - end - - wire is_mmio_wr_cmd = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address); + wire is_mmio_wr_cmd = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_req_hdr.address); wire [CMD_TYPE_WIDTH-1:0] cmd_type = is_mmio_wr_cmd ? 
- CMD_TYPE_WIDTH'(cp2af_sRxPort.c0.data) : CMD_TYPE_WIDTH'(0); + CMD_TYPE_WIDTH'(cp2af_sRxPort.c0.data) : CMD_TYPE_WIDTH'(CMD_IDLE); always @(posedge clk) begin if (reset) begin - state <= STATE_IDLE; - vx_busy_wait <= 0; - vx_running <= 0; + state <= STATE_IDLE; + vx_reset <= 1; end else begin case (state) STATE_IDLE: begin case (cmd_type) CMD_MEM_READ: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)); + `TRACE(2, ("%t: AFU: Goto STATE MEM_READ: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) `endif state <= STATE_MEM_READ; end CMD_MEM_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)); + `TRACE(2, ("%t: AFU: Goto STATE MEM_WRITE: ia=0x%0h addr=0x%0h size=%0d\n", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size)) `endif state <= STATE_MEM_WRITE; end CMD_DCR_WRITE: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data)); + `TRACE(2, ("%t: AFU: Goto STATE DCR_WRITE: addr=0x%0h data=%0d\n", $time, cmd_dcr_addr, cmd_dcr_data)) `endif state <= STATE_DCR_WRITE; end CMD_RUN: begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE RUN\n", $time)); + `TRACE(2, ("%t: AFU: Goto STATE RUN\n", $time)) `endif state <= STATE_RUN; - vx_running <= 0; + vx_reset_ctr <= RESET_CTR_WIDTH'(`RESET_DELAY-1); + vx_reset <= 1; end default: begin state <= state; @@ -398,54 +405,56 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ if (cmd_mem_rd_done) begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE IDLE\n", $time)); + `TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time)) `endif end end STATE_MEM_WRITE: begin if (cmd_mem_wr_done) begin state <= STATE_IDLE; - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE IDLE\n", $time)); - `endif end end STATE_DCR_WRITE: 
begin state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE IDLE\n", $time)); + `TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time)) `endif end STATE_RUN: begin - if (vx_running) begin - if (vx_busy_wait) begin - // wait until the gpu goes busy - if (vx_busy) begin - vx_busy_wait <= 0; - end - end else begin - // wait until the gpu is not busy - if (~vx_busy) begin - state <= STATE_IDLE; - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: End execution\n", $time)); - `TRACE(2, ("%d: STATE IDLE\n", $time)); - `endif - end - end + if (vx_reset) begin + // wait until the reset network is ready + if (vx_reset_ctr == RESET_CTR_WIDTH'(0)) begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%t: AFU: Begin execution\n", $time)) + `endif + vx_busy_wait <= 1; + vx_reset <= 0; + end end else begin - // wait until the reset sequence is complete - if (vx_reset_ctr == (`RESET_DELAY-1)) begin - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Begin execution\n", $time)); - `endif - vx_running <= 1; - vx_busy_wait <= 1; - end + if (vx_busy_wait) begin + // wait until processor goes busy + if (vx_busy) begin + vx_busy_wait <= 0; + end + end else begin + // wait until the processor is not busy + if (~vx_busy) begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%t: AFU: End execution\n", $time)) + `TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time)) + `endif + state <= STATE_IDLE; + end + end end end default:; endcase + + // ensure reset network initialization + if (vx_reset_ctr != RESET_CTR_WIDTH'(0)) begin + vx_reset_ctr <= vx_reset_ctr - RESET_CTR_WIDTH'(1); + end end end @@ -475,8 +484,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .TAG_WIDTH (AVS_REQ_TAGW) ) cci_vx_mem_bus_if[2](); - `RESET_RELAY (cci_adapter_reset, reset); - VX_mem_adapter #( .SRC_DATA_WIDTH (CCI_DATA_WIDTH), .DST_DATA_WIDTH (LMEM_DATA_WIDTH), @@ -488,7 +495,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .RSP_OUT_BUF (0) ) cci_mem_adapter ( .clk (clk), - 
.reset (cci_adapter_reset), + .reset (reset), .mem_req_valid_in (cci_mem_req_valid), .mem_req_addr_in (cci_mem_req_addr), @@ -517,8 +524,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .mem_rsp_ready_out (cci_vx_mem_bus_if[1].rsp_ready) ); - assign cci_vx_mem_bus_if[1].req_data.atype = '0; - `UNUSED_VAR (cci_vx_mem_bus_if[1].req_data.atype) + assign cci_vx_mem_bus_if[1].req_data.flags = '0; //-- @@ -528,8 +534,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ assign vx_mem_req_valid_qual = vx_mem_req_valid && ~vx_mem_is_cout; - `RESET_RELAY (vx_adapter_reset, reset); - VX_mem_adapter #( .SRC_DATA_WIDTH (`VX_MEM_DATA_WIDTH), .DST_DATA_WIDTH (LMEM_DATA_WIDTH), @@ -541,7 +545,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .RSP_OUT_BUF (2) ) vx_mem_adapter ( .clk (clk), - .reset (vx_adapter_reset), + .reset (reset), .mem_req_valid_in (vx_mem_req_valid_qual), .mem_req_addr_in (vx_mem_req_addr), @@ -570,8 +574,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .mem_rsp_ready_out (cci_vx_mem_bus_if[0].rsp_ready) ); - assign cci_vx_mem_bus_if[0].req_data.atype = '0; - `UNUSED_VAR (cci_vx_mem_bus_if[0].req_data.atype) + assign cci_vx_mem_bus_if[0].req_data.flags = '0; //-- VX_mem_bus_if #( @@ -597,20 +600,20 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ //-- - `RESET_RELAY (avs_adapter_reset, reset); - VX_avs_adapter #( .DATA_WIDTH (LMEM_DATA_WIDTH), - .ADDR_WIDTH (LMEM_ADDR_WIDTH), + .ADDR_WIDTH_IN (LMEM_ADDR_WIDTH), + .ADDR_WIDTH_OUT($bits(t_local_mem_addr)), .BURST_WIDTH (LMEM_BURST_CTRW), .NUM_BANKS (NUM_LOCAL_MEM_BANKS), .TAG_WIDTH (AVS_REQ_TAGW + 1), .RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE), + .BANK_INTERLEAVE(`PLATFORM_MEMORY_INTERLEAVE), .REQ_OUT_BUF (2), .RSP_OUT_BUF (0) ) avs_adapter ( .clk (clk), - .reset (avs_adapter_reset), + .reset (reset), // Memory request .mem_req_valid 
(mem_bus_if[0].req_valid), @@ -639,8 +642,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ .avs_readdatavalid(avs_readdatavalid) ); - assign mem_bus_if[0].req_data.atype = '0; - `UNUSED_VAR (mem_bus_if[0].req_data.atype) + `UNUSED_VAR (mem_bus_if[0].req_data.flags) // CCI-P Read Request /////////////////////////////////////////////////////////// @@ -748,7 +750,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_rd_req_addr <= cci_rd_req_addr + 1; cci_rd_req_ctr <= cci_rd_req_ctr + $bits(cci_rd_req_ctr)'(1); `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads)); + `TRACE(2, ("%t: AFU: CCI Rd Req: addr=0x%0h, tag=0x%0h, rem=%0d, pending=%0d\n", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads)) `endif end @@ -758,13 +760,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE); end `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)); + `TRACE(2, ("%t: AFU: CCI Rd Rsp: idx=%0d, ctr=%0d, data=0x%h\n", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data)) `endif end if (cci_rdq_pop) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads)); + `TRACE(2, ("%t: AFU: CCI Rd Queue Pop: pending=%0d\n", $time, cci_pending_reads)) `endif end @@ -902,13 +904,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_wr_req_done <= 1; end `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, 
af2cp_sTxPort.c1.data)); + `TRACE(2, ("%t: AFU: CCI Wr Req: addr=0x%0h, rem=%0d, pending=%0d, data=0x%h\n", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data)) `endif end if (cci_wr_rsp_fire) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes)); + `TRACE(2, ("%t: AFU: CCI Wr Rsp: pending=%0d\n", $time, cci_pending_writes)) `endif end end @@ -926,17 +928,17 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // Vortex /////////////////////////////////////////////////////////////////// - wire vx_dcr_wr_valid = (STATE_DCR_WRITE == state); + wire vx_dcr_wr_valid = (STATE_DCR_WRITE == state); wire [`VX_DCR_ADDR_WIDTH-1:0] vx_dcr_wr_addr = cmd_dcr_addr; wire [`VX_DCR_DATA_WIDTH-1:0] vx_dcr_wr_data = cmd_dcr_data; - `SCOPE_IO_SWITCH (2) + `SCOPE_IO_SWITCH (2); Vortex vortex ( `SCOPE_IO_BIND (1) .clk (clk), - .reset (reset || ~vx_running), + .reset (vx_reset), // Memory request .mem_req_valid (vx_mem_req_valid), @@ -966,7 +968,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire [COUT_TID_WIDTH-1:0] cout_tid; - VX_onehot_encoder #( + VX_encoder #( .N (`VX_MEM_BYTEEN_WIDTH) ) cout_tid_enc ( .data_in (vx_mem_req_byteen), @@ -987,7 +989,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire cout_q_push = vx_mem_req_valid && vx_mem_is_cout && ~cout_q_full; wire cout_q_pop = cp2af_sRxPort.c0.mmioRdValid - && (mmio_hdr.address == MMIO_STATUS) + && (mmio_req_hdr.address == MMIO_STATUS) && ~cout_q_empty; VX_fifo_queue #( @@ -1010,58 +1012,59 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ // SCOPE ////////////////////////////////////////////////////////////////////// `ifdef DBG_SCOPE_AFU - wire mem_req_fire = mem_bus_if[0].req_valid && mem_bus_if[0].req_ready; - wire mem_rsp_fire = mem_bus_if[0].rsp_valid && mem_bus_if[0].rsp_ready; - wire avs_write_fire = 
avs_write[0] && ~avs_waitrequest[0]; - wire avs_read_fire = avs_read[0] && ~avs_waitrequest[0]; - wire [$bits(t_local_mem_addr)-1:0] mem_bus_if_addr = mem_bus_if[0].req_data.addr; - reg [STATE_WIDTH-1:0] state_prev; always @(posedge clk) begin state_prev <= state; end - wire state_changed = (state != state_prev); - - VX_scope_tap #( - .SCOPE_ID (0), - .TRIGGERW (24), - .PROBEW (431) - ) scope_tap ( - .clk(clk), - .reset(scope_reset_w[0]), - .start(1'b0), - .stop(1'b0), - .triggers({ - reset, - state_changed, - mem_req_fire, - mem_rsp_fire, - avs_write_fire, - avs_read_fire, + wire state_changed = (state != state_prev); + wire vx_mem_req_fire = vx_mem_req_valid && vx_mem_req_ready; + wire vx_mem_rsp_fire = vx_mem_rsp_valid && vx_mem_rsp_ready; + wire avs_req_fire = (avs_write[0] || avs_read[0]) && ~avs_waitrequest[0]; + + `NEG_EDGE (reset_negedge, reset); + `SCOPE_TAP (0, 0, { + vx_reset, + vx_busy, + vx_mem_req_valid, + vx_mem_req_ready, + vx_mem_rsp_valid, + vx_mem_rsp_ready, + avs_read[0], + avs_write[0], avs_waitrequest[0], - avs_readdatavalid[0], - cp2af_sRxPort.c0.mmioRdValid, - cp2af_sRxPort.c0.mmioWrValid, cp2af_sRxPort.c0.rspValid, cp2af_sRxPort.c1.rspValid, af2cp_sTxPort.c0.valid, af2cp_sTxPort.c1.valid, cp2af_sRxPort.c0TxAlmFull, - cp2af_sRxPort.c1TxAlmFull, - af2cp_sTxPort.c2.mmioRdValid, - cci_wr_req_fire, - cci_wr_rsp_fire, + cp2af_sRxPort.c1TxAlmFull + },{ + state_changed, + vx_dcr_wr_valid, // ack-free + avs_readdatavalid[0], // ack-free + cp2af_sRxPort.c0.mmioRdValid, // ack-free + cp2af_sRxPort.c0.mmioWrValid, // ack-free + af2cp_sTxPort.c2.mmioRdValid, // ack-free + cp2af_sRxPort.c0.rspValid, // ack-free + cp2af_sRxPort.c1.rspValid, // ack-free cci_rd_req_fire, - cci_rd_rsp_fire, - cci_pending_reads_full, - cci_pending_writes_empty, - cci_pending_writes_full - }), - .probes({ + cci_wr_req_fire, + avs_req_fire, + vx_mem_req_fire, + vx_mem_rsp_fire + },{ cmd_type, state, - mmio_hdr.address, - mmio_hdr.length, + vx_mem_req_rw, + vx_mem_req_byteen, + 
vx_mem_req_addr, + vx_mem_req_data, + vx_mem_req_tag, + vx_mem_rsp_data, + vx_mem_rsp_tag, + vx_dcr_wr_addr, + vx_dcr_wr_data, + mmio_req_hdr.address, cp2af_sRxPort.c0.hdr.mdata, af2cp_sTxPort.c0.hdr.address, af2cp_sTxPort.c0.hdr.mdata, @@ -1073,14 +1076,12 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ cci_mem_wr_req_ctr, cci_rd_req_ctr, cci_rd_rsp_ctr, - cci_wr_req_ctr, - mem_bus_if_addr - }), - .bus_in(scope_bus_in_w[0]), - .bus_out(scope_bus_out_w[0]) - ); + cci_wr_req_ctr + }, + reset_negedge, 1'b0, 4096 + ); `else - `SCOPE_IO_UNUSED_W(0) + `SCOPE_IO_UNUSED(0) `endif /////////////////////////////////////////////////////////////////////////////// @@ -1089,13 +1090,13 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ always @(posedge clk) begin for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin if (avs_write[i] && ~avs_waitrequest[i]) begin - `TRACE(2, ("%d: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i])); + `TRACE(2, ("%t: AVS Wr Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i])) end if (avs_read[i] && ~avs_waitrequest[i]) begin - `TRACE(2, ("%d: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i])); + `TRACE(2, ("%t: AVS Rd Req [%0d]: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, i, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i])) end if (avs_readdatavalid[i]) begin - `TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i])); + `TRACE(2, ("%t: AVS Rd Rsp [%0d]: data=0x%h\n", $time, i, avs_readdata[i])) end end end diff --git a/hw/rtl/afu/opae/vortex_afu.vh b/hw/rtl/afu/opae/vortex_afu.vh index 6aa532983..31f09ae90 100644 --- 
a/hw/rtl/afu/opae/vortex_afu.vh +++ b/hw/rtl/afu/opae/vortex_afu.vh @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,9 +17,9 @@ `define AFU_ACCEL_NAME "vortex_afu" `define AFU_ACCEL_UUID 128'h35F9452B_25C2_434C_93D5_6F8C60DB361C -`define AFU_IMAGE_CMD_MEM_READ 1 +`define AFU_IMAGE_CMD_MEM_READ 1 `define AFU_IMAGE_CMD_MEM_WRITE 2 -`define AFU_IMAGE_CMD_RUN 3 +`define AFU_IMAGE_CMD_RUN 3 `define AFU_IMAGE_CMD_DCR_WRITE 4 `define AFU_IMAGE_CMD_MAX_VALUE 4 diff --git a/hw/rtl/afu/xrt/VX_afu_ctrl.sv b/hw/rtl/afu/xrt/VX_afu_ctrl.sv index 687b55a8c..d14328c7d 100644 --- a/hw/rtl/afu/xrt/VX_afu_ctrl.sv +++ b/hw/rtl/afu/xrt/VX_afu_ctrl.sv @@ -14,22 +14,20 @@ `include "vortex_afu.vh" module VX_afu_ctrl #( - parameter AXI_ADDR_WIDTH = 8, - parameter AXI_DATA_WIDTH = 32, - parameter AXI_NUM_BANKS = 1 + parameter S_AXI_ADDR_WIDTH = 8, + parameter S_AXI_DATA_WIDTH = 32 ) ( // axi4 lite slave signals input wire clk, input wire reset, - input wire clk_en, input wire s_axi_awvalid, - input wire [AXI_ADDR_WIDTH-1:0] s_axi_awaddr, + input wire [S_AXI_ADDR_WIDTH-1:0] s_axi_awaddr, output wire s_axi_awready, input wire s_axi_wvalid, - input wire [AXI_DATA_WIDTH-1:0] s_axi_wdata, - input wire [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb, + input wire [S_AXI_DATA_WIDTH-1:0] s_axi_wdata, + input wire [S_AXI_DATA_WIDTH/8-1:0]s_axi_wstrb, output wire s_axi_wready, output wire s_axi_bvalid, @@ -37,11 +35,11 @@ module VX_afu_ctrl #( input wire s_axi_bready, input wire s_axi_arvalid, - input wire [AXI_ADDR_WIDTH-1:0] s_axi_araddr, + input wire 
[S_AXI_ADDR_WIDTH-1:0] s_axi_araddr, output wire s_axi_arready, output wire s_axi_rvalid, - output wire [AXI_DATA_WIDTH-1:0] s_axi_rdata, + output wire [S_AXI_DATA_WIDTH-1:0] s_axi_rdata, output wire [1:0] s_axi_rresp, input wire s_axi_rready, @@ -57,8 +55,6 @@ module VX_afu_ctrl #( output wire scope_bus_out, `endif - output wire [63:0] mem_base [AXI_NUM_BANKS], - output wire dcr_wr_valid, output wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr, output wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data @@ -110,39 +106,36 @@ module VX_afu_ctrl #( ADDR_DEV_0 = 8'h10, ADDR_DEV_1 = 8'h14, - //ADDR_DEV_CTRL = 8'h18, - ADDR_ISA_0 = 8'h1C, - ADDR_ISA_1 = 8'h20, - //ADDR_ISA_CTRL = 8'h24, + ADDR_ISA_0 = 8'h18, + ADDR_ISA_1 = 8'h1C, - ADDR_DCR_0 = 8'h28, - ADDR_DCR_1 = 8'h2C, - //ADDR_DCR_CTRL = 8'h30, + ADDR_DCR_0 = 8'h20, + ADDR_DCR_1 = 8'h24, `ifdef SCOPE - ADDR_SCP_0 = 8'h34, - ADDR_SCP_1 = 8'h38, - //ADDR_SCP_CTRL = 8'h3C, + ADDR_SCP_0 = 8'h28, + ADDR_SCP_1 = 8'h2C, `endif - ADDR_MEM_0 = 8'h40, - ADDR_MEM_1 = 8'h44, - //ADDR_MEM_CTRL = 8'h48, - ADDR_BITS = 8; localparam - WSTATE_IDLE = 2'd0, + WSTATE_ADDR = 2'd0, WSTATE_DATA = 2'd1, - WSTATE_RESP = 2'd2; + WSTATE_RESP = 2'd2, + WSTATE_WIDTH = 2; localparam - RSTATE_IDLE = 2'd0, - RSTATE_DATA = 2'd1; + RSTATE_ADDR = 2'd0, + RSTATE_DATA = 2'd1, + RSTATE_RESP = 2'd2, + RSTATE_WIDTH = 2; // device caps - wire [63:0] dev_caps = {16'b0, + wire [63:0] dev_caps = {8'b0, + 5'(`PLATFORM_MEMORY_ADDR_WIDTH-20), + 3'(`CLOG2(`PLATFORM_MEMORY_BANKS)), 8'(`LMEM_ENABLED ? 
`LMEM_LOG_SIZE : 0), 16'(`NUM_CORES * `NUM_CLUSTERS), 8'(`NUM_WARPS), @@ -153,16 +146,18 @@ module VX_afu_ctrl #( 2'(`CLOG2(`XLEN)-4), 30'(`MISA_STD)}; - reg [1:0] wstate; + reg [WSTATE_WIDTH-1:0] wstate; reg [ADDR_BITS-1:0] waddr; wire [31:0] wmask; wire s_axi_aw_fire; wire s_axi_w_fire; + wire s_axi_b_fire; - reg [1:0] rstate; + logic [RSTATE_WIDTH-1:0] rstate; reg [31:0] rdata; - wire [ADDR_BITS-1:0] raddr; + reg [ADDR_BITS-1:0] raddr; wire s_axi_ar_fire; + wire s_axi_r_fire; reg ap_reset_r; reg ap_start_r; @@ -170,20 +165,23 @@ module VX_afu_ctrl #( reg gie_r; reg [1:0] ier_r; reg [1:0] isr_r; - reg [63:0] mem_r [AXI_NUM_BANKS]; reg [31:0] dcra_r; reg [31:0] dcrv_r; reg dcr_wr_valid_r; + logic wready_stall; + logic rvalid_stall; + `ifdef SCOPE - reg [63:0] scope_bus_wdata; - reg [63:0] scope_bus_rdata; + reg [63:0] scope_bus_wdata, scope_bus_rdata; reg [5:0] scope_bus_ctr; - reg cmd_scope_reading; - reg cmd_scope_writing; + reg cmd_scope_writing, cmd_scope_reading; reg scope_bus_out_r; + reg scope_rdata_valid; + + reg is_scope_waddr, is_scope_raddr; always @(posedge clk) begin if (reset) begin @@ -191,18 +189,33 @@ module VX_afu_ctrl #( cmd_scope_writing <= 0; scope_bus_ctr <= '0; scope_bus_out_r <= 0; - end else if (clk_en) begin + is_scope_waddr <= 0; + is_scope_raddr <= 0; + scope_bus_rdata <= '0; + scope_rdata_valid <= 0; + end else begin + scope_bus_out_r <= 0; + if (s_axi_aw_fire) begin + is_scope_waddr <= (s_axi_awaddr[ADDR_BITS-1:0] == ADDR_SCP_0) + || (s_axi_awaddr[ADDR_BITS-1:0] == ADDR_SCP_1); + end + if (s_axi_ar_fire) begin + is_scope_raddr <= (s_axi_araddr[ADDR_BITS-1:0] == ADDR_SCP_0) + || (s_axi_araddr[ADDR_BITS-1:0] == ADDR_SCP_1); + end if (s_axi_w_fire && waddr == ADDR_SCP_0) begin scope_bus_wdata[31:0] <= (s_axi_wdata & wmask) | (scope_bus_wdata[31:0] & ~wmask); end if (s_axi_w_fire && waddr == ADDR_SCP_1) begin scope_bus_wdata[63:32] <= (s_axi_wdata & wmask) | (scope_bus_wdata[63:32] & ~wmask); cmd_scope_writing <= 1; + scope_rdata_valid <= 
0; scope_bus_out_r <= 1; scope_bus_ctr <= 63; end if (scope_bus_in) begin cmd_scope_reading <= 1; + scope_bus_rdata <= '0; scope_bus_ctr <= 63; end if (cmd_scope_reading) begin @@ -210,13 +223,16 @@ module VX_afu_ctrl #( scope_bus_ctr <= scope_bus_ctr - 1; if (scope_bus_ctr == 0) begin cmd_scope_reading <= 0; + scope_rdata_valid <= 1; + scope_bus_ctr <= 0; end end if (cmd_scope_writing) begin - scope_bus_out_r <= 1'(scope_bus_wdata >> scope_bus_ctr); + scope_bus_out_r <= scope_bus_wdata[scope_bus_ctr]; scope_bus_ctr <= scope_bus_ctr - 1; if (scope_bus_ctr == 0) begin cmd_scope_writing <= 0; + scope_bus_ctr <= 0; end end end @@ -224,41 +240,50 @@ module VX_afu_ctrl #( assign scope_bus_out = scope_bus_out_r; + assign wready_stall = is_scope_waddr && cmd_scope_writing; + assign rvalid_stall = is_scope_raddr && ~scope_rdata_valid; + +`else + + assign wready_stall = 0; + assign rvalid_stall = 0; + `endif - // AXI Write + // AXI Write Request + assign s_axi_awready = (wstate == WSTATE_ADDR); + assign s_axi_wready = (wstate == WSTATE_DATA) && ~wready_stall; - assign s_axi_awready = (wstate == WSTATE_IDLE); - assign s_axi_wready = (wstate == WSTATE_DATA); + // AXI Write Response assign s_axi_bvalid = (wstate == WSTATE_RESP); assign s_axi_bresp = 2'b00; // OKAY - assign s_axi_aw_fire = s_axi_awvalid && s_axi_awready; - assign s_axi_w_fire = s_axi_wvalid && s_axi_wready; - - for (genvar i = 0; i < 4; ++i) begin + for (genvar i = 0; i < 4; ++i) begin : g_wmask assign wmask[8 * i +: 8] = {8{s_axi_wstrb[i]}}; end + assign s_axi_aw_fire = s_axi_awvalid && s_axi_awready; + assign s_axi_w_fire = s_axi_wvalid && s_axi_wready; + assign s_axi_b_fire = s_axi_bvalid && s_axi_bready; + // wstate always @(posedge clk) begin if (reset) begin - wstate <= WSTATE_IDLE; - end else if (clk_en) begin + wstate <= WSTATE_ADDR; + end else begin case (wstate) - WSTATE_IDLE: wstate <= s_axi_awvalid ? WSTATE_DATA : WSTATE_IDLE; - WSTATE_DATA: wstate <= s_axi_wvalid ? 
WSTATE_RESP : WSTATE_DATA; - WSTATE_RESP: wstate <= s_axi_bready ? WSTATE_IDLE : WSTATE_RESP; - default: wstate <= WSTATE_IDLE; + WSTATE_ADDR: wstate <= s_axi_aw_fire ? WSTATE_DATA : WSTATE_ADDR; + WSTATE_DATA: wstate <= s_axi_w_fire ? WSTATE_RESP : WSTATE_DATA; + WSTATE_RESP: wstate <= s_axi_b_fire ? WSTATE_ADDR : WSTATE_RESP; + default: wstate <= WSTATE_ADDR; endcase end end // waddr always @(posedge clk) begin - if (clk_en) begin - if (s_axi_aw_fire) - waddr <= s_axi_awaddr[ADDR_BITS-1:0]; + if (s_axi_aw_fire) begin + waddr <= s_axi_awaddr[ADDR_BITS-1:0]; end end @@ -276,16 +301,13 @@ module VX_afu_ctrl #( dcra_r <= '0; dcrv_r <= '0; dcr_wr_valid_r <= 0; + end else begin + dcr_wr_valid_r <= 0; + ap_reset_r <= 0; - for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin - mem_r[i] <= '0; - end - end else if (clk_en) begin if (ap_ready) ap_start_r <= auto_restart_r; - dcr_wr_valid_r <= 0; - if (s_axi_w_fire) begin case (waddr) ADDR_AP_CTRL: begin @@ -317,16 +339,7 @@ module VX_afu_ctrl #( dcrv_r <= (s_axi_wdata & wmask) | (dcrv_r & ~wmask); dcr_wr_valid_r <= 1; end - default: begin - for (integer i = 0; i < AXI_NUM_BANKS; ++i) begin - if (waddr == (ADDR_MEM_0 + 8'(i) * 8'd12)) begin - mem_r[i][31:0] <= (s_axi_wdata & wmask) | (mem_r[i][31:0] & ~wmask); - end - if (waddr == (ADDR_MEM_1 + 8'(i) * 8'd12)) begin - mem_r[i][63:32] <= (s_axi_wdata & wmask) | (mem_r[i][63:32] & ~wmask); - end - end - end + default:; endcase if (ier_r[0] & ap_done) @@ -337,83 +350,86 @@ module VX_afu_ctrl #( end end - // AXI Read + // AXI Read Request + assign s_axi_arready = (rstate == RSTATE_ADDR); - assign s_axi_arready = (rstate == RSTATE_IDLE); - assign s_axi_rvalid = (rstate == RSTATE_DATA); + // AXI Read Response + assign s_axi_rvalid = (rstate == RSTATE_RESP); assign s_axi_rdata = rdata; assign s_axi_rresp = 2'b00; // OKAY assign s_axi_ar_fire = s_axi_arvalid && s_axi_arready; - assign raddr = s_axi_araddr[ADDR_BITS-1:0]; + assign s_axi_r_fire = s_axi_rvalid && s_axi_rready; // rstate 
always @(posedge clk) begin if (reset) begin - rstate <= RSTATE_IDLE; - end else if (clk_en) begin + rstate <= RSTATE_ADDR; + end else begin case (rstate) - RSTATE_IDLE: rstate <= s_axi_arvalid ? RSTATE_DATA : RSTATE_IDLE; - RSTATE_DATA: rstate <= (s_axi_rready & s_axi_rvalid) ? RSTATE_IDLE : RSTATE_DATA; - default: rstate <= RSTATE_IDLE; + RSTATE_ADDR: rstate <= s_axi_ar_fire ? RSTATE_DATA : RSTATE_ADDR; + RSTATE_DATA: rstate <= (~rvalid_stall) ? RSTATE_RESP : RSTATE_DATA; + RSTATE_RESP: rstate <= s_axi_r_fire ? RSTATE_ADDR : RSTATE_RESP; + default: rstate <= RSTATE_ADDR; endcase end end + // raddr + always @(posedge clk) begin + if (s_axi_ar_fire) begin + raddr <= s_axi_araddr[ADDR_BITS-1:0]; + end + end + // rdata always @(posedge clk) begin - if (clk_en) begin - if (s_axi_ar_fire) begin - rdata <= '0; - case (raddr) - ADDR_AP_CTRL: begin - rdata[0] <= ap_start_r; - rdata[1] <= ap_done; - rdata[2] <= ap_idle; - rdata[3] <= ap_ready; - rdata[7] <= auto_restart_r; - end - ADDR_GIE: begin - rdata <= 32'(gie_r); - end - ADDR_IER: begin - rdata <= 32'(ier_r); - end - ADDR_ISR: begin - rdata <= 32'(isr_r); - end - ADDR_DEV_0: begin - rdata <= dev_caps[31:0]; - end - ADDR_DEV_1: begin - rdata <= dev_caps[63:32]; - end - ADDR_ISA_0: begin - rdata <= isa_caps[31:0]; - end - ADDR_ISA_1: begin - rdata <= isa_caps[63:32]; - end - `ifdef SCOPE - ADDR_SCP_0: begin - rdata <= scope_bus_rdata[31:0]; - end - ADDR_SCP_1: begin - rdata <= scope_bus_rdata[63:32]; - end - `endif - default:; - endcase + rdata <= '0; + case (raddr) + ADDR_AP_CTRL: begin + rdata[0] <= ap_start_r; + rdata[1] <= ap_done; + rdata[2] <= ap_idle; + rdata[3] <= ap_ready; + rdata[7] <= auto_restart_r; end - end + ADDR_GIE: begin + rdata <= 32'(gie_r); + end + ADDR_IER: begin + rdata <= 32'(ier_r); + end + ADDR_ISR: begin + rdata <= 32'(isr_r); + end + ADDR_DEV_0: begin + rdata <= dev_caps[31:0]; + end + ADDR_DEV_1: begin + rdata <= dev_caps[63:32]; + end + ADDR_ISA_0: begin + rdata <= isa_caps[31:0]; + end + 
ADDR_ISA_1: begin + rdata <= isa_caps[63:32]; + end + `ifdef SCOPE + ADDR_SCP_0: begin + rdata <= scope_bus_rdata[31:0]; + end + ADDR_SCP_1: begin + rdata <= scope_bus_rdata[63:32]; + end + `endif + default:; + endcase end assign ap_reset = ap_reset_r; assign ap_start = ap_start_r; assign interrupt = gie_r & (| isr_r); - assign mem_base = mem_r; - assign dcr_wr_valid = dcr_wr_valid_r; assign dcr_wr_addr = `VX_DCR_ADDR_WIDTH'(dcra_r); assign dcr_wr_data = `VX_DCR_DATA_WIDTH'(dcrv_r); diff --git a/hw/rtl/afu/xrt/VX_afu_wrap.sv b/hw/rtl/afu/xrt/VX_afu_wrap.sv index a844802e9..2b1bfb7c2 100644 --- a/hw/rtl/afu/xrt/VX_afu_wrap.sv +++ b/hw/rtl/afu/xrt/VX_afu_wrap.sv @@ -16,17 +16,21 @@ module VX_afu_wrap #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, - parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, - parameter C_M_AXI_MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, - parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH + parameter C_M_AXI_MEM_ID_WIDTH = 32, + parameter C_M_AXI_MEM_DATA_WIDTH = 512, + parameter C_M_AXI_MEM_ADDR_WIDTH = 25, + parameter C_M_AXI_MEM_NUM_BANKS = 2 ) ( // System signals - input wire ap_clk, - input wire ap_rst_n, + input wire clk, + input wire reset, // AXI4 master interface - `REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), - +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE + `REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA), +`else + `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA), +`endif // AXI4-Lite slave interface input wire s_axi_ctrl_awvalid, output wire s_axi_ctrl_awready, @@ -48,11 +52,18 @@ module VX_afu_wrap #( output wire interrupt ); - localparam C_M_AXI_MEM_NUM_BANKS = `M_AXI_MEM_NUM_BANKS; +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE + localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH + $clog2(`PLATFORM_MEMORY_BANKS); +`else + localparam M_AXI_MEM_ADDR_WIDTH = `PLATFORM_MEMORY_ADDR_WIDTH; +`endif localparam STATE_IDLE = 0; localparam STATE_RUN = 1; + localparam PENDING_SIZEW = 12; // max 
outstanding requests size + localparam C_M_AXI_MEM_NUM_BANKS_SW = `CLOG2(C_M_AXI_MEM_NUM_BANKS+1); + wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS]; wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS]; wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_a [C_M_AXI_MEM_NUM_BANKS]; @@ -80,19 +91,18 @@ module VX_afu_wrap #( wire [1:0] m_axi_mem_rresp_a [C_M_AXI_MEM_NUM_BANKS]; // convert memory interface to array - `REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON); - - wire reset = ~ap_rst_n; +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE + `REPEAT (1, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON); +`else + `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_TO_ARRAY, REPEAT_SEMICOLON); +`endif reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr; - reg [15:0] vx_pending_writes; + reg [PENDING_SIZEW-1:0] vx_pending_writes; reg vx_busy_wait; - reg vx_running; - + reg vx_reset = 1; // asserted at initialization wire vx_busy; - wire [63:0] mem_base [C_M_AXI_MEM_NUM_BANKS]; - wire dcr_wr_valid; wire [`VX_DCR_ADDR_WIDTH-1:0] dcr_wr_addr; wire [`VX_DCR_DATA_WIDTH-1:0] dcr_wr_data; @@ -101,8 +111,8 @@ module VX_afu_wrap #( wire ap_reset; wire ap_start; - wire ap_idle = ~vx_running; - wire ap_done = ~(state == STATE_RUN || vx_pending_writes != 0); + wire ap_idle = vx_reset; + wire ap_done = (state == STATE_IDLE) && (vx_pending_writes == '0); wire ap_ready = 1'b1; `ifdef SCOPE @@ -111,24 +121,33 @@ module VX_afu_wrap #( wire scope_reset = reset; `endif - always @(posedge ap_clk) begin + always @(posedge clk) begin if (reset || ap_reset) begin - state <= STATE_IDLE; - vx_busy_wait <= 0; - vx_running <= 0; + state <= STATE_IDLE; + vx_reset <= 1; end else begin case (state) STATE_IDLE: begin if (ap_start) begin `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: STATE RUN\n", $time)); + `TRACE(2, ("%t: AFU: Goto STATE RUN\n", $time)) `endif state <= STATE_RUN; - vx_running <= 0; + vx_reset_ctr <= (`RESET_DELAY-1); + vx_reset <= 1; end end STATE_RUN: begin - if (vx_running) begin + if (vx_reset) begin + // wait 
until the reset network is ready + if (vx_reset_ctr == 0) begin + `ifdef DBG_TRACE_AFU + `TRACE(2, ("%t: AFU: Begin execution\n", $time)) + `endif + vx_busy_wait <= 1; + vx_reset <= 0; + end + end else begin if (vx_busy_wait) begin // wait until processor goes busy if (vx_busy) begin @@ -137,67 +156,63 @@ module VX_afu_wrap #( end else begin // wait until the processor is not busy if (~vx_busy) begin - state <= STATE_IDLE; `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: End execution\n", $time)); - `TRACE(2, ("%d: STATE IDLE\n", $time)); + `TRACE(2, ("%t: AFU: End execution\n", $time)) + `TRACE(2, ("%t: AFU: Goto STATE IDLE\n", $time)) `endif + state <= STATE_IDLE; end end - end else begin - // wait until the reset sequence is complete - if (vx_reset_ctr == (`RESET_DELAY-1)) begin - `ifdef DBG_TRACE_AFU - `TRACE(2, ("%d: AFU: Begin execution\n", $time)); - `endif - vx_running <= 1; - vx_busy_wait <= 1; - end end end endcase - end - end - - reg m_axi_mem_wfire; - reg m_axi_mem_bfire; - always @(*) begin - m_axi_mem_wfire = 0; - m_axi_mem_bfire = 0; - for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin - m_axi_mem_wfire |= m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]; - m_axi_mem_bfire |= m_axi_mem_bvalid_a[i] && m_axi_mem_bready_a[i]; + // ensure reset network initialization + if (vx_reset_ctr != '0) begin + vx_reset_ctr <= vx_reset_ctr - 1; + end end end - always @(posedge ap_clk) begin - if (reset || ap_reset) begin - vx_pending_writes <= '0; - end else begin - if (m_axi_mem_wfire && ~m_axi_mem_bfire) - vx_pending_writes <= vx_pending_writes + 1; - if (~m_axi_mem_wfire && m_axi_mem_bfire) - vx_pending_writes <= vx_pending_writes - 1; - end + wire [C_M_AXI_MEM_NUM_BANKS-1:0] m_axi_wr_req_fire, m_axi_wr_rsp_fire; + wire [C_M_AXI_MEM_NUM_BANKS_SW-1:0] cur_wr_reqs, cur_wr_rsps; + + for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_awfire + VX_axi_write_ack axi_write_ack ( + .clk (clk), + .reset (reset), + .awvalid(m_axi_mem_awvalid_a[i]), + 
.awready(m_axi_mem_awready_a[i]), + .wvalid (m_axi_mem_wvalid_a[i]), + .wready (m_axi_mem_wready_a[i]), + .tx_ack (m_axi_wr_req_fire[i]), + `UNUSED_PIN (aw_ack), + `UNUSED_PIN (w_ack), + `UNUSED_PIN (tx_rdy) + ); + assign m_axi_wr_rsp_fire[i] = m_axi_mem_bvalid_a[i] & m_axi_mem_bready_a[i]; end - always @(posedge ap_clk) begin - if (state == STATE_RUN) begin - vx_reset_ctr <= vx_reset_ctr + 1; + `POP_COUNT(cur_wr_reqs, m_axi_wr_req_fire); + `POP_COUNT(cur_wr_rsps, m_axi_wr_rsp_fire); + + wire signed [C_M_AXI_MEM_NUM_BANKS_SW:0] reqs_sub = (C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_reqs) - + (C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_rsps); + + always @(posedge clk) begin + if (reset) begin + vx_pending_writes <= '0; end else begin - vx_reset_ctr <= '0; + vx_pending_writes <= vx_pending_writes + PENDING_SIZEW'(reqs_sub); end end VX_afu_ctrl #( - .AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), - .AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), - .AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) + .S_AXI_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), + .S_AXI_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH) ) afu_ctrl ( - .clk (ap_clk), - .reset (reset || ap_reset), - .clk_en (1'b1), + .clk (clk), + .reset (reset), .s_axi_awvalid (s_axi_ctrl_awvalid), .s_axi_awready (s_axi_ctrl_awready), @@ -229,37 +244,36 @@ module VX_afu_wrap #( .scope_bus_out (scope_bus_in), `endif - .mem_base (mem_base), - .dcr_wr_valid (dcr_wr_valid), .dcr_wr_addr (dcr_wr_addr), .dcr_wr_data (dcr_wr_data) ); - wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_w [C_M_AXI_MEM_NUM_BANKS]; - wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_w [C_M_AXI_MEM_NUM_BANKS]; + wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS]; + wire [M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS]; - for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin - assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_w[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); - assign m_axi_mem_araddr_a[i] = 
C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_w[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]); + for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_addressing + localparam [C_M_AXI_MEM_ADDR_WIDTH-1:0] BANK_OFFSET = C_M_AXI_MEM_ADDR_WIDTH'(`PLATFORM_MEMORY_OFFSET) + (C_M_AXI_MEM_ADDR_WIDTH'(i) << M_AXI_MEM_ADDR_WIDTH); /* platform base offset plus the i-th bank stride; parentheses are required because '+' binds tighter than '<<' */ + assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + BANK_OFFSET; + assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + BANK_OFFSET; end - `SCOPE_IO_SWITCH (2) + `SCOPE_IO_SWITCH (2); Vortex_axi #( .AXI_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), - .AXI_ADDR_WIDTH (`MEM_ADDR_WIDTH), + .AXI_ADDR_WIDTH (M_AXI_MEM_ADDR_WIDTH), .AXI_TID_WIDTH (C_M_AXI_MEM_ID_WIDTH), .AXI_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) ) vortex_axi ( `SCOPE_IO_BIND (1) - .clk (ap_clk), - .reset (reset || ap_reset || ~vx_running), + .clk (clk), + .reset (vx_reset), .m_axi_awvalid (m_axi_mem_awvalid_a), .m_axi_awready (m_axi_mem_awready_a), - .m_axi_awaddr (m_axi_mem_awaddr_w), + .m_axi_awaddr (m_axi_mem_awaddr_u), .m_axi_awid (m_axi_mem_awid_a), .m_axi_awlen (m_axi_mem_awlen_a), `UNUSED_PIN (m_axi_awsize), @@ -283,7 +297,7 @@ module VX_afu_wrap #( .m_axi_arvalid (m_axi_mem_arvalid_a), .m_axi_arready (m_axi_mem_arready_a), - .m_axi_araddr (m_axi_mem_araddr_w), + .m_axi_araddr (m_axi_mem_araddr_u), .m_axi_arid (m_axi_mem_arid_a), .m_axi_arlen (m_axi_mem_arlen_a), `UNUSED_PIN (m_axi_arsize), @@ -310,38 +324,75 @@ module VX_afu_wrap #( // SCOPE ////////////////////////////////////////////////////////////////////// +`ifdef SCOPE `ifdef DBG_SCOPE_AFU - `define TRIGGERS { \ - reset, \ - ap_start, \ - ap_done, \ - ap_idle, \ - interrupt, \ - vx_busy_wait, \ - vx_busy, \ - vx_running \ - } - - `define PROBES { \ - vx_pending_writes \ - } - - VX_scope_tap #( - .SCOPE_ID (0), - .TRIGGERW ($bits(`TRIGGERS)), - .PROBEW ($bits(`PROBES)) - ) scope_tap ( - .clk (clk), - .reset (scope_reset_w[0]), - .start (1'b0), - .stop (1'b0), - .triggers (`TRIGGERS), 
- .probes (`PROBES), - .bus_in (scope_bus_in_w[0]), - .bus_out (scope_bus_out_w[0]) - ); + wire m_axi_mem_awfire_0 = m_axi_mem_awvalid_a[0] & m_axi_mem_awready_a[0]; + wire m_axi_mem_arfire_0 = m_axi_mem_arvalid_a[0] & m_axi_mem_arready_a[0]; + wire m_axi_mem_wfire_0 = m_axi_mem_wvalid_a[0] & m_axi_mem_wready_a[0]; + wire m_axi_mem_bfire_0 = m_axi_mem_bvalid_a[0] & m_axi_mem_bready_a[0]; + + `NEG_EDGE (reset_negedge, reset); + `SCOPE_TAP (0, 0, { + ap_reset, + ap_start, + ap_done, + ap_idle, + interrupt, + vx_reset, + vx_busy, + m_axi_mem_awvalid_a[0], + m_axi_mem_awready_a[0], + m_axi_mem_wvalid_a[0], + m_axi_mem_wready_a[0], + m_axi_mem_bvalid_a[0], + m_axi_mem_bready_a[0], + m_axi_mem_arvalid_a[0], + m_axi_mem_arready_a[0], + m_axi_mem_rvalid_a[0], + m_axi_mem_rready_a[0] + }, { + dcr_wr_valid, + m_axi_mem_awfire_0, + m_axi_mem_arfire_0, + m_axi_mem_wfire_0, + m_axi_mem_bfire_0 + },{ + dcr_wr_addr, + dcr_wr_data, + vx_pending_writes, + m_axi_mem_awaddr_u[0], + m_axi_mem_awid_a[0], + m_axi_mem_bid_a[0], + m_axi_mem_araddr_u[0], + m_axi_mem_arid_a[0], + m_axi_mem_rid_a[0] + }, + reset_negedge, 1'b0, 4096 + ); `else - `SCOPE_IO_UNUSED_W(0) + `SCOPE_IO_UNUSED(0) +`endif +`endif +`ifdef CHIPSCOPE + ila_afu ila_afu_inst ( + .clk (clk), + .probe0 ({ + ap_reset, + ap_start, + ap_done, + ap_idle, + interrupt + }), + .probe1 ({ + vx_pending_writes, + vx_busy_wait, + vx_busy, + vx_reset, + dcr_wr_valid, + dcr_wr_addr, + dcr_wr_data + }) + ); `endif `ifdef SIMULATION @@ -352,7 +403,7 @@ module VX_afu_wrap #( initial begin $assertoff(0, vortex_axi); end - always @(posedge ap_clk) begin + always @(posedge clk) begin if (reset) begin assert_delay_ctr <= '0; assert_enabled <= 0; @@ -371,19 +422,19 @@ module VX_afu_wrap #( `endif `ifdef DBG_TRACE_AFU - always @(posedge ap_clk) begin + always @(posedge clk) begin for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin - `TRACE(2, ("%d: AFU Wr Req [%0d]: addr=0x%0h, 
tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])); + `TRACE(2, ("%t: AXI Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i])) end if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin - `TRACE(2, ("%d: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i])); + `TRACE(2, ("%t: AXI Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i])) end if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin - `TRACE(2, ("%d: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])); + `TRACE(2, ("%t: AXI Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i])) end if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin - `TRACE(2, ("%d: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])); + `TRACE(2, ("%t: AXI Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i])) end end end diff --git a/hw/rtl/afu/xrt/vortex_afu.v b/hw/rtl/afu/xrt/vortex_afu.v index 2c31900cb..afda57f72 100644 --- a/hw/rtl/afu/xrt/vortex_afu.v +++ b/hw/rtl/afu/xrt/vortex_afu.v @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -16,16 +16,25 @@ module vortex_afu #( parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, - parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, + parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH, + parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH, parameter C_M_AXI_MEM_ADDR_WIDTH = 64, - parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE + parameter C_M_AXI_MEM_NUM_BANKS = 1 +`else + parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS +`endif ) ( // System signals input wire ap_clk, input wire ap_rst_n, - + // AXI4 master interface - `REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), +`ifdef PLATFORM_MERGED_MEMORY_INTERFACE + `REPEAT (1, GEN_AXI_MEM, REPEAT_COMMA), +`else + `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA), +`endif // AXI4-Lite slave interface input wire s_axi_ctrl_awvalid, @@ -45,8 +54,8 @@ module vortex_afu #( output wire s_axi_ctrl_bvalid, input wire s_axi_ctrl_bready, output wire [1:0] s_axi_ctrl_bresp, - - output wire interrupt + + output wire interrupt ); VX_afu_wrap #( @@ -54,16 +63,19 @@ module vortex_afu #( .C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), .C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH), .C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), - .C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH) + .C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), + .C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) ) afu_wrap ( - .ap_clk (ap_clk), - .ap_rst_n (ap_rst_n), - - `REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), - + .clk (ap_clk), + .reset (~ap_rst_n), + `ifdef PLATFORM_MERGED_MEMORY_INTERFACE + `REPEAT (1, AXI_MEM_ARGS, REPEAT_COMMA), + `else + `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), + `endif .s_axi_ctrl_awvalid (s_axi_ctrl_awvalid), .s_axi_ctrl_awready (s_axi_ctrl_awready), - .s_axi_ctrl_awaddr (s_axi_ctrl_awaddr), + .s_axi_ctrl_awaddr (s_axi_ctrl_awaddr), .s_axi_ctrl_wvalid 
(s_axi_ctrl_wvalid), .s_axi_ctrl_wready (s_axi_ctrl_wready), .s_axi_ctrl_wdata (s_axi_ctrl_wdata), @@ -81,5 +93,5 @@ module vortex_afu #( .interrupt (interrupt) ); - + endmodule diff --git a/hw/rtl/afu/xrt/vortex_afu.vh b/hw/rtl/afu/xrt/vortex_afu.vh index 3616b0794..8018171e7 100644 --- a/hw/rtl/afu/xrt/vortex_afu.vh +++ b/hw/rtl/afu/xrt/vortex_afu.vh @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,12 +14,24 @@ `ifndef VORTEX_AFU_VH `define VORTEX_AFU_VH -`ifndef M_AXI_MEM_NUM_BANKS -`define M_AXI_MEM_NUM_BANKS 1 +`ifndef PLATFORM_MEMORY_BANKS +`define PLATFORM_MEMORY_BANKS 2 `endif -`ifndef M_AXI_MEM_ID_WIDTH -`define M_AXI_MEM_ID_WIDTH 32 +`ifndef PLATFORM_MEMORY_ADDR_WIDTH +`define PLATFORM_MEMORY_ADDR_WIDTH 31 +`endif + +`ifndef PLATFORM_MEMORY_DATA_WIDTH +`define PLATFORM_MEMORY_DATA_WIDTH 512 +`endif + +`ifndef PLATFORM_MEMORY_OFFSET +`define PLATFORM_MEMORY_OFFSET 0 +`endif + +`ifndef PLATFORM_MEMORY_ID_WIDTH +`define PLATFORM_MEMORY_ID_WIDTH 32 `endif `define GEN_AXI_MEM(i) \ diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index 6c02c1e13..a01ae0e0b 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -113,14 +113,16 @@ module VX_bank_flush #( assign flush_valid = (state_r == STATE_FLUSH); assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0]; - if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin - reg [NUM_WAYS-1:0] flush_way_r; - always @(*) begin - flush_way_r = '0; - flush_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1; - end - assign 
flush_way = flush_way_r; - end else begin + if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way + VX_decoder #( + .N (`CS_WAY_SEL_BITS), + .D (NUM_WAYS) + ) ctr_decoder ( + .data_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), + .valid_in (1'b1), + .data_out (flush_way) + ); + end else begin : g_flush_way_all assign flush_way = {NUM_WAYS{1'b1}}; end diff --git a/hw/rtl/cache/VX_cache.sv b/hw/rtl/cache/VX_cache.sv index ae0747690..06887944c 100644 --- a/hw/rtl/cache/VX_cache.sv +++ b/hw/rtl/cache/VX_cache.sv @@ -83,7 +83,7 @@ module VX_cache import VX_gpu_pkg::*; #( localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS); localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS); localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); - localparam MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; + localparam MEM_TAG_WIDTH = `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH); localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; localparam WORD_WIDTH = WORD_SIZE * 8; localparam WORD_SEL_BITS = `CLOG2(WORDS_PER_LINE); @@ -92,9 +92,10 @@ module VX_cache import VX_gpu_pkg::*; #( localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS); localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1; localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH; + localparam BANK_MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH; - localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1); - localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1); + localparam CORE_RSP_REG_DISABLE = (NUM_BANKS != 1) || (NUM_REQS != 1); + localparam MEM_REQ_REG_DISABLE = (NUM_BANKS != 1); localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 
2 : 0; @@ -110,6 +111,7 @@ module VX_cache import VX_gpu_pkg::*; #( ) core_bus2_if[NUM_REQS](); wire [NUM_BANKS-1:0] per_bank_flush_begin; + wire [`UP(UUID_WIDTH)-1:0] flush_uuid; wire [NUM_BANKS-1:0] per_bank_flush_end; wire [NUM_BANKS-1:0] per_bank_core_req_fire; @@ -117,6 +119,8 @@ module VX_cache import VX_gpu_pkg::*; #( VX_cache_flush #( .NUM_REQS (NUM_REQS), .NUM_BANKS (NUM_BANKS), + .UUID_WIDTH(UUID_WIDTH), + .TAG_WIDTH (TAG_WIDTH), .BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency ) flush_unit ( .clk (clk), @@ -125,6 +129,7 @@ module VX_cache import VX_gpu_pkg::*; #( .core_bus_out_if (core_bus2_if), .bank_req_fire (per_bank_core_req_fire), .flush_begin (per_bank_flush_begin), + .flush_uuid (flush_uuid), .flush_end (per_bank_flush_end) ); @@ -136,17 +141,14 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_rsp_tag_s; wire [NUM_REQS-1:0] core_rsp_ready_s; - `RESET_RELAY_EX (core_rsp_reset, reset, NUM_REQS, `MAX_FANOUT); - - for (genvar i = 0; i < NUM_REQS; ++i) begin - + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_buf VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + TAG_WIDTH), - .SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), + .SIZE (CORE_RSP_REG_DISABLE ? 
`TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) ) core_rsp_buf ( .clk (clk), - .reset (core_rsp_reset[i]), + .reset (reset), .valid_in (core_rsp_valid_s[i]), .ready_in (core_rsp_ready_s[i]), .data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}), @@ -158,38 +160,13 @@ module VX_cache import VX_gpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// - // Memory request buffering - wire mem_req_valid_s; - wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_s; - wire mem_req_rw_s; - wire [LINE_SIZE-1:0] mem_req_byteen_s; - wire [`CS_LINE_WIDTH-1:0] mem_req_data_s; - wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s; - wire mem_req_flush_s; - wire mem_req_ready_s; - - wire mem_bus_if_flush; - - VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), - .SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), - .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) - ) mem_req_buf ( - .clk (clk), - .reset (reset), - .valid_in (mem_req_valid_s), - .ready_in (mem_req_ready_s), - .data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}), - .data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag, mem_bus_if_flush}), - .valid_out (mem_bus_if.req_valid), - .ready_out (mem_bus_if.req_ready) - ); - - assign mem_bus_if.req_data.atype = mem_bus_if_flush ? 
`ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0; - - /////////////////////////////////////////////////////////////////////////// + VX_mem_bus_if #( + .DATA_SIZE (LINE_SIZE), + .TAG_WIDTH (MEM_TAG_WIDTH) + ) mem_bus_tmp_if(); // Memory response buffering + wire mem_rsp_valid_s; wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_s; wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s; @@ -202,14 +179,61 @@ module VX_cache import VX_gpu_pkg::*; #( ) mem_rsp_queue ( .clk (clk), .reset (reset), - .valid_in (mem_bus_if.rsp_valid), - .ready_in (mem_bus_if.rsp_ready), - .data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}), + .valid_in (mem_bus_tmp_if.rsp_valid), + .ready_in (mem_bus_tmp_if.rsp_ready), + .data_in ({mem_bus_tmp_if.rsp_data.tag, mem_bus_tmp_if.rsp_data.data}), .data_out ({mem_rsp_tag_s, mem_rsp_data_s}), .valid_out (mem_rsp_valid_s), .ready_out (mem_rsp_ready_s) ); + wire [BANK_MEM_TAG_WIDTH-1:0] bank_mem_rsp_tag; + wire [`UP(`CS_BANK_SEL_BITS)-1:0] mem_rsp_bank_id; + + if (NUM_BANKS > 1) begin : g_mem_rsp_tag_s_with_banks + assign bank_mem_rsp_tag = mem_rsp_tag_s[MEM_TAG_WIDTH-1:`CS_BANK_SEL_BITS]; + assign mem_rsp_bank_id = mem_rsp_tag_s[`CS_BANK_SEL_BITS-1:0]; + end else begin : g_mem_rsp_tag_s_no_bank + assign bank_mem_rsp_tag = mem_rsp_tag_s; + assign mem_rsp_bank_id = 0; + end + + // Memory request buffering + + wire mem_req_valid; + wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr; + wire mem_req_rw; + wire [LINE_SIZE-1:0] mem_req_byteen; + wire [`CS_LINE_WIDTH-1:0] mem_req_data; + wire [MEM_TAG_WIDTH-1:0] mem_req_tag; + wire mem_req_flush; + wire mem_req_ready; + + wire mem_req_flush_b; + + VX_elastic_buffer #( + .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), + .SIZE (MEM_REQ_REG_DISABLE ? 
`TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), + .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) + ) mem_req_buf ( + .clk (clk), + .reset (reset), + .valid_in (mem_req_valid), + .ready_in (mem_req_ready), + .data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}), + .data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}), + .valid_out (mem_bus_tmp_if.req_valid), + .ready_out (mem_bus_tmp_if.req_ready) + ); + + assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? `MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0; + + if (WRITE_ENABLE) begin : g_mem_bus_if + `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); + end else begin : g_mem_bus_if_ro + `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if); + end + /////////////////////////////////////////////////////////////////////////// wire [NUM_BANKS-1:0] per_bank_core_req_valid; @@ -234,7 +258,7 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0] per_bank_mem_req_rw; wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen; wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data; - wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id; + wire [NUM_BANKS-1:0][BANK_MEM_TAG_WIDTH-1:0] per_bank_mem_req_tag; wire [NUM_BANKS-1:0] per_bank_mem_req_flush; wire [NUM_BANKS-1:0] per_bank_mem_req_ready; @@ -242,11 +266,7 @@ module VX_cache import VX_gpu_pkg::*; #( assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_mem_req_ready; - if (NUM_BANKS == 1) begin - assign mem_rsp_ready_s = per_bank_mem_rsp_ready; - end else begin - assign mem_rsp_ready_s = per_bank_mem_rsp_ready[`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s)]; - end + assign mem_rsp_ready_s = per_bank_mem_rsp_ready[mem_rsp_bank_id]; // Bank requests dispatch @@ -266,35 +286,38 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in; wire 
[NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req assign core_req_valid[i] = core_bus2_if[i].req_valid; assign core_req_rw[i] = core_bus2_if[i].req_data.rw; assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen; assign core_req_addr[i] = core_bus2_if[i].req_data.addr; assign core_req_data[i] = core_bus2_if[i].req_data.data; assign core_req_tag[i] = core_bus2_if[i].req_data.tag; - assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH]; + assign core_req_flush[i] = core_bus2_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH]; assign core_bus2_if[i].req_ready = core_req_ready[i]; end - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (WORDS_PER_LINE > 1) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_wsel + if (WORDS_PER_LINE > 1) begin : g_wsel assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS]; - end else begin + end else begin : g_no_wsel assign core_req_wsel[i] = '0; end + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_line_addr assign core_req_line_addr[i] = core_req_addr[i][(BANK_SEL_BITS + WORD_SEL_BITS) +: LINE_ADDR_WIDTH]; end - if (NUM_BANKS > 1) begin - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_bid + if (NUM_BANKS > 1) begin : g_multibanks assign core_req_bid[i] = core_req_addr[i][WORD_SEL_BITS +: BANK_SEL_BITS]; + end else begin : g_singlebank + assign core_req_bid[i] = '0; end - end else begin - assign core_req_bid = '0; end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_data_in assign core_req_data_in[i] = { core_req_line_addr[i], core_req_rw[i], @@ -310,18 +333,16 @@ module VX_cache import VX_gpu_pkg::*; #( wire [`PERF_CTR_BITS-1:0] perf_collisions; `endif - `RESET_RELAY (req_xbar_reset, reset); - VX_stream_xbar #( .NUM_INPUTS (NUM_REQS), .NUM_OUTPUTS (NUM_BANKS), 
.DATAW (CORE_REQ_DATAW), .PERF_CTR_BITS (`PERF_CTR_BITS), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (REQ_XBAR_BUF) ) req_xbar ( .clk (clk), - .reset (req_xbar_reset), + .reset (reset), `ifdef PERF_ENABLE .collisions(perf_collisions), `else @@ -337,7 +358,7 @@ module VX_cache import VX_gpu_pkg::*; #( .ready_out (per_bank_core_req_ready) ); - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_core_req_data_out assign { per_bank_core_req_addr[i], per_bank_core_req_rw[i], @@ -350,17 +371,10 @@ module VX_cache import VX_gpu_pkg::*; #( end // Banks access - for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks + for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : g_banks wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr; - wire curr_bank_mem_rsp_valid; - - if (NUM_BANKS == 1) begin - assign curr_bank_mem_rsp_valid = mem_rsp_valid_s; - end else begin - assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == bank_id); - end - `RESET_RELAY (bank_reset, reset); + wire curr_bank_mem_rsp_valid = mem_rsp_valid_s && (mem_rsp_bank_id == bank_id); VX_cache_bank #( .BANK_ID (bank_id), @@ -379,11 +393,11 @@ module VX_cache import VX_gpu_pkg::*; #( .WRITEBACK (WRITEBACK), .UUID_WIDTH (UUID_WIDTH), .TAG_WIDTH (TAG_WIDTH), - .CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF), - .MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF) + .CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)), + .MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 
0 : `TO_OUT_BUF_REG(MEM_OUT_BUF)) ) bank ( .clk (clk), - .reset (bank_reset), + .reset (reset), `ifdef PERF_ENABLE .perf_read_misses (perf_read_miss_per_bank[bank_id]), @@ -416,23 +430,25 @@ module VX_cache import VX_gpu_pkg::*; #( .mem_req_rw (per_bank_mem_req_rw[bank_id]), .mem_req_byteen (per_bank_mem_req_byteen[bank_id]), .mem_req_data (per_bank_mem_req_data[bank_id]), - .mem_req_id (per_bank_mem_req_id[bank_id]), + .mem_req_tag (per_bank_mem_req_tag[bank_id]), .mem_req_flush (per_bank_mem_req_flush[bank_id]), .mem_req_ready (per_bank_mem_req_ready[bank_id]), // Memory response .mem_rsp_valid (curr_bank_mem_rsp_valid), .mem_rsp_data (mem_rsp_data_s), - .mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)), + .mem_rsp_tag (bank_mem_rsp_tag), .mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]), + // Flush request .flush_begin (per_bank_flush_begin[bank_id]), + .flush_uuid (flush_uuid), .flush_end (per_bank_flush_end[bank_id]) ); - if (NUM_BANKS == 1) begin + if (NUM_BANKS == 1) begin : g_per_bank_mem_req_addr_multibanks assign per_bank_mem_req_addr[bank_id] = curr_bank_mem_req_addr; - end else begin + end else begin : g_per_bank_mem_req_addr_singlebank assign per_bank_mem_req_addr[bank_id] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, bank_id); end end @@ -442,20 +458,18 @@ module VX_cache import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_in; wire [NUM_REQS-1:0][CORE_RSP_DATAW-1:0] core_rsp_data_out; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_core_rsp_data_in assign core_rsp_data_in[i] = {per_bank_core_rsp_data[i], per_bank_core_rsp_tag[i]}; end - `RESET_RELAY (rsp_xbar_reset, reset); - VX_stream_xbar #( .NUM_INPUTS (NUM_BANKS), .NUM_OUTPUTS (NUM_REQS), .DATAW (CORE_RSP_DATAW), - .ARBITER ("F") + .ARBITER ("R") ) rsp_xbar ( .clk (clk), - .reset (rsp_xbar_reset), + .reset (reset), `UNUSED_PIN (collisions), .valid_in (per_bank_core_rsp_valid), .data_in (core_rsp_data_in), @@ -467,80 
+481,48 @@ module VX_cache import VX_gpu_pkg::*; #( `UNUSED_PIN (sel_out) ); - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_data_s assign {core_rsp_data_s[i], core_rsp_tag_s[i]} = core_rsp_data_out[i]; end - /////////////////////////////////////////////////////////////////////////// - - wire mem_req_valid_p; - wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p; - wire mem_req_rw_p; - wire [LINE_SIZE-1:0] mem_req_byteen_p; - wire [`CS_LINE_WIDTH-1:0] mem_req_data_p; - wire [MEM_TAG_WIDTH-1:0] mem_req_tag_p; - wire [MSHR_ADDR_WIDTH-1:0] mem_req_id_p; - wire mem_req_flush_p; - wire mem_req_ready_p; - // Memory request arbitration - wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in; + wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1)-1:0] data_in; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_data_in assign data_in[i] = { per_bank_mem_req_addr[i], per_bank_mem_req_rw[i], per_bank_mem_req_byteen[i], per_bank_mem_req_data[i], - per_bank_mem_req_id[i], + per_bank_mem_req_tag[i], per_bank_mem_req_flush[i] }; end + wire [BANK_MEM_TAG_WIDTH-1:0] bank_mem_req_tag; + VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), - .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1), - .ARBITER ("F") + .DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1), + .ARBITER ("R") ) mem_req_arb ( .clk (clk), .reset (reset), .valid_in (per_bank_mem_req_valid), .ready_in (per_bank_mem_req_ready), .data_in (data_in), - .data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p, mem_req_flush_p}), - .valid_out (mem_req_valid_p), - .ready_out (mem_req_ready_p), + .data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flush}), + .valid_out (mem_req_valid), + .ready_out 
(mem_req_ready), `UNUSED_PIN (sel_out) ); - if (NUM_BANKS > 1) begin - wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p); - assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p}); - end else begin - assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p); - end - - // Memory request multi-port handling - - assign mem_req_valid_s = mem_req_valid_p; - assign mem_req_addr_s = mem_req_addr_p; - assign mem_req_tag_s = mem_req_tag_p; - assign mem_req_flush_s = mem_req_flush_p; - assign mem_req_ready_p = mem_req_ready_s; - - if (WRITE_ENABLE != 0) begin - assign mem_req_rw_s = mem_req_rw_p; - assign mem_req_byteen_s = mem_req_byteen_p; - assign mem_req_data_s = mem_req_data_p; - end else begin - `UNUSED_VAR (mem_req_byteen_p) - `UNUSED_VAR (mem_req_data_p) - `UNUSED_VAR (mem_req_rw_p) - - assign mem_req_rw_s = 0; - assign mem_req_byteen_s = {LINE_SIZE{1'b1}}; - assign mem_req_data_s = '0; + if (NUM_BANKS > 1) begin : g_mem_req_tag_multibanks + wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr); + assign mem_req_tag = MEM_TAG_WIDTH'({bank_mem_req_tag, mem_req_bank_id}); + end else begin : g_mem_req_tag + assign mem_req_tag = MEM_TAG_WIDTH'(bank_mem_req_tag); end `ifdef PERF_ENABLE @@ -567,7 +549,7 @@ module VX_cache import VX_gpu_pkg::*; #( `POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank); wire [NUM_REQS-1:0] perf_crsp_stall_per_req; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_perf_crsp_stall_per_req assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready; end diff --git a/hw/rtl/cache/VX_cache_bank.sv b/hw/rtl/cache/VX_cache_bank.sv index dbbb4aba3..054b7c589 100644 --- a/hw/rtl/cache/VX_cache_bank.sv +++ b/hw/rtl/cache/VX_cache_bank.sv @@ -53,13 +53,14 @@ module VX_cache_bank #( // core request tag size parameter TAG_WIDTH = UUID_WIDTH + 1, - // Core response output buffer - parameter 
CORE_OUT_BUF = 0, + // Core response output register + parameter CORE_OUT_REG = 0, - // Memory request output buffer - parameter MEM_OUT_BUF = 0, + // Memory request output register + parameter MEM_OUT_REG = 0, parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE), + parameter MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH, parameter REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS), parameter WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS) ) ( @@ -97,18 +98,19 @@ module VX_cache_bank #( output wire mem_req_rw, output wire [LINE_SIZE-1:0] mem_req_byteen, output wire [`CS_LINE_WIDTH-1:0] mem_req_data, - output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id, // index of the head entry in the mshr + output wire [MEM_TAG_WIDTH-1:0] mem_req_tag, output wire mem_req_flush, input wire mem_req_ready, // Memory response input wire mem_rsp_valid, input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data, - input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id, + input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag, output wire mem_rsp_ready, // flush input wire flush_begin, + input wire [`UP(UUID_WIDTH)-1:0] flush_uuid, output wire flush_end ); @@ -241,32 +243,54 @@ module VX_cache_bank #( wire flush_fire = flush_valid && flush_ready; wire core_req_fire = core_req_valid && core_req_ready; + wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id = mem_rsp_tag[MSHR_ADDR_WIDTH-1:0]; + + wire [TAG_WIDTH-1:0] mem_rsp_tag_s; + if (TAG_WIDTH > MEM_TAG_WIDTH) begin : g_mem_rsp_tag_s_pad + assign mem_rsp_tag_s = {mem_rsp_tag, (TAG_WIDTH-MEM_TAG_WIDTH)'(1'b0)}; + end else begin : g_mem_rsp_tag_s_cut + assign mem_rsp_tag_s = mem_rsp_tag[MEM_TAG_WIDTH-1 -: TAG_WIDTH]; + `UNUSED_VAR (mem_rsp_tag) + end + + wire [TAG_WIDTH-1:0] flush_tag; + if (UUID_WIDTH != 0) begin : g_flush_tag_uuid + assign flush_tag = {flush_uuid, (TAG_WIDTH-UUID_WIDTH)'(1'b0)}; + end else begin : g_flush_tag_0 + `UNUSED_VAR (flush_uuid) + assign flush_tag = '0; + end + assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire; assign rw_sel = replay_valid ? 
replay_rw : core_req_rw; assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen; assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel; assign req_idx_sel = replay_valid ? replay_idx : core_req_idx; - assign tag_sel = replay_valid ? replay_tag : core_req_tag; + assign tag_sel = (init_valid | flush_valid) ? (flush_valid ? flush_tag : '0) : + (replay_valid ? replay_tag : (mem_rsp_valid ? mem_rsp_tag_s : core_req_tag)); assign creq_flush_sel = core_req_valid && core_req_flush; assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) : (replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr)); - if (WRITE_ENABLE) begin - assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data); - end else begin - assign data_sel[`CS_WORD_WIDTH-1:0] = mem_rsp_data[`CS_WORD_WIDTH-1:0]; + if (WRITE_ENABLE) begin : g_data_sel + for (genvar i = 0; i < `CS_LINE_WIDTH; ++i) begin : g_i + if (i < `CS_WORD_WIDTH) begin : g_lo + assign data_sel[i] = replay_valid ? replay_data[i] : (mem_rsp_valid ? 
mem_rsp_data[i] : core_req_data[i]); + end else begin : g_hi + assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel + end + end + end else begin : g_data_sel_ro + assign data_sel = mem_rsp_data; `UNUSED_VAR (core_req_data) `UNUSED_VAR (replay_data) end - for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin - assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel - end - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_req_uuid_sel assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin - assign req_uuid_sel = 0; + end else begin : g_req_uuid_sel_0 + assign req_uuid_sel = '0; end VX_pipe_register #( @@ -280,10 +304,10 @@ module VX_cache_bank #( .data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0}) ); - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_req_uuid_st0 assign req_uuid_st0 = tag_st0[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin - assign req_uuid_st0 = 0; + end else begin : g_req_uuid_st0_0 + assign req_uuid_st0 = '0; end wire do_init_st0 = valid_st0 && is_init_st0; @@ -362,10 +386,10 @@ module VX_cache_bank #( // we have a tag hit wire is_hit_st1 = (| way_sel_st1); - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_req_uuid_st1 assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin - assign req_uuid_st1 = 0; + end else begin : g_req_uuid_st1_0 + assign req_uuid_st1 = '0; end wire is_read_st1 = is_creq_st1 && ~rw_st1; @@ -394,7 +418,7 @@ module VX_cache_bank #( `UNUSED_VAR (do_write_miss_st1) // ensure mshr replay always get a hit - `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("missed mshr replay")); + `RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time)) // both tag and 
data stores use BRAM with no read-during-write protection. // we ned to stall the pipeline to prevent read-after-write hazards. @@ -413,14 +437,14 @@ module VX_cache_bank #( wire [`CS_LINE_WIDTH-1:0] dirty_data_st1; wire [LINE_SIZE-1:0] dirty_byteen_st1; - if (`CS_WORDS_PER_LINE > 1) begin - reg [LINE_SIZE-1:0] write_byteen_r; + if (`CS_WORDS_PER_LINE > 1) begin : g_write_byteen_st1_wsel + reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen_w; always @(*) begin - write_byteen_r = '0; - write_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1; + write_byteen_w = '0; + write_byteen_w[wsel_st1] = byteen_st1; end - assign write_byteen_st1 = write_byteen_r; - end else begin + assign write_byteen_st1 = write_byteen_w; + end else begin : g_write_byteen_st1 assign write_byteen_st1 = byteen_st1; end @@ -468,9 +492,9 @@ module VX_cache_bank #( // release allocated mshr entry if we had a hit wire mshr_release_st1; - if (WRITEBACK) begin + if (WRITEBACK) begin : g_mshr_release_st1 assign mshr_release_st1 = is_hit_st1; - end else begin + end else begin : g_mshr_release_st1_ro // we need to keep missed write requests in MSHR if there is already a pending entry to the same address // this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content // this can happen when writes are sent late, when the fill was already in flight. 
@@ -545,7 +569,7 @@ module VX_cache_bank #( // check if there are pending requests to same line in the MSHR wire [MSHR_SIZE-1:0] lookup_matches; - for (genvar i = 0; i < MSHR_SIZE; ++i) begin + for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches assign lookup_matches[i] = mshr_lookup_pending_st0[i] && (i != mshr_alloc_id_st0) // exclude current mshr id && (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough @@ -567,7 +591,7 @@ module VX_cache_bank #( VX_elastic_buffer #( .DATAW (TAG_WIDTH + `CS_WORD_WIDTH + REQ_SEL_WIDTH), .SIZE (CRSQ_SIZE), - .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) + .OUT_REG (CORE_OUT_REG) ) core_rsp_queue ( .clk (clk), .reset (reset), @@ -587,7 +611,7 @@ module VX_cache_bank #( wire [`CS_LINE_WIDTH-1:0] mreq_queue_data; wire [LINE_SIZE-1:0] mreq_queue_byteen; wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr; - wire [MSHR_ADDR_WIDTH-1:0] mreq_queue_id; + wire [MEM_TAG_WIDTH-1:0] mreq_queue_tag; wire mreq_queue_rw; wire mreq_queue_flush; @@ -595,16 +619,16 @@ module VX_cache_bank #( wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1; wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1; - if (WRITEBACK) begin - if (DIRTY_BYTES) begin + if (WRITEBACK) begin : g_mreq_queue_push + if (DIRTY_BYTES) begin : g_dirty_bytes // ensure dirty bytes match the tag info wire has_dirty_bytes = (| dirty_byteen_st1); - `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))); + `RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID))) end assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1) || do_writeback_st1) && 
~rdw_hazard3_st1; - end else begin + end else begin : g_mreq_queue_push_ro `UNUSED_VAR (do_writeback_st1) assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1) || do_creq_wr_st1) @@ -613,33 +637,47 @@ module VX_cache_bank #( assign mreq_queue_pop = mem_req_valid && mem_req_ready; assign mreq_queue_addr = addr_st1; - assign mreq_queue_id = mshr_id_st1; assign mreq_queue_flush = creq_flush_st1; - if (WRITE_ENABLE) begin - assign mreq_queue_rw = WRITEBACK ? is_fill_or_flush_st1 : rw_st1; - assign mreq_queue_data = WRITEBACK ? dirty_data_st1 : write_data_st1; - assign mreq_queue_byteen = WRITEBACK ? dirty_byteen_st1 : write_byteen_st1; - end else begin + if (WRITE_ENABLE) begin : g_mreq_queue + if (WRITEBACK) begin : g_writeback + assign mreq_queue_rw = is_fill_or_flush_st1; + assign mreq_queue_data = dirty_data_st1; + assign mreq_queue_byteen = is_fill_or_flush_st1 ? dirty_byteen_st1 : '1; + end else begin : g_writethrough + assign mreq_queue_rw = rw_st1; + assign mreq_queue_data = write_data_st1; + assign mreq_queue_byteen = rw_st1 ? 
write_byteen_st1 : '1; + `UNUSED_VAR (is_fill_or_flush_st1) + `UNUSED_VAR (dirty_data_st1) + `UNUSED_VAR (dirty_byteen_st1) + end + end else begin : g_mreq_queue_ro assign mreq_queue_rw = 0; - assign mreq_queue_data = 0; - assign mreq_queue_byteen = 0; + assign mreq_queue_data = '0; + assign mreq_queue_byteen = '1; `UNUSED_VAR (dirty_data_st1) `UNUSED_VAR (dirty_byteen_st1) end + if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid + assign mreq_queue_tag = {req_uuid_st1, mshr_id_st1}; + end else begin : g_mreq_queue_tag + assign mreq_queue_tag = mshr_id_st1; + end + VX_fifo_queue #( - .DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1), + .DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1), .DEPTH (MREQ_SIZE), .ALM_FULL (MREQ_SIZE-PIPELINE_STAGES), - .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) + .OUT_REG (MEM_OUT_REG) ) mem_req_queue ( .clk (clk), .reset (reset), .push (mreq_queue_push), .pop (mreq_queue_pop), - .data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}), - .data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_data, mem_req_flush}), + .data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flush}), + .data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flush}), .empty (mreq_queue_empty), .alm_full (mreq_queue_alm_full), `UNUSED_PIN (full), @@ -663,30 +701,32 @@ module VX_cache_bank #( && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire); always @(posedge clk) begin if (input_stall || pipe_stall) begin - `TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1)); + `TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, 
crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1)) end if (mem_rsp_fire) begin - `TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data)); + `TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel)) end if (replay_fire) begin - `TRACE(2, ("%d: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)); + `TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel)) end if (core_req_fire) begin - if (core_req_rw) - `TRACE(2, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)); - else - `TRACE(2, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)); + if (core_req_rw) begin + `TRACE(2, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, core_req_byteen, core_req_data, req_uuid_sel)) + end else begin + `TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel)) + end end if (crsp_queue_fire) begin - `TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, 
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)); + `TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1)) end if (mreq_queue_push) begin - if (do_creq_wr_st1 && !WRITEBACK) - `TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)); - else if (do_writeback_st1) - `TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data)); - else - `TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1)); + if (do_creq_wr_st1 && !WRITEBACK) begin + `TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) + end else if (do_writeback_st1) begin + `TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1)) + end else begin + `TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1)) + end end end `endif diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 379d33e8a..4b3b3a59a 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -56,7 +56,8 @@ module VX_cache_bypass #( localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1); localparam REQ_SEL_BITS = 
`CLOG2(NUM_REQS); - localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `ADDR_TYPE_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH; + localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS); + localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH; localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE); @@ -72,16 +73,16 @@ module VX_cache_bypass #( wire core_req_nc_valid; wire [NUM_REQS-1:0] core_req_nc_valids; wire [NUM_REQS-1:0] core_req_nc_idxs; - wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx; + wire [REQ_SEL_WIDTH-1:0] core_req_nc_idx; wire [NUM_REQS-1:0] core_req_nc_sel; wire core_req_nc_ready; - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (PASSTHRU != 0) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_nc + if (PASSTHRU != 0) begin : g_passthru assign core_req_nc_idxs[i] = 1'b1; - end else if (NC_ENABLE) begin - assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO]; - end else begin + end else if (NC_ENABLE) begin : g_nc + assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_IO]; + end else begin : g_no_nc assign core_req_nc_idxs[i] = 1'b0; end assign core_req_nc_valids[i] = core_bus_in_if[i].req_valid && core_req_nc_idxs[i]; @@ -100,7 +101,7 @@ module VX_cache_bypass #( .grant_ready (core_req_nc_ready) ); - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_if assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && ~core_req_nc_idxs[i]; assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data; assign core_bus_in_if[i].req_ready = core_req_nc_valids[i] ? 
(core_req_nc_ready && core_req_nc_sel[i]) @@ -113,7 +114,7 @@ module VX_cache_bypass #( wire mem_req_out_rw; wire [LINE_SIZE-1:0] mem_req_out_byteen; wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_out_addr; - wire [`ADDR_TYPE_WIDTH-1:0] mem_req_out_atype; + wire [`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_out_flags; wire [`CS_LINE_WIDTH-1:0] mem_req_out_data; wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag; wire mem_req_out_ready; @@ -121,28 +122,28 @@ module VX_cache_bypass #( wire core_req_nc_sel_rw; wire [WORD_SIZE-1:0] core_req_nc_sel_byteen; wire [CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr; - wire [`ADDR_TYPE_WIDTH-1:0] core_req_nc_sel_atype; + wire [`MEM_REQ_FLAGS_WIDTH-1:0] core_req_nc_sel_flags; wire [CORE_DATA_WIDTH-1:0] core_req_nc_sel_data; wire [CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag; wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_req_nc_mux_in assign core_req_nc_mux_in[i] = { core_bus_in_if[i].req_data.rw, - core_bus_in_if[i].req_data.byteen, core_bus_in_if[i].req_data.addr, - core_bus_in_if[i].req_data.atype, core_bus_in_if[i].req_data.data, + core_bus_in_if[i].req_data.byteen, + core_bus_in_if[i].req_data.flags, core_bus_in_if[i].req_data.tag }; end assign { core_req_nc_sel_rw, - core_req_nc_sel_byteen, core_req_nc_sel_addr, - core_req_nc_sel_atype, core_req_nc_sel_data, + core_req_nc_sel_byteen, + core_req_nc_sel_flags, core_req_nc_sel_tag } = core_req_nc_mux_in[core_req_nc_idx]; @@ -151,83 +152,81 @@ module VX_cache_bypass #( assign mem_req_out_valid = mem_bus_in_if.req_valid || core_req_nc_valid; assign mem_req_out_rw = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.rw : core_req_nc_sel_rw; assign mem_req_out_addr = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.addr : core_req_nc_sel_addr[WSEL_BITS +: MEM_ADDR_WIDTH]; - assign mem_req_out_atype = mem_bus_in_if.req_valid ? 
mem_bus_in_if.req_data.atype : core_req_nc_sel_atype; + assign mem_req_out_flags = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.flags : core_req_nc_sel_flags; wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass; wire [CORE_TAG_ID_BITS-1:0] core_req_in_id = core_req_nc_sel_tag[CORE_TAG_ID_BITS-1:0]; - if (WORDS_PER_LINE > 1) begin - reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_r; - reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r; + if (WORDS_PER_LINE > 1) begin : g_mem_req_multi_word_line + reg [WORDS_PER_LINE-1:0][WORD_SIZE-1:0] mem_req_byteen_in_w; + reg [WORDS_PER_LINE-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_w; wire [WSEL_BITS-1:0] req_wsel = core_req_nc_sel_addr[WSEL_BITS-1:0]; always @(*) begin - mem_req_byteen_in_r = '0; - mem_req_byteen_in_r[req_wsel] = core_req_nc_sel_byteen; + mem_req_byteen_in_w = '0; + mem_req_byteen_in_w[req_wsel] = core_req_nc_sel_byteen; - mem_req_data_in_r = 'x; - mem_req_data_in_r[req_wsel] = core_req_nc_sel_data; + mem_req_data_in_w = 'x; + mem_req_data_in_w[req_wsel] = core_req_nc_sel_data; end - assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_r; - assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_r; - if (NUM_REQS > 1) begin + assign mem_req_out_byteen = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.byteen : mem_req_byteen_in_w; + assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : mem_req_data_in_w; + if (NUM_REQS > 1) begin : g_multiple_requests assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, req_wsel, core_req_in_id}); - end else begin + end else begin : g_single_request assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({req_wsel, core_req_in_id}); end - end else begin + end else begin : g_mem_req_single_word_line assign mem_req_out_byteen = mem_bus_in_if.req_valid ? 
mem_bus_in_if.req_data.byteen : core_req_nc_sel_byteen; assign mem_req_out_data = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.data : core_req_nc_sel_data; - if (NUM_REQS > 1) begin + if (NUM_REQS > 1) begin : g_multiple_requests assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_nc_idx, core_req_in_id}); - end else begin + end else begin : g_single_request assign mem_req_tag_id_bypass = MEM_TAG_ID_BITS'({core_req_in_id}); end end wire [MEM_TAG_BYPASS_BITS-1:0] mem_req_tag_bypass; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_mem_req_tag_bypass_with_uuid assign mem_req_tag_bypass = {core_req_nc_sel_tag[CORE_TAG_ID_BITS +: UUID_WIDTH], mem_req_tag_id_bypass}; - end else begin + end else begin : g_mem_req_tag_bypass assign mem_req_tag_bypass = mem_req_tag_id_bypass; end - if (PASSTHRU != 0) begin + if (PASSTHRU != 0) begin : g_mem_req_out_tag_passthru assign mem_req_out_tag = mem_req_tag_bypass; `UNUSED_VAR (mem_bus_in_if.req_data.tag) - end else begin - if (NC_ENABLE) begin - VX_bits_insert #( - .N (MEM_TAG_OUT_WIDTH-1), - .S (1), - .POS (TAG_SEL_IDX) - ) mem_req_tag_in_nc_insert ( - .data_in (mem_bus_in_if.req_valid ? (MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if.req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)), - .ins_in (~mem_bus_in_if.req_valid), - .data_out (mem_req_out_tag) - ); - end else begin - assign mem_req_out_tag = mem_bus_in_if.req_data.tag; - end + end else if (NC_ENABLE) begin : g_mem_req_out_tag_nc + VX_bits_insert #( + .N (MEM_TAG_OUT_WIDTH-1), + .S (1), + .POS (TAG_SEL_IDX) + ) mem_req_tag_in_nc_insert ( + .data_in (mem_bus_in_if.req_valid ? 
(MEM_TAG_OUT_WIDTH-1)'(mem_bus_in_if.req_data.tag) : (MEM_TAG_OUT_WIDTH-1)'(mem_req_tag_bypass)), + .ins_in (~mem_bus_in_if.req_valid), + .data_out (mem_req_out_tag) + ); + end else begin : g_mem_req_out_tag + assign mem_req_out_tag = mem_bus_in_if.req_data.tag; end assign mem_bus_in_if.req_ready = mem_req_out_ready; VX_elastic_buffer #( - .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `ADDR_TYPE_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH), - .SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0), + .DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH), + .SIZE (DIRECT_PASSTHRU ? 0 : `TO_OUT_BUF_SIZE(MEM_OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( .clk (clk), .reset (reset), .valid_in (mem_req_out_valid), .ready_in (mem_req_out_ready), - .data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}), - .data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.atype, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}), + .data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_flags, mem_req_out_data, mem_req_out_tag}), + .data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.flags, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}), .valid_out (mem_bus_out_if.req_valid), .ready_out (mem_bus_out_if.req_ready) ); @@ -240,14 +239,12 @@ module VX_cache_bypass #( wire [NUM_REQS-1:0] core_rsp_in_ready; wire is_mem_rsp_nc; - if (PASSTHRU != 0) begin + if (PASSTHRU != 0) begin : g_is_mem_rsp_nc_passthru assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid; - end else begin - if (NC_ENABLE) begin - assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid && mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX]; - end else begin - assign is_mem_rsp_nc = 1'b0; - end + end else if (NC_ENABLE) 
begin : g_is_mem_rsp_nc + assign is_mem_rsp_nc = mem_bus_out_if.rsp_valid && mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX]; + end else begin : g_is_no_mem_rsp_nc + assign is_mem_rsp_nc = 1'b0; end wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc; @@ -261,57 +258,52 @@ module VX_cache_bypass #( .data_out (mem_rsp_tag_id_nc) ); - wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx; - if (NUM_REQS > 1) begin + wire [REQ_SEL_WIDTH-1:0] rsp_idx; + if (NUM_REQS > 1) begin : g_rsp_idx assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS]; - end else begin + end else begin : g_rsp_idx_0 assign rsp_idx = 1'b0; end - reg [NUM_REQS-1:0] rsp_nc_valid_r; - always @(*) begin - rsp_nc_valid_r = '0; - rsp_nc_valid_r[rsp_idx] = is_mem_rsp_nc; + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_valid + assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || (is_mem_rsp_nc && rsp_idx == REQ_SEL_WIDTH'(i)); end - - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid_r[i]; + + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_ready assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; end - if (WORDS_PER_LINE > 1) begin - wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS]; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_data + if (WORDS_PER_LINE > 1) begin : g_wsel + wire [WSEL_BITS-1:0] rsp_wsel = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS +: WSEL_BITS]; assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data[rsp_wsel * CORE_DATA_WIDTH +: CORE_DATA_WIDTH]; - end - end else begin - for (genvar i = 0; i < NUM_REQS; ++i) begin + end else begin : g_no_wsel assign core_rsp_in_data[i] = core_bus_out_if[i].rsp_valid ? 
core_bus_out_if[i].rsp_data.data : mem_bus_out_if.rsp_data.data; end end wire [(CORE_TAG_ID_BITS + UUID_WIDTH)-1:0] mem_rsp_tag_in_nc2; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_mem_rsp_tag_in_nc2_uuid assign mem_rsp_tag_in_nc2 = {mem_rsp_tag_id_nc[(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1 -: UUID_WIDTH], mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]}; - end else begin + end else begin : g_mem_rsp_tag_in_nc2 assign mem_rsp_tag_in_nc2 = mem_rsp_tag_id_nc[CORE_TAG_ID_BITS-1:0]; end - for (genvar i = 0; i < NUM_REQS; ++i) begin - if (PASSTHRU) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_in_tag + if (PASSTHRU) begin : g_passthru assign core_rsp_in_tag[i] = mem_rsp_tag_in_nc2; - end else if (NC_ENABLE) begin + end else if (NC_ENABLE) begin : g_nc assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_valid ? core_bus_out_if[i].rsp_data.tag : mem_rsp_tag_in_nc2; - end else begin + end else begin : g_no_nc assign core_rsp_in_tag[i] = core_bus_out_if[i].rsp_data.tag; end end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_buf VX_elastic_buffer #( .DATAW (`CS_WORD_WIDTH + CORE_TAG_WIDTH), - .SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0), + .SIZE (DIRECT_PASSTHRU ? 
0 : `TO_OUT_BUF_SIZE(CORE_OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF)) ) core_rsp_buf ( .clk (clk), @@ -327,22 +319,22 @@ module VX_cache_bypass #( // handle memory responses //////////////////////////////////////////////// - if (PASSTHRU != 0) begin + if (PASSTHRU != 0) begin : g_mem_bus_in_if_passthru assign mem_bus_in_if.rsp_valid = 1'b0; assign mem_bus_in_if.rsp_data.data = '0; assign mem_bus_in_if.rsp_data.tag = '0; - end else if (NC_ENABLE) begin + end else if (NC_ENABLE) begin : g_mem_bus_in_if_nc assign mem_bus_in_if.rsp_valid = mem_bus_out_if.rsp_valid && ~mem_bus_out_if.rsp_data.tag[TAG_SEL_IDX]; assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data; assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc[MEM_TAG_IN_WIDTH-1:0]; - end else begin + end else begin : g_mem_bus_in_if assign mem_bus_in_if.rsp_valid = mem_bus_out_if.rsp_valid; assign mem_bus_in_if.rsp_data.data = mem_bus_out_if.rsp_data.data; assign mem_bus_in_if.rsp_data.tag = mem_rsp_tag_id_nc; end wire [NUM_REQS-1:0] core_rsp_out_valid; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_out_valid assign core_rsp_out_valid[i] = core_bus_out_if[i].rsp_valid; end diff --git a/hw/rtl/cache/VX_cache_cluster.sv b/hw/rtl/cache/VX_cache_cluster.sv index 939768b63..5a8bb9865 100644 --- a/hw/rtl/cache/VX_cache_cluster.sv +++ b/hw/rtl/cache/VX_cache_cluster.sv @@ -82,8 +82,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( localparam PASSTHRU = (NUM_UNITS == 0); localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES); localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : - (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) : - `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)); + (NC_ENABLE ? 
`CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH, UUID_WIDTH) : + `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH)); `STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter")) @@ -102,9 +102,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (ARB_TAG_WIDTH) ) arb_core_bus_if[NUM_CACHES * NUM_REQS](); - `RESET_RELAY_EX (cache_arb_reset, reset, NUM_REQS, `MAX_FANOUT); - - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_arb VX_mem_bus_if #( .DATA_SIZE (WORD_SIZE), .TAG_WIDTH (TAG_WIDTH) @@ -115,7 +113,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (ARB_TAG_WIDTH) ) arb_core_bus_tmp_if[NUM_CACHES](); - for (genvar j = 0; j < NUM_INPUTS; ++j) begin + for (genvar j = 0; j < NUM_INPUTS; ++j) begin : g_core_bus_tmp_if `ASSIGN_VX_MEM_BUS_IF (core_bus_tmp_if[j], core_bus_if[j * NUM_REQS + i]); end @@ -127,23 +125,20 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_SEL_IDX (TAG_SEL_IDX), .ARBITER ("R"), .REQ_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0), - .RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : 0) - ) cache_arb ( + .RSP_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 
CORE_OUT_BUF : 0) + ) core_arb ( .clk (clk), - .reset (cache_arb_reset[i]), + .reset (reset), .bus_in_if (core_bus_tmp_if), .bus_out_if (arb_core_bus_tmp_if) ); - for (genvar k = 0; k < NUM_CACHES; ++k) begin + for (genvar k = 0; k < NUM_CACHES; ++k) begin : g_arb_core_bus_if `ASSIGN_VX_MEM_BUS_IF (arb_core_bus_if[k * NUM_REQS + i], arb_core_bus_tmp_if[k]); end end - for (genvar i = 0; i < NUM_CACHES; ++i) begin : caches - - `RESET_RELAY (cache_reset, reset); - + for (genvar i = 0; i < NUM_CACHES; ++i) begin : g_cache_wrap VX_cache_wrap #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)), .CACHE_SIZE (CACHE_SIZE), @@ -171,7 +166,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .cache_perf (perf_cache_unit[i]), `endif .clk (clk), - .reset (cache_reset), + .reset (reset), .core_bus_if (arb_core_bus_if[i * NUM_REQS +: NUM_REQS]), .mem_bus_if (cache_mem_bus_if[i]) ); @@ -188,7 +183,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .TAG_WIDTH (MEM_TAG_WIDTH), .TAG_SEL_IDX (TAG_SEL_IDX), .ARBITER ("R"), - .REQ_OUT_BUF ((NUM_CACHES > 1) ? 2 : 0), + .REQ_OUT_BUF ((NUM_CACHES > 1) ? MEM_OUT_BUF : 0), .RSP_OUT_BUF ((NUM_CACHES > 1) ? 
2 : 0) ) mem_arb ( .clk (clk), @@ -197,6 +192,10 @@ module VX_cache_cluster import VX_gpu_pkg::*; #( .bus_out_if (mem_bus_tmp_if) ); - `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]); + if (WRITE_ENABLE) begin : g_mem_bus_if + `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]); + end else begin : g_mem_bus_if_ro + `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if[0]); + end endmodule diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index a114e1689..04b0ff746 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -75,45 +75,47 @@ module VX_cache_data #( wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata; wire [`LOG2UP(NUM_WAYS)-1:0] way_idx; - if (WRITEBACK) begin - if (DIRTY_BYTES) begin - wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata; - wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata; - - for (genvar i = 0; i < NUM_WAYS; ++i) begin - wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]); - assign bs_wdata[i] = init ? '0 : (way_sel[i] ? 
wdata : bs_rdata[i]); - end + if (WRITEBACK) begin : g_dirty_data + wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata; + VX_transpose #( + .DATAW (`CS_WORD_WIDTH), + .N (`CS_WORDS_PER_LINE), + .M (NUM_WAYS) + ) transpose ( + .data_in (line_rdata), + .data_out (transposed_rdata) + ); + assign dirty_data = transposed_rdata[way_idx]; + end else begin : g_dirty_data_0 + assign dirty_data = '0; + end - VX_sp_ram #( - .DATAW (LINE_SIZE * NUM_WAYS), - .SIZE (`CS_LINES_PER_BANK) - ) byteen_store ( - .clk (clk), - .reset (reset), - .read (write || fill || flush), - .write (init || write || fill || flush), - .wren (1'b1), - .addr (line_sel), - .wdata (bs_wdata), - .rdata (bs_rdata) - ); - - assign dirty_byteen = bs_rdata[way_idx]; - end else begin - assign dirty_byteen = {LINE_SIZE{1'b1}}; - end + if (DIRTY_BYTES) begin : g_dirty_byteen + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata; + wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata; - wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] flipped_rdata; - for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin - for (genvar j = 0; j < NUM_WAYS; ++j) begin - assign flipped_rdata[j][i] = line_rdata[i][j]; - end + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_bs_wdata + wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]); + assign bs_wdata[i] = init ? '0 : (way_sel[i] ? 
wdata : bs_rdata[i]); end - assign dirty_data = flipped_rdata[way_idx]; - end else begin - assign dirty_byteen = '0; - assign dirty_data = '0; + + VX_sp_ram #( + .DATAW (LINE_SIZE * NUM_WAYS), + .SIZE (`CS_LINES_PER_BANK) + ) byteen_store ( + .clk (clk), + .reset (reset), + .read (write || fill || flush), + .write (init || write || fill || flush), + .wren (1'b1), + .addr (line_sel), + .wdata (bs_wdata), + .rdata (bs_rdata) + ); + + assign dirty_byteen = bs_rdata[way_idx]; + end else begin : g_dirty_byteen_0 + assign dirty_byteen = '1; end // order the data layout to perform ways multiplexing last. @@ -122,17 +124,17 @@ module VX_cache_data #( wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata; wire [BYTEENW-1:0] line_wren; - if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin + if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin : g_line_wdata wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w; - for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin - for (genvar j = 0; j < NUM_WAYS; ++j) begin + for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_i + for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i]; assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? 
{WORD_SIZE{1'b1}} : write_byteen[i]) & {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}}; end end assign line_wren = wren_w; - end else begin + end else begin : g_line_wdata_ro `UNUSED_VAR (write) `UNUSED_VAR (write_byteen) `UNUSED_VAR (write_data) @@ -140,7 +142,7 @@ module VX_cache_data #( assign line_wren = fill; end - VX_onehot_encoder #( + VX_encoder #( .N (NUM_WAYS) ) way_enc ( .data_in (way_sel), @@ -171,9 +173,9 @@ module VX_cache_data #( ); wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata; - if (`CS_WORDS_PER_LINE > 1) begin + if (`CS_WORDS_PER_LINE > 1) begin : g_per_way_rdata_wsel assign per_way_rdata = line_rdata[wsel]; - end else begin + end else begin : g_per_way_rdata `UNUSED_VAR (wsel) assign per_way_rdata = line_rdata; end @@ -182,16 +184,16 @@ module VX_cache_data #( `ifdef DBG_TRACE_CACHE always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)); + `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data)) end if (flush && ~stall) begin - `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)); + `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data)) end if (read && ~stall) begin - `TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid)); + `TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, 
`CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid)) end if (write && ~stall) begin - `TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)); + `TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid)) end end `endif diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index e6d7da167..342a40a1b 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -57,7 +57,6 @@ `define CS_LINE_TO_MEM_ADDR(x, i) {x, `CS_BANK_SEL_BITS'(i)} `define CS_MEM_ADDR_TO_BANK_ID(x) x[0 +: `CS_BANK_SEL_BITS] `define CS_MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0] -`define CS_MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `CS_BANK_SEL_BITS] `define CS_LINE_TO_FULL_ADDR(x, i) {x, (`XLEN-$bits(x))'(i << (`XLEN-$bits(x)-`CS_BANK_SEL_BITS))} `define CS_MEM_TO_FULL_ADDR(x) {x, (`XLEN-$bits(x))'(0)} diff --git a/hw/rtl/cache/VX_cache_flush.sv b/hw/rtl/cache/VX_cache_flush.sv index 7a33565fc..b318dc5af 100644 --- a/hw/rtl/cache/VX_cache_flush.sv +++ b/hw/rtl/cache/VX_cache_flush.sv @@ -18,6 +18,10 @@ module VX_cache_flush #( parameter NUM_REQS = 4, // Number of banks parameter NUM_BANKS = 1, + // Request debug identifier + parameter UUID_WIDTH = 0, + // core request tag size + parameter TAG_WIDTH = UUID_WIDTH + 1, // Bank select latency parameter BANK_SEL_LATENCY = 1 ) ( @@ -27,6 +31,7 @@ module VX_cache_flush #( VX_mem_bus_if.master core_bus_out_if [NUM_REQS], input wire [NUM_BANKS-1:0] bank_req_fire, output wire [NUM_BANKS-1:0] flush_begin, + output wire [`UP(UUID_WIDTH)-1:0] flush_uuid, input wire [NUM_BANKS-1:0] flush_end ); localparam STATE_IDLE 
= 0; @@ -41,13 +46,13 @@ module VX_cache_flush #( wire no_inflight_reqs; - if (BANK_SEL_LATENCY != 0) begin + if (BANK_SEL_LATENCY != 0) begin : g_bank_sel_latency localparam NUM_REQS_W = `CLOG2(NUM_REQS+1); localparam NUM_BANKS_W = `CLOG2(NUM_BANKS+1); wire [NUM_REQS-1:0] core_bus_out_fire; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_fire assign core_bus_out_fire[i] = core_bus_out_if[i].req_valid && core_bus_out_if[i].req_ready; end @@ -74,7 +79,7 @@ module VX_cache_flush #( `UNUSED_PIN (size) ); - end else begin + end else begin : g_no_bank_sel_latency assign no_inflight_reqs = 0; `UNUSED_VAR (bank_req_fire) end @@ -82,28 +87,38 @@ module VX_cache_flush #( reg [NUM_BANKS-1:0] flush_done, flush_done_n; wire [NUM_REQS-1:0] flush_req_mask; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.atype[`ADDR_TYPE_FLUSH]; + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_flush_req_mask + assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH]; end wire flush_req_enable = (| flush_req_mask); reg [NUM_REQS-1:0] lock_released, lock_released_n; + reg [`UP(UUID_WIDTH)-1:0] flush_uuid_r, flush_uuid_n; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_req wire input_enable = ~flush_req_enable || lock_released[i]; assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && input_enable; assign core_bus_out_if[i].req_data = core_bus_in_if[i].req_data; assign core_bus_in_if[i].req_ready = core_bus_out_if[i].req_ready && input_enable; end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_in_rsp assign core_bus_in_if[i].rsp_valid = core_bus_out_if[i].rsp_valid; assign core_bus_in_if[i].rsp_data = core_bus_out_if[i].rsp_data; assign core_bus_out_if[i].rsp_ready = 
core_bus_in_if[i].rsp_ready; end + reg [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] core_bus_out_uuid; wire [NUM_REQS-1:0] core_bus_out_ready; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_uuid + if (UUID_WIDTH != 0) begin : g_uuid + assign core_bus_out_uuid[i] = core_bus_in_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; + end else begin : g_no_uuid + assign core_bus_out_uuid[i] = 0; + end + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_out_ready assign core_bus_out_ready[i] = core_bus_out_if[i].req_ready; end @@ -111,10 +126,16 @@ module VX_cache_flush #( state_n = state; flush_done_n = flush_done; lock_released_n = lock_released; + flush_uuid_n = flush_uuid_r; case (state) STATE_IDLE: begin if (flush_req_enable) begin state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT1 : STATE_FLUSH; + for (integer i = NUM_REQS-1; i >= 0; --i) begin + if (flush_req_mask[i]) begin + flush_uuid_n = core_bus_out_uuid[i]; + end + end end end STATE_WAIT1: begin @@ -158,8 +179,10 @@ module VX_cache_flush #( flush_done <= flush_done_n; lock_released <= lock_released_n; end + flush_uuid_r <= flush_uuid_n; end assign flush_begin = {NUM_BANKS{state == STATE_FLUSH}}; + assign flush_uuid = flush_uuid_r; endmodule diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 4f8163269..482c110dc 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ -135,7 +135,7 @@ module VX_cache_mshr #( wire dequeue_fire = dequeue_valid && dequeue_ready; wire [MSHR_SIZE-1:0] addr_matches; - for (genvar i = 0; i < MSHR_SIZE; ++i) begin + for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_addr_matches assign addr_matches[i] = valid_table[i] && (addr_table[i] == lookup_addr); end @@ -148,7 +148,7 @@ module VX_cache_mshr #( .valid_out (allocate_rdy_n) ); - VX_onehot_encoder #( + VX_encoder #( .N (MSHR_SIZE) ) prev_sel ( .data_in (addr_matches & ~next_table_x), @@ -267,35 +267,42 @@ module 
VX_cache_mshr #( end else begin show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire; end - if (allocate_fire) - `TRACE(3, ("%d: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid)); - if (lookup_valid) - `TRACE(3, ("%d: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid)); - if (finalize_valid) - `TRACE(3, ("%d: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, - finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid)); - if (fill_valid) - `TRACE(3, ("%d: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id)); - if (dequeue_fire) - `TRACE(3, ("%d: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, - `CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid)); + if (allocate_fire) begin + `TRACE(3, ("%t: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid)) + end + if (lookup_valid) begin + `TRACE(3, ("%t: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid)) + end + if (finalize_valid) begin + `TRACE(3, ("%t: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID, + finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid)) + end + if (fill_valid) begin + `TRACE(3, ("%t: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id)) + end + if (dequeue_fire) begin + `TRACE(3, ("%t: 
%s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID, + `CS_LINE_TO_FULL_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_uuid)) + end if (show_table) begin - `TRACE(3, ("%d: %s table", $time, INSTANCE_ID)); + `TRACE(3, ("%t: %s table", $time, INSTANCE_ID)) for (integer i = 0; i < MSHR_SIZE; ++i) begin if (valid_table[i]) begin - `TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID))); - if (write_table[i]) - `TRACE(3, ("(w)")); - else - `TRACE(3, ("(r)")); - if (next_table[i]) - `TRACE(3, ("->%0d", next_index[i])); + `TRACE(3, (" %0d=0x%0h", i, `CS_LINE_TO_FULL_ADDR(addr_table[i], BANK_ID))) + if (write_table[i]) begin + `TRACE(3, ("(w)")) + end else begin + `TRACE(3, ("(r)")) + end + if (next_table[i]) begin + `TRACE(3, ("->%0d", next_index[i])) + end end end - `TRACE(3, ("\n")); + `TRACE(3, ("\n")) end end `endif diff --git a/hw/rtl/cache/VX_cache_tags.sv b/hw/rtl/cache/VX_cache_tags.sv index 7fef69be6..92497b80b 100644 --- a/hw/rtl/cache/VX_cache_tags.sv +++ b/hw/rtl/cache/VX_cache_tags.sv @@ -69,7 +69,7 @@ module VX_cache_tags #( wire [NUM_WAYS-1:0] read_valid; wire [NUM_WAYS-1:0] read_dirty; - if (NUM_WAYS > 1) begin + if (NUM_WAYS > 1) begin : g_evict_way reg [NUM_WAYS-1:0] evict_way_r; // cyclic assignment of replacement way always @(posedge clk) begin @@ -90,7 +90,7 @@ module VX_cache_tags #( .sel_in (evict_way), .data_out (evict_tag) ); - end else begin + end else begin : g_evict_way_0 `UNUSED_VAR (stall) assign evict_way = 1'b1; assign evict_tag = read_tag; @@ -100,7 +100,7 @@ module VX_cache_tags #( wire fill_s = fill && (!WRITEBACK || ~stall); wire flush_s = flush && (!WRITEBACK || ~stall); - for (genvar i = 0; i < NUM_WAYS; ++i) begin + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store wire do_fill = fill_s && evict_way[i]; wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode @@ -113,10 +113,10 @@ module VX_cache_tags #( wire [TAG_WIDTH-1:0] line_wdata; wire 
[TAG_WIDTH-1:0] line_rdata; - if (WRITEBACK) begin + if (WRITEBACK) begin : g_writeback assign line_wdata = {line_valid, write, line_tag}; assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata; - end else begin + end else begin : g_writethrough assign line_wdata = {line_valid, line_tag}; assign {read_valid[i], read_tag[i]} = line_rdata; assign read_dirty[i] = 1'b0; @@ -139,7 +139,7 @@ module VX_cache_tags #( ); end - for (genvar i = 0; i < NUM_WAYS; ++i) begin + for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_matches assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]); end @@ -149,25 +149,27 @@ module VX_cache_tags #( wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel}; always @(posedge clk) begin if (fill && ~stall) begin - `TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))); + `TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID))) end if (init) begin - `TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)); + `TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel)) end if (flush && ~stall) begin - `TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty)); + `TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty)) end if (lookup && 
~stall) begin if (tag_matches != 0) begin - if (write) - `TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)); - else - `TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)); + if (write) begin + `TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) + end else begin + `TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid)) + end end else begin - if (write) - `TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)); - else - `TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)); + if (write) begin + `TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) + end else begin + `TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid)) + end end end end diff --git a/hw/rtl/cache/VX_cache_top.sv b/hw/rtl/cache/VX_cache_top.sv index 0959701aa..3fa0e5d65 100644 --- a/hw/rtl/cache/VX_cache_top.sv +++ b/hw/rtl/cache/VX_cache_top.sv @@ -75,7 +75,7 @@ module VX_cache_top import VX_gpu_pkg::*; #( input wire [NUM_REQS-1:0] core_req_rw, input wire 
[NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr, - input wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] core_req_atype, + input wire [NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] core_req_flags, input wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data, input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag, output wire [NUM_REQS-1:0] core_req_ready, @@ -117,7 +117,7 @@ module VX_cache_top import VX_gpu_pkg::*; #( assign core_bus_if[i].req_data.rw = core_req_rw[i]; assign core_bus_if[i].req_data.byteen = core_req_byteen[i]; assign core_bus_if[i].req_data.addr = core_req_addr[i]; - assign core_bus_if[i].req_data.atype = core_req_atype[i]; + assign core_bus_if[i].req_data.flags = core_req_flags[i]; assign core_bus_if[i].req_data.data = core_req_data[i]; assign core_bus_if[i].req_data.tag = core_req_tag[i]; assign core_req_ready[i] = core_bus_if[i].req_ready; @@ -139,7 +139,7 @@ module VX_cache_top import VX_gpu_pkg::*; #( assign mem_req_data = mem_bus_if.req_data.data; assign mem_req_tag = mem_bus_if.req_data.tag; assign mem_bus_if.req_ready = mem_req_ready; - `UNUSED_VAR (mem_bus_if.req_data.atype) + `UNUSED_VAR (mem_bus_if.req_data.flags) // Memory response assign mem_bus_if.rsp_valid = mem_rsp_valid; diff --git a/hw/rtl/cache/VX_cache_wrap.sv b/hw/rtl/cache/VX_cache_wrap.sv index 37940297f..0b8a1f3c4 100644 --- a/hw/rtl/cache/VX_cache_wrap.sv +++ b/hw/rtl/cache/VX_cache_wrap.sv @@ -84,12 +84,11 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( `STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter")) - localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE); - localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS; + localparam CACHE_MEM_TAG_WIDTH = `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH); - localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : - (NC_ENABLE ? 
`CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : - `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS)); + localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) : + (NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH, UUID_WIDTH) : + CACHE_MEM_TAG_WIDTH); localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU); @@ -103,9 +102,12 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .TAG_WIDTH (CACHE_MEM_TAG_WIDTH) ) mem_bus_cache_if(); - if (NC_OR_BYPASS) begin + VX_mem_bus_if #( + .DATA_SIZE (LINE_SIZE), + .TAG_WIDTH (MEM_TAG_WIDTH) + ) mem_bus_tmp_if(); - `RESET_RELAY (nc_bypass_reset, reset); + if (NC_OR_BYPASS) begin : g_bypass VX_cache_bypass #( .NUM_REQS (NUM_REQS), @@ -130,51 +132,31 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .MEM_OUT_BUF (MEM_OUT_BUF) ) cache_bypass ( .clk (clk), - .reset (nc_bypass_reset), + .reset (reset), .core_bus_in_if (core_bus_if), .core_bus_out_if(core_bus_cache_if), .mem_bus_in_if (mem_bus_cache_if), - .mem_bus_out_if (mem_bus_if) + .mem_bus_out_if (mem_bus_tmp_if) ); - end else begin + end else begin : g_no_bypass - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_cache_if `ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]); end - `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_cache_if); + `ASSIGN_VX_MEM_BUS_IF (mem_bus_tmp_if, mem_bus_cache_if); end - if (PASSTHRU != 0) begin - - for (genvar i = 0; i < NUM_REQS; ++i) begin - `UNUSED_VAR (core_bus_cache_if[i].req_valid) - `UNUSED_VAR (core_bus_cache_if[i].req_data) - assign core_bus_cache_if[i].req_ready = 0; - - assign core_bus_cache_if[i].rsp_valid = 0; - assign core_bus_cache_if[i].rsp_data = '0; - `UNUSED_VAR (core_bus_cache_if[i].rsp_ready) - end - - assign mem_bus_cache_if.req_valid = 0; - assign mem_bus_cache_if.req_data = '0; - `UNUSED_VAR (mem_bus_cache_if.req_ready) - - `UNUSED_VAR 
(mem_bus_cache_if.rsp_valid) - `UNUSED_VAR (mem_bus_cache_if.rsp_data) - assign mem_bus_cache_if.rsp_ready = 0; - - `ifdef PERF_ENABLE - assign cache_perf = '0; - `endif - - end else begin + if (WRITE_ENABLE) begin : g_mem_bus_if + `ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if); + end else begin : g_mem_bus_if_ro + `ASSIGN_VX_MEM_BUS_RO_IF (mem_bus_if, mem_bus_tmp_if); + end - `RESET_RELAY (cache_reset, reset); + if (PASSTHRU == 0) begin : g_cache VX_cache #( .INSTANCE_ID (INSTANCE_ID), @@ -197,7 +179,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .MEM_OUT_BUF (NC_OR_BYPASS ? 1 : MEM_OUT_BUF) ) cache ( .clk (clk), - .reset (cache_reset), + .reset (reset), `ifdef PERF_ENABLE .cache_perf (cache_perf), `endif @@ -205,18 +187,41 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( .mem_bus_if (mem_bus_cache_if) ); + end else begin : g_passthru + + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_bus_cache_if + `UNUSED_VAR (core_bus_cache_if[i].req_valid) + `UNUSED_VAR (core_bus_cache_if[i].req_data) + assign core_bus_cache_if[i].req_ready = 0; + + assign core_bus_cache_if[i].rsp_valid = 0; + assign core_bus_cache_if[i].rsp_data = '0; + `UNUSED_VAR (core_bus_cache_if[i].rsp_ready) + end + + assign mem_bus_cache_if.req_valid = 0; + assign mem_bus_cache_if.req_data = '0; + `UNUSED_VAR (mem_bus_cache_if.req_ready) + + `UNUSED_VAR (mem_bus_cache_if.rsp_valid) + `UNUSED_VAR (mem_bus_cache_if.rsp_data) + assign mem_bus_cache_if.rsp_ready = 0; + + `ifdef PERF_ENABLE + assign cache_perf = '0; + `endif + end `ifdef DBG_TRACE_CACHE - - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_trace wire [`UP(UUID_WIDTH)-1:0] core_req_uuid; wire [`UP(UUID_WIDTH)-1:0] core_rsp_uuid; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_core_rsp_uuid assign core_req_uuid = core_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; assign core_rsp_uuid = core_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end 
else begin : g_no_core_rsp_uuid assign core_req_uuid = 0; assign core_rsp_uuid = 0; end @@ -226,13 +231,14 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (core_req_fire) begin - if (core_bus_if[i].req_data.rw) - `TRACE(1, ("%d: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)); - else - `TRACE(1, ("%d: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)); + if (core_bus_if[i].req_data.rw) begin + `TRACE(1, ("%t: %s core-wr-req: addr=0x%0h, tag=0x%0h, req_idx=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_bus_if[i].req_data.byteen, core_bus_if[i].req_data.data, core_req_uuid)) + end else begin + `TRACE(1, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `TO_FULL_ADDR(core_bus_if[i].req_data.addr), core_bus_if[i].req_data.tag, i, core_req_uuid)) + end end if (core_rsp_fire) begin - `TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)); + `TRACE(1, ("%t: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid)) end end end @@ -240,10 +246,10 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid; wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid; - if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin + if ((UUID_WIDTH != 0) && (NC_OR_BYPASS != 0)) begin : g_mem_req_uuid assign mem_req_uuid = mem_bus_if.req_data.tag[MEM_TAG_WIDTH-1 -: 
UUID_WIDTH]; assign mem_rsp_uuid = mem_bus_if.rsp_data.tag[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_no_mem_req_uuid assign mem_req_uuid = 0; assign mem_rsp_uuid = 0; end @@ -253,16 +259,17 @@ module VX_cache_wrap import VX_gpu_pkg::*; #( always @(posedge clk) begin if (mem_req_fire) begin - if (mem_bus_if.req_data.rw) - `TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)); - else - `TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", - $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid)); + if (mem_bus_if.req_data.rw) begin + `TRACE(1, ("%t: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", + $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid)) + end else begin + `TRACE(1, ("%t: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", + $time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid)) + end end if (mem_rsp_fire) begin - `TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid)); + `TRACE(1, ("%t: %s mem-rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", + $time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid)) end end `endif diff --git a/hw/rtl/core/VX_alu_int.sv b/hw/rtl/core/VX_alu_int.sv index 47bfcc6bf..53c7ae57a 100644 --- a/hw/rtl/core/VX_alu_int.sv +++ b/hw/rtl/core/VX_alu_int.sv @@ -71,19 +71,19 @@ module VX_alu_int #( wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_imm = execute_if.data.op_args.alu.use_imm ? 
{NUM_LANES{`SEXT(`XLEN, execute_if.data.op_args.alu.imm)}} : alu_in2; wire [NUM_LANES-1:0][`XLEN-1:0] alu_in2_br = (execute_if.data.op_args.alu.use_imm && ~is_br_op) ? {NUM_LANES{`SEXT(`XLEN, execute_if.data.op_args.alu.imm)}} : alu_in2; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_add_result assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i]; assign add_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] + alu_in2_imm[i][31:0])); end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_sub_result wire [`XLEN:0] sub_in1 = {is_signed & alu_in1[i][`XLEN-1], alu_in1[i]}; wire [`XLEN:0] sub_in2 = {is_signed & alu_in2_br[i][`XLEN-1], alu_in2_br[i]}; assign sub_result[i] = sub_in1 - sub_in2; assign sub_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] - alu_in2_imm[i][31:0])); end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_shr_result wire [`XLEN:0] shr_in1 = {is_signed && alu_in1[i][`XLEN-1], alu_in1[i]}; always @(*) begin case (alu_op[1:0]) @@ -102,7 +102,7 @@ module VX_alu_int #( assign shr_result_w[i] = `XLEN'($signed(shr_res_w)); end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_msc_result always @(*) begin case (alu_op[1:0]) 2'b00: msc_result[i] = alu_in1[i] & alu_in2_imm[i]; // AND @@ -114,7 +114,7 @@ module VX_alu_int #( assign msc_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0])); // SLLW end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_alu_result wire [`XLEN-1:0] slt_br_result = `XLEN'({is_br_op && ~(| sub_result[i][`XLEN-1:0]), sub_result[i][`XLEN]}); wire [`XLEN-1:0] sub_slt_br_result = (is_sub_op && ~is_br_op) ? 
sub_result[i][`XLEN-1:0] : slt_br_result; always @(*) begin @@ -141,9 +141,9 @@ module VX_alu_int #( assign cbr_dest = add_result[0][1 +: `PC_BITS]; - if (LANE_BITS != 0) begin + if (LANE_BITS != 0) begin : g_tid assign tid = execute_if.data.tid[0 +: LANE_BITS]; - end else begin + end else begin : g_tid_0 assign tid = 0; end @@ -185,7 +185,7 @@ module VX_alu_int #( .data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest}) ); - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_commit assign commit_if.data.data[i] = (is_br_op_r && is_br_static) ? {(PC_r + `PC_BITS'(2)), 1'd0} : alu_result_r[i]; end @@ -194,8 +194,8 @@ module VX_alu_int #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (br_enable) begin - `TRACE(1, ("%d: %s-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", - $time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid)); + `TRACE(1, ("%t: %s branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n", + $time, INSTANCE_ID, br_wid, {commit_if.data.PC, 1'b0}, br_taken, {br_dest, 1'b0}, commit_if.data.uuid)) end end `endif diff --git a/hw/rtl/core/VX_alu_muldiv.sv b/hw/rtl/core/VX_alu_muldiv.sv index 3beb035f4..d374013bc 100644 --- a/hw/rtl/core/VX_alu_muldiv.sv +++ b/hw/rtl/core/VX_alu_muldiv.sv @@ -68,7 +68,7 @@ module VX_alu_muldiv #( wire mul_fire_in = mul_valid_in && mul_ready_in; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_result_tmp reg [`XLEN-1:0] mul_resultl, mul_resulth; wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i]; wire [`XLEN-1:0] mul_in2 = is_alu_w ? 
(execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i]; @@ -103,7 +103,7 @@ module VX_alu_muldiv #( wire [NUM_LANES-1:0][`XLEN:0] mul_in1; wire [NUM_LANES-1:0][`XLEN:0] mul_in2; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_in assign mul_in1[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]} : {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]}; assign mul_in2[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]} : {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]}; end @@ -149,7 +149,7 @@ module VX_alu_muldiv #( `else - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_multiplier wire [`XLEN:0] mul_in1 = {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]}; wire [`XLEN:0] mul_in2 = {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]}; @@ -184,7 +184,7 @@ module VX_alu_muldiv #( `endif - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mul_result_out `ifdef XLEN_64 assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] : (is_mul_w_out ? `XLEN'($signed(mul_result_tmp[i][31:0])) : @@ -219,7 +219,7 @@ module VX_alu_muldiv #( wire [NUM_LANES-1:0][`XLEN-1:0] div_in1; wire [NUM_LANES-1:0][`XLEN-1:0] div_in2; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_in `ifdef XLEN_64 assign div_in1[i] = is_alu_w ? {{(`XLEN-32){is_signed_op && execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]}: execute_if.data.rs1_data[i]; assign div_in2[i] = is_alu_w ? 
{{(`XLEN-32){is_signed_op && execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]}: execute_if.data.rs2_data[i]; @@ -234,7 +234,7 @@ module VX_alu_muldiv #( wire [NUM_LANES-1:0][`XLEN-1:0] div_result_in; wire div_fire_in = div_valid_in && div_ready_in; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_result_in reg [`XLEN-1:0] div_quotient, div_remainder; always @(*) begin dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder); @@ -306,7 +306,7 @@ module VX_alu_muldiv #( assign {div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out, is_div_w_out, div_pid_out, div_sop_out, div_eop_out} = div_tag_r; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_div_result_out `ifdef XLEN_64 assign div_result_out[i] = is_rem_op_out ? (is_div_w_out ? `XLEN'($signed(div_remainder[i][31:0])) : div_remainder[i]) : (is_div_w_out ? 
`XLEN'($signed(div_quotient[i][31:0])) : div_quotient[i]); @@ -324,8 +324,8 @@ module VX_alu_muldiv #( VX_stream_arb #( .NUM_INPUTS (2), .DATAW (TAG_WIDTH + (NUM_LANES * `XLEN)), - .ARBITER ("F"), - .OUT_BUF (1) + .ARBITER ("P"), + .OUT_BUF (2) ) rsp_buf ( .clk (clk), .reset (reset), diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index 86bcaf05e..951cd811b 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -30,20 +30,24 @@ module VX_alu_unit #( `UNUSED_SPARAM (INSTANCE_ID) localparam BLOCK_SIZE = `NUM_ALU_BLOCKS; localparam NUM_LANES = `NUM_ALU_LANES; - localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); - localparam PID_WIDTH = `UP(PID_BITS); - localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1; - localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED; localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS); + localparam PE_COUNT = 1 + `EXT_M_ENABLED; + localparam PE_SEL_BITS = `CLOG2(PE_COUNT); + localparam PE_IDX_INT = 0; + localparam PE_IDX_MDV = PE_IDX_INT + `EXT_M_ENABLED; VX_execute_if #( .NUM_LANES (NUM_LANES) ) per_block_execute_if[BLOCK_SIZE](); + VX_commit_if #( + .NUM_LANES (NUM_LANES) + ) per_block_commit_if[BLOCK_SIZE](); + VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), - .OUT_BUF (PARTIAL_BW ? 1 : 0) + .OUT_BUF (PARTIAL_BW ? 
3 : 0) ) dispatch_unit ( .clk (clk), .reset (reset), @@ -51,26 +55,38 @@ module VX_alu_unit #( .execute_if (per_block_execute_if) ); - VX_commit_if #( - .NUM_LANES (NUM_LANES) - ) per_block_commit_if[BLOCK_SIZE](); - - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin - - `RESET_RELAY_EN (block_reset, reset,(BLOCK_SIZE > 1)); - - wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV); + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_alus VX_execute_if #( .NUM_LANES (NUM_LANES) - ) int_execute_if(); + ) pe_execute_if[PE_COUNT](); - VX_commit_if #( + VX_commit_if#( .NUM_LANES (NUM_LANES) - ) int_commit_if(); - - assign int_execute_if.valid = per_block_execute_if[block_idx].valid && ~is_muldiv_op; - assign int_execute_if.data = per_block_execute_if[block_idx].data; + ) pe_commit_if[PE_COUNT](); + + reg [`UP(PE_SEL_BITS)-1:0] pe_select; + always @(*) begin + pe_select = PE_IDX_INT; + if (`EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV)) + pe_select = PE_IDX_MDV; + end + + VX_pe_switch #( + .PE_COUNT (PE_COUNT), + .NUM_LANES (NUM_LANES), + .ARBITER ("R"), + .REQ_OUT_BUF (0), + .RSP_OUT_BUF (PARTIAL_BW ? 
1 : 3) + ) pe_switch ( + .clk (clk), + .reset (reset), + .pe_sel (pe_select), + .execute_in_if (per_block_execute_if[block_idx]), + .commit_out_if (per_block_commit_if[block_idx]), + .execute_out_if (pe_execute_if), + .commit_in_if (pe_commit_if) + ); VX_alu_int #( .INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)), @@ -78,76 +94,23 @@ module VX_alu_unit #( .NUM_LANES (NUM_LANES) ) alu_int ( .clk (clk), - .reset (block_reset), - .execute_if (int_execute_if), + .reset (reset), + .execute_if (pe_execute_if[PE_IDX_INT]), .branch_ctl_if (branch_ctl_if[block_idx]), - .commit_if (int_commit_if) + .commit_if (pe_commit_if[PE_IDX_INT]) ); `ifdef EXT_M_ENABLE - - VX_execute_if #( - .NUM_LANES (NUM_LANES) - ) muldiv_execute_if(); - - VX_commit_if #( - .NUM_LANES (NUM_LANES) - ) muldiv_commit_if(); - - assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op; - assign muldiv_execute_if.data = per_block_execute_if[block_idx].data; - VX_alu_muldiv #( .INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)), .NUM_LANES (NUM_LANES) ) muldiv_unit ( .clk (clk), - .reset (block_reset), - .execute_if (muldiv_execute_if), - .commit_if (muldiv_commit_if) + .reset (reset), + .execute_if (pe_execute_if[PE_IDX_MDV]), + .commit_if (pe_commit_if[PE_IDX_MDV]) ); - `endif - - assign per_block_execute_if[block_idx].ready = - `ifdef EXT_M_ENABLE - is_muldiv_op ? muldiv_execute_if.ready : - `endif - int_execute_if.ready; - - // send response - - VX_stream_arb #( - .NUM_INPUTS (RSP_ARB_SIZE), - .DATAW (RSP_ARB_DATAW), - .OUT_BUF (PARTIAL_BW ? 
1 : 3), - .ARBITER ("F") - ) rsp_arb ( - .clk (clk), - .reset (block_reset), - .valid_in ({ - `ifdef EXT_M_ENABLE - muldiv_commit_if.valid, - `endif - int_commit_if.valid - }), - .ready_in ({ - `ifdef EXT_M_ENABLE - muldiv_commit_if.ready, - `endif - int_commit_if.ready - }), - .data_in ({ - `ifdef EXT_M_ENABLE - muldiv_commit_if.data, - `endif - int_commit_if.data - }), - .data_out (per_block_commit_if[block_idx].data), - .valid_out (per_block_commit_if[block_idx].valid), - .ready_out (per_block_commit_if[block_idx].ready), - `UNUSED_PIN (sel_out) - ); end VX_gather_unit #( diff --git a/hw/rtl/core/VX_commit.sv b/hw/rtl/core/VX_commit.sv index d78c2ec89..d2e705674 100644 --- a/hw/rtl/core/VX_commit.sv +++ b/hw/rtl/core/VX_commit.sv @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #( +module VX_commit import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "" ) ( input wire clk, @@ -41,28 +41,26 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #( wire [`ISSUE_WIDTH-1:0][`NUM_THREADS-1:0] per_issue_commit_tmask; wire [`ISSUE_WIDTH-1:0] per_issue_commit_eop; - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_commit_arbs wire [`NUM_EX_UNITS-1:0] valid_in; wire [`NUM_EX_UNITS-1:0][DATAW-1:0] data_in; wire [`NUM_EX_UNITS-1:0] ready_in; - for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin + for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin : g_data_in assign valid_in[j] = commit_if[j * `ISSUE_WIDTH + i].valid; assign data_in[j] = commit_if[j * `ISSUE_WIDTH + i].data; assign commit_if[j * `ISSUE_WIDTH + i].ready = ready_in[j]; end - `RESET_RELAY (arb_reset, reset); - VX_stream_arb #( .NUM_INPUTS (`NUM_EX_UNITS), .DATAW (DATAW), - .ARBITER ("R"), + .ARBITER ("P"), .OUT_BUF (1) ) commit_arb ( .clk (clk), - .reset (arb_reset), + .reset (reset), .valid_in (valid_in), .ready_in (ready_in), .data_in (data_in), @@ -86,7 +84,7 @@ module VX_commit import VX_gpu_pkg::*, 
VX_trace_pkg::*; #( assign commit_fire_any = (| per_issue_commit_fire); - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_commit_size wire [COMMIT_SIZEW-1:0] count; `POP_COUNT(count, per_issue_commit_tmask[i]); assign commit_size[i] = count; @@ -162,7 +160,7 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #( // Writeback - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_writeback assign writeback_if[i].valid = commit_arb_if[i].valid && commit_arb_if[i].data.wb; assign writeback_if[i].data.uuid = commit_arb_if[i].data.uuid; assign writeback_if[i].data.wis = wid_to_wis(commit_arb_if[i].data.wid); @@ -176,15 +174,15 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #( end `ifdef DBG_TRACE_PIPELINE - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin - for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_trace + for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin : g_j always @(posedge clk) begin if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin - `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})); + `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0})) trace_ex_type(1, j); - `TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop)); - `TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS); - `TRACE(1, (" (#%0d)\n", commit_if[j * `ISSUE_WIDTH + i].data.uuid)); + `TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, 
eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop)) + `TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS) + `TRACE(1, (" (#%0d)\n", commit_if[j * `ISSUE_WIDTH + i].data.uuid)) end end end diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 4c82db812..260cedca3 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -75,31 +75,23 @@ module VX_core import VX_gpu_pkg::*; #( assign mem_perf_tmp_if.mem = mem_perf_if.mem; `endif - `RESET_RELAY (dcr_data_reset, reset); - `RESET_RELAY (schedule_reset, reset); - `RESET_RELAY (fetch_reset, reset); - `RESET_RELAY (decode_reset, reset); - `RESET_RELAY (issue_reset, reset); - `RESET_RELAY (execute_reset, reset); - `RESET_RELAY (commit_reset, reset); - base_dcrs_t base_dcrs; VX_dcr_data dcr_data ( .clk (clk), - .reset (dcr_data_reset), + .reset (reset), .dcr_bus_if (dcr_bus_if), .base_dcrs (base_dcrs) ); - `SCOPE_IO_SWITCH (3) + `SCOPE_IO_SWITCH (3); VX_schedule #( .INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)), .CORE_ID (CORE_ID) ) schedule ( .clk (clk), - .reset (schedule_reset), + .reset (reset), `ifdef PERF_ENABLE .sched_perf (pipeline_perf_if.sched), @@ -127,7 +119,7 @@ module VX_core import VX_gpu_pkg::*; #( ) fetch ( `SCOPE_IO_BIND (0) .clk (clk), - .reset (fetch_reset), + .reset (reset), .icache_bus_if (icache_bus_if), .schedule_if (schedule_if), .fetch_if (fetch_if) @@ -137,7 +129,7 @@ module VX_core import VX_gpu_pkg::*; #( .INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID)) ) decode ( .clk (clk), - .reset (decode_reset), + .reset (reset), .fetch_if (fetch_if), .decode_if (decode_if), .decode_sched_if(decode_sched_if) @@ -149,7 +141,7 @@ module VX_core import VX_gpu_pkg::*; #( `SCOPE_IO_BIND (1) .clk (clk), - .reset (issue_reset), + .reset (reset), `ifdef PERF_ENABLE .issue_perf 
(pipeline_perf_if.issue), @@ -167,7 +159,7 @@ module VX_core import VX_gpu_pkg::*; #( `SCOPE_IO_BIND (2) .clk (clk), - .reset (execute_reset), + .reset (reset), `ifdef PERF_ENABLE .mem_perf_if (mem_perf_tmp_if), @@ -192,7 +184,7 @@ module VX_core import VX_gpu_pkg::*; #( .INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID)) ) commit ( .clk (clk), - .reset (commit_reset), + .reset (reset), .commit_if (commit_if), @@ -202,134 +194,18 @@ module VX_core import VX_gpu_pkg::*; #( .commit_sched_if(commit_sched_if) ); - VX_lsu_mem_if #( - .NUM_LANES (`NUM_LSU_LANES), - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH) - ) lsu_dcache_if[`NUM_LSU_BLOCKS](); - -`ifdef LMEM_ENABLE - - `RESET_RELAY (lmem_unit_reset, reset); - - VX_lmem_unit #( + VX_mem_unit #( .INSTANCE_ID (INSTANCE_ID) - ) lmem_unit ( - .clk (clk), - .reset (lmem_unit_reset), + ) mem_unit ( + .clk (clk), + .reset (reset), `ifdef PERF_ENABLE - .cache_perf (mem_perf_tmp_if.lmem), + .lmem_perf (mem_perf_tmp_if.lmem), `endif - .lsu_mem_in_if (lsu_mem_if), - .lsu_mem_out_if (lsu_dcache_if) + .lsu_mem_if (lsu_mem_if), + .dcache_bus_if (dcache_bus_if) ); -`else - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - `ASSIGN_VX_LSU_MEM_IF (lsu_dcache_if[i], lsu_mem_if[i]); - end - -`endif - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - - VX_lsu_mem_if #( - .NUM_LANES (DCACHE_CHANNELS), - .DATA_SIZE (DCACHE_WORD_SIZE), - .TAG_WIDTH (DCACHE_TAG_WIDTH) - ) dcache_coalesced_if(); - - if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin - - `RESET_RELAY (mem_coalescer_reset, reset); - - VX_mem_coalescer #( - .INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)), - .NUM_REQS (`NUM_LSU_LANES), - .DATA_IN_SIZE (LSU_WORD_SIZE), - .DATA_OUT_SIZE (DCACHE_WORD_SIZE), - .ADDR_WIDTH (LSU_ADDR_WIDTH), - .ATYPE_WIDTH (`ADDR_TYPE_WIDTH), - .TAG_WIDTH (LSU_TAG_WIDTH), - .UUID_WIDTH (`UUID_WIDTH), - .QUEUE_SIZE (`LSUQ_OUT_SIZE) - ) mem_coalescer ( - .clk (clk), - .reset (mem_coalescer_reset), - - // Input request - 
.in_req_valid (lsu_dcache_if[i].req_valid), - .in_req_mask (lsu_dcache_if[i].req_data.mask), - .in_req_rw (lsu_dcache_if[i].req_data.rw), - .in_req_byteen (lsu_dcache_if[i].req_data.byteen), - .in_req_addr (lsu_dcache_if[i].req_data.addr), - .in_req_atype (lsu_dcache_if[i].req_data.atype), - .in_req_data (lsu_dcache_if[i].req_data.data), - .in_req_tag (lsu_dcache_if[i].req_data.tag), - .in_req_ready (lsu_dcache_if[i].req_ready), - - // Input response - .in_rsp_valid (lsu_dcache_if[i].rsp_valid), - .in_rsp_mask (lsu_dcache_if[i].rsp_data.mask), - .in_rsp_data (lsu_dcache_if[i].rsp_data.data), - .in_rsp_tag (lsu_dcache_if[i].rsp_data.tag), - .in_rsp_ready (lsu_dcache_if[i].rsp_ready), - - // Output request - .out_req_valid (dcache_coalesced_if.req_valid), - .out_req_mask (dcache_coalesced_if.req_data.mask), - .out_req_rw (dcache_coalesced_if.req_data.rw), - .out_req_byteen (dcache_coalesced_if.req_data.byteen), - .out_req_addr (dcache_coalesced_if.req_data.addr), - .out_req_atype (dcache_coalesced_if.req_data.atype), - .out_req_data (dcache_coalesced_if.req_data.data), - .out_req_tag (dcache_coalesced_if.req_data.tag), - .out_req_ready (dcache_coalesced_if.req_ready), - - // Output response - .out_rsp_valid (dcache_coalesced_if.rsp_valid), - .out_rsp_mask (dcache_coalesced_if.rsp_data.mask), - .out_rsp_data (dcache_coalesced_if.rsp_data.data), - .out_rsp_tag (dcache_coalesced_if.rsp_data.tag), - .out_rsp_ready (dcache_coalesced_if.rsp_ready) - ); - - end else begin - - `ASSIGN_VX_LSU_MEM_IF (dcache_coalesced_if, lsu_dcache_if[i]); - - end - - VX_mem_bus_if #( - .DATA_SIZE (DCACHE_WORD_SIZE), - .TAG_WIDTH (DCACHE_TAG_WIDTH) - ) dcache_bus_tmp_if[DCACHE_CHANNELS](); - - `RESET_RELAY (lsu_adapter_reset, reset); - - VX_lsu_adapter #( - .NUM_LANES (DCACHE_CHANNELS), - .DATA_SIZE (DCACHE_WORD_SIZE), - .TAG_WIDTH (DCACHE_TAG_WIDTH), - .TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH), - .ARBITER ("P"), - .REQ_OUT_BUF (0), - .RSP_OUT_BUF (0) - ) lsu_adapter ( - .clk (clk), - 
.reset (lsu_adapter_reset), - .lsu_mem_if (dcache_coalesced_if), - .mem_bus_if (dcache_bus_tmp_if) - ); - - for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin - `ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]); - end - - end - - `ifdef PERF_ENABLE wire [`CLOG2(LSU_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle; @@ -353,8 +229,8 @@ module VX_core import VX_gpu_pkg::*; #( wire [LSU_NUM_REQS-1:0] perf_dcache_wr_req_fire, perf_dcache_wr_req_fire_r; wire [LSU_NUM_REQS-1:0] perf_dcache_rsp_fire; - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_perf_dcache + for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin : g_j assign perf_dcache_rd_req_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].req_valid && lsu_mem_if[i].req_data.mask[j] && lsu_mem_if[i].req_ready && ~lsu_mem_if[i].req_data.rw; assign perf_dcache_wr_req_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].req_valid && lsu_mem_if[i].req_data.mask[j] && lsu_mem_if[i].req_ready && lsu_mem_if[i].req_data.rw; assign perf_dcache_rsp_fire[i * `NUM_LSU_LANES + j] = lsu_mem_if[i].rsp_valid && lsu_mem_if[i].rsp_data.mask[j] && lsu_mem_if[i].rsp_ready; diff --git a/hw/rtl/core/VX_core_top.sv b/hw/rtl/core/VX_core_top.sv index 420ae7b67..9ade1c28b 100644 --- a/hw/rtl/core/VX_core_top.sv +++ b/hw/rtl/core/VX_core_top.sv @@ -32,7 +32,7 @@ module VX_core_top import VX_gpu_pkg::*; #( output wire [DCACHE_NUM_REQS-1:0] dcache_req_rw, output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] dcache_req_byteen, output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] dcache_req_addr, - output wire [DCACHE_NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] dcache_req_atype, + output wire [DCACHE_NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] dcache_req_flags, output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_req_data, output wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] dcache_req_tag, input wire 
[DCACHE_NUM_REQS-1:0] dcache_req_ready, @@ -96,7 +96,7 @@ module VX_core_top import VX_gpu_pkg::*; #( assign dcache_req_rw[i] = dcache_bus_if[i].req_data.rw; assign dcache_req_byteen[i] = dcache_bus_if[i].req_data.byteen; assign dcache_req_addr[i] = dcache_bus_if[i].req_data.addr; - assign dcache_req_atype[i] = dcache_bus_if[i].req_data.atype; + assign dcache_req_flags[i] = dcache_bus_if[i].req_data.flags; assign dcache_req_data[i] = dcache_bus_if[i].req_data.data; assign dcache_req_tag[i] = dcache_bus_if[i].req_data.tag; assign dcache_bus_if[i].req_ready = dcache_req_ready[i]; @@ -119,7 +119,7 @@ module VX_core_top import VX_gpu_pkg::*; #( assign icache_req_data = icache_bus_if.req_data.data; assign icache_req_tag = icache_bus_if.req_data.tag; assign icache_bus_if.req_ready = icache_req_ready; - `UNUSED_VAR (icache_bus_if.req_data.atype) + `UNUSED_VAR (icache_bus_if.req_data.flags) assign icache_bus_if.rsp_valid = icache_rsp_valid; assign icache_bus_if.rsp_data.tag = icache_rsp_tag; diff --git a/hw/rtl/core/VX_csr_data.sv b/hw/rtl/core/VX_csr_data.sv index a2b0741ad..68bf7f739 100644 --- a/hw/rtl/core/VX_csr_data.sv +++ b/hw/rtl/core/VX_csr_data.sv @@ -83,7 +83,7 @@ import VX_fpu_pkg::*; wire [`NUM_FPU_BLOCKS-1:0][`NW_WIDTH-1:0] fpu_write_wid; fflags_t [`NUM_FPU_BLOCKS-1:0] fpu_write_fflags; - for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin : g_fpu_write assign fpu_write_enable[i] = fpu_csr_if[i].write_enable; assign fpu_write_wid[i] = fpu_csr_if[i].write_wid; assign fpu_write_fflags[i] = fpu_csr_if[i].write_fflags; @@ -107,7 +107,7 @@ import VX_fpu_pkg::*; end end - for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin : g_fpu_csr_read_frm assign fpu_csr_if[i].read_frm = fcsr[fpu_csr_if[i].read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]; end @@ -155,41 +155,41 @@ import VX_fpu_pkg::*; // CSRs read 
////////////////////////////////////////////////////////////// - reg [`XLEN-1:0] read_data_ro_r; - reg [`XLEN-1:0] read_data_rw_r; - reg read_addr_valid_r; + reg [`XLEN-1:0] read_data_ro_w; + reg [`XLEN-1:0] read_data_rw_w; + reg read_addr_valid_w; always @(*) begin - read_data_ro_r = '0; - read_data_rw_r = '0; - read_addr_valid_r = 1; + read_data_ro_w = '0; + read_data_rw_w = '0; + read_addr_valid_w = 1; case (read_addr) - `VX_CSR_MVENDORID : read_data_ro_r = `XLEN'(`VENDOR_ID); - `VX_CSR_MARCHID : read_data_ro_r = `XLEN'(`ARCHITECTURE_ID); - `VX_CSR_MIMPID : read_data_ro_r = `XLEN'(`IMPLEMENTATION_ID); - `VX_CSR_MISA : read_data_ro_r = `XLEN'({2'(`CLOG2(`XLEN/16)), 30'(`MISA_STD)}); + `VX_CSR_MVENDORID : read_data_ro_w = `XLEN'(`VENDOR_ID); + `VX_CSR_MARCHID : read_data_ro_w = `XLEN'(`ARCHITECTURE_ID); + `VX_CSR_MIMPID : read_data_ro_w = `XLEN'(`IMPLEMENTATION_ID); + `VX_CSR_MISA : read_data_ro_w = `XLEN'({2'(`CLOG2(`XLEN/16)), 30'(`MISA_STD)}); `ifdef EXT_F_ENABLE - `VX_CSR_FFLAGS : read_data_rw_r = `XLEN'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]); - `VX_CSR_FRM : read_data_rw_r = `XLEN'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]); - `VX_CSR_FCSR : read_data_rw_r = `XLEN'(fcsr[read_wid]); + `VX_CSR_FFLAGS : read_data_rw_w = `XLEN'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]); + `VX_CSR_FRM : read_data_rw_w = `XLEN'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]); + `VX_CSR_FCSR : read_data_rw_w = `XLEN'(fcsr[read_wid]); `endif - `VX_CSR_MSCRATCH : read_data_rw_r = mscratch; + `VX_CSR_MSCRATCH : read_data_rw_w = mscratch; - `VX_CSR_WARP_ID : read_data_ro_r = `XLEN'(read_wid); - `VX_CSR_CORE_ID : read_data_ro_r = `XLEN'(CORE_ID); - `VX_CSR_ACTIVE_THREADS: read_data_ro_r = `XLEN'(thread_masks[read_wid]); - `VX_CSR_ACTIVE_WARPS: read_data_ro_r = `XLEN'(active_warps); - `VX_CSR_NUM_THREADS: read_data_ro_r = `XLEN'(`NUM_THREADS); - `VX_CSR_NUM_WARPS : read_data_ro_r = `XLEN'(`NUM_WARPS); - `VX_CSR_NUM_CORES : read_data_ro_r = `XLEN'(`NUM_CORES * 
`NUM_CLUSTERS); - `VX_CSR_LOCAL_MEM_BASE: read_data_ro_r = `XLEN'(`LMEM_BASE_ADDR); + `VX_CSR_WARP_ID : read_data_ro_w = `XLEN'(read_wid); + `VX_CSR_CORE_ID : read_data_ro_w = `XLEN'(CORE_ID); + `VX_CSR_ACTIVE_THREADS: read_data_ro_w = `XLEN'(thread_masks[read_wid]); + `VX_CSR_ACTIVE_WARPS: read_data_ro_w = `XLEN'(active_warps); + `VX_CSR_NUM_THREADS: read_data_ro_w = `XLEN'(`NUM_THREADS); + `VX_CSR_NUM_WARPS : read_data_ro_w = `XLEN'(`NUM_WARPS); + `VX_CSR_NUM_CORES : read_data_ro_w = `XLEN'(`NUM_CORES * `NUM_CLUSTERS); + `VX_CSR_LOCAL_MEM_BASE: read_data_ro_w = `XLEN'(`LMEM_BASE_ADDR); - `CSR_READ_64(`VX_CSR_MCYCLE, read_data_ro_r, cycles); + `CSR_READ_64(`VX_CSR_MCYCLE, read_data_ro_w, cycles); - `VX_CSR_MPM_RESERVED : read_data_ro_r = 'x; - `VX_CSR_MPM_RESERVED_H : read_data_ro_r = 'x; + `VX_CSR_MPM_RESERVED : read_data_ro_w = 'x; + `VX_CSR_MPM_RESERVED_H : read_data_ro_w = 'x; - `CSR_READ_64(`VX_CSR_MINSTRET, read_data_ro_r, commit_csr_if.instret); + `CSR_READ_64(`VX_CSR_MINSTRET, read_data_ro_w, commit_csr_if.instret); `VX_CSR_SATP, `VX_CSR_MSTATUS, @@ -200,77 +200,77 @@ import VX_fpu_pkg::*; `VX_CSR_MTVEC, `VX_CSR_MEPC, `VX_CSR_PMPCFG0, - `VX_CSR_PMPADDR0 : read_data_ro_r = `XLEN'(0); + `VX_CSR_PMPADDR0 : read_data_ro_w = `XLEN'(0); default: begin - read_addr_valid_r = 0; + read_addr_valid_w = 0; if ((read_addr >= `VX_CSR_MPM_USER && read_addr < (`VX_CSR_MPM_USER + 32)) || (read_addr >= `VX_CSR_MPM_USER_H && read_addr < (`VX_CSR_MPM_USER_H + 32))) begin - read_addr_valid_r = 1; + read_addr_valid_w = 1; `ifdef PERF_ENABLE case (base_dcrs.mpm_class) `VX_DCR_MPM_CLASS_CORE: begin case (read_addr) // PERF: pipeline - `CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched.idles); - `CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched.stalls); - `CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.issue.ibf_stalls); - `CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_r, pipeline_perf_if.issue.scb_stalls); - 
`CSR_READ_64(`VX_CSR_MPM_OPDS_ST, read_data_ro_r, pipeline_perf_if.issue.opd_stalls); - `CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_ALU]); + `CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_w, pipeline_perf_if.sched.idles); + `CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_w, pipeline_perf_if.sched.stalls); + `CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_w, pipeline_perf_if.issue.ibf_stalls); + `CSR_READ_64(`VX_CSR_MPM_SCRB_ST, read_data_ro_w, pipeline_perf_if.issue.scb_stalls); + `CSR_READ_64(`VX_CSR_MPM_OPDS_ST, read_data_ro_w, pipeline_perf_if.issue.opd_stalls); + `CSR_READ_64(`VX_CSR_MPM_SCRB_ALU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_ALU]); `ifdef EXT_F_ENABLE - `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_FPU]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_FPU]); `else - `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_r, `PERF_CTR_BITS'(0)); + `CSR_READ_64(`VX_CSR_MPM_SCRB_FPU, read_data_ro_w, `PERF_CTR_BITS'(0)); `endif - `CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_LSU]); - `CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_r, pipeline_perf_if.issue.units_uses[`EX_SFU]); - `CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_CSRS]); - `CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_r, pipeline_perf_if.issue.sfu_uses[`SFU_WCTL]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_LSU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_LSU]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_SFU, read_data_ro_w, pipeline_perf_if.issue.units_uses[`EX_SFU]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_CSRS, read_data_ro_w, pipeline_perf_if.issue.sfu_uses[`SFU_CSRS]); + `CSR_READ_64(`VX_CSR_MPM_SCRB_WCTL, read_data_ro_w, pipeline_perf_if.issue.sfu_uses[`SFU_WCTL]); // PERF: memory - `CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_r, pipeline_perf_if.ifetches); - 
`CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_r, pipeline_perf_if.loads); - `CSR_READ_64(`VX_CSR_MPM_STORES, read_data_ro_r, pipeline_perf_if.stores); - `CSR_READ_64(`VX_CSR_MPM_IFETCH_LT, read_data_ro_r, pipeline_perf_if.ifetch_latency); - `CSR_READ_64(`VX_CSR_MPM_LOAD_LT, read_data_ro_r, pipeline_perf_if.load_latency); + `CSR_READ_64(`VX_CSR_MPM_IFETCHES, read_data_ro_w, pipeline_perf_if.ifetches); + `CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_w, pipeline_perf_if.loads); + `CSR_READ_64(`VX_CSR_MPM_STORES, read_data_ro_w, pipeline_perf_if.stores); + `CSR_READ_64(`VX_CSR_MPM_IFETCH_LT, read_data_ro_w, pipeline_perf_if.ifetch_latency); + `CSR_READ_64(`VX_CSR_MPM_LOAD_LT, read_data_ro_w, pipeline_perf_if.load_latency); default:; endcase end `VX_DCR_MPM_CLASS_MEM: begin case (read_addr) // PERF: icache - `CSR_READ_64(`VX_CSR_MPM_ICACHE_READS, read_data_ro_r, mem_perf_if.icache.reads); - `CSR_READ_64(`VX_CSR_MPM_ICACHE_MISS_R, read_data_ro_r, mem_perf_if.icache.read_misses); - `CSR_READ_64(`VX_CSR_MPM_ICACHE_MSHR_ST, read_data_ro_r, mem_perf_if.icache.mshr_stalls); + `CSR_READ_64(`VX_CSR_MPM_ICACHE_READS, read_data_ro_w, mem_perf_if.icache.reads); + `CSR_READ_64(`VX_CSR_MPM_ICACHE_MISS_R, read_data_ro_w, mem_perf_if.icache.read_misses); + `CSR_READ_64(`VX_CSR_MPM_ICACHE_MSHR_ST, read_data_ro_w, mem_perf_if.icache.mshr_stalls); // PERF: dcache - `CSR_READ_64(`VX_CSR_MPM_DCACHE_READS, read_data_ro_r, mem_perf_if.dcache.reads); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_WRITES, read_data_ro_r, mem_perf_if.dcache.writes); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_R, read_data_ro_r, mem_perf_if.dcache.read_misses); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_W, read_data_ro_r, mem_perf_if.dcache.write_misses); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_BANK_ST, read_data_ro_r, mem_perf_if.dcache.bank_stalls); - `CSR_READ_64(`VX_CSR_MPM_DCACHE_MSHR_ST, read_data_ro_r, mem_perf_if.dcache.mshr_stalls); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_READS, read_data_ro_w, mem_perf_if.dcache.reads); + 
`CSR_READ_64(`VX_CSR_MPM_DCACHE_WRITES, read_data_ro_w, mem_perf_if.dcache.writes); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_R, read_data_ro_w, mem_perf_if.dcache.read_misses); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_W, read_data_ro_w, mem_perf_if.dcache.write_misses); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_BANK_ST, read_data_ro_w, mem_perf_if.dcache.bank_stalls); + `CSR_READ_64(`VX_CSR_MPM_DCACHE_MSHR_ST, read_data_ro_w, mem_perf_if.dcache.mshr_stalls); // PERF: lmem - `CSR_READ_64(`VX_CSR_MPM_LMEM_READS, read_data_ro_r, mem_perf_if.lmem.reads); - `CSR_READ_64(`VX_CSR_MPM_LMEM_WRITES, read_data_ro_r, mem_perf_if.lmem.writes); - `CSR_READ_64(`VX_CSR_MPM_LMEM_BANK_ST, read_data_ro_r, mem_perf_if.lmem.bank_stalls); + `CSR_READ_64(`VX_CSR_MPM_LMEM_READS, read_data_ro_w, mem_perf_if.lmem.reads); + `CSR_READ_64(`VX_CSR_MPM_LMEM_WRITES, read_data_ro_w, mem_perf_if.lmem.writes); + `CSR_READ_64(`VX_CSR_MPM_LMEM_BANK_ST, read_data_ro_w, mem_perf_if.lmem.bank_stalls); // PERF: l2cache - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_READS, read_data_ro_r, mem_perf_if.l2cache.reads); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_WRITES, read_data_ro_r, mem_perf_if.l2cache.writes); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_R, read_data_ro_r, mem_perf_if.l2cache.read_misses); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_W, read_data_ro_r, mem_perf_if.l2cache.write_misses); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_BANK_ST, read_data_ro_r, mem_perf_if.l2cache.bank_stalls); - `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MSHR_ST, read_data_ro_r, mem_perf_if.l2cache.mshr_stalls); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_READS, read_data_ro_w, mem_perf_if.l2cache.reads); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_WRITES, read_data_ro_w, mem_perf_if.l2cache.writes); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_R, read_data_ro_w, mem_perf_if.l2cache.read_misses); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_W, read_data_ro_w, mem_perf_if.l2cache.write_misses); + `CSR_READ_64(`VX_CSR_MPM_L2CACHE_BANK_ST, read_data_ro_w, mem_perf_if.l2cache.bank_stalls); + 
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MSHR_ST, read_data_ro_w, mem_perf_if.l2cache.mshr_stalls); // PERF: l3cache - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_READS, read_data_ro_r, mem_perf_if.l3cache.reads); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_WRITES, read_data_ro_r, mem_perf_if.l3cache.writes); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_R, read_data_ro_r, mem_perf_if.l3cache.read_misses); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_W, read_data_ro_r, mem_perf_if.l3cache.write_misses); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_BANK_ST, read_data_ro_r, mem_perf_if.l3cache.bank_stalls); - `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MSHR_ST, read_data_ro_r, mem_perf_if.l3cache.mshr_stalls); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_READS, read_data_ro_w, mem_perf_if.l3cache.reads); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_WRITES, read_data_ro_w, mem_perf_if.l3cache.writes); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_R, read_data_ro_w, mem_perf_if.l3cache.read_misses); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MISS_W, read_data_ro_w, mem_perf_if.l3cache.write_misses); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_BANK_ST, read_data_ro_w, mem_perf_if.l3cache.bank_stalls); + `CSR_READ_64(`VX_CSR_MPM_L3CACHE_MSHR_ST, read_data_ro_w, mem_perf_if.l3cache.mshr_stalls); // PERF: memory - `CSR_READ_64(`VX_CSR_MPM_MEM_READS, read_data_ro_r, mem_perf_if.mem.reads); - `CSR_READ_64(`VX_CSR_MPM_MEM_WRITES, read_data_ro_r, mem_perf_if.mem.writes); - `CSR_READ_64(`VX_CSR_MPM_MEM_LT, read_data_ro_r, mem_perf_if.mem.latency); + `CSR_READ_64(`VX_CSR_MPM_MEM_READS, read_data_ro_w, mem_perf_if.mem.reads); + `CSR_READ_64(`VX_CSR_MPM_MEM_WRITES, read_data_ro_w, mem_perf_if.mem.writes); + `CSR_READ_64(`VX_CSR_MPM_MEM_LT, read_data_ro_w, mem_perf_if.mem.latency); default:; endcase end @@ -282,12 +282,12 @@ import VX_fpu_pkg::*; endcase end - assign read_data_ro = read_data_ro_r; - assign read_data_rw = read_data_rw_r; + assign read_data_ro = read_data_ro_w; + assign read_data_rw = read_data_rw_w; `UNUSED_VAR (base_dcrs) - `RUNTIME_ASSERT(~read_enable || 
read_addr_valid_r, ("%t: *** invalid CSR read address: 0x%0h (#%0d)", $time, read_addr, read_uuid)) + `RUNTIME_ASSERT(~read_enable || read_addr_valid_w, ("%t: *** invalid CSR read address: 0x%0h (#%0d)", $time, read_addr, read_uuid)) `ifdef PERF_ENABLE `UNUSED_VAR (mem_perf_if.icache); diff --git a/hw/rtl/core/VX_csr_unit.sv b/hw/rtl/core/VX_csr_unit.sv index 999c9c416..be4f7321d 100644 --- a/hw/rtl/core/VX_csr_unit.sv +++ b/hw/rtl/core/VX_csr_unit.sv @@ -66,7 +66,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0][`XLEN-1:0] rs1_data; `UNUSED_VAR (rs1_data) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_rs1_data assign rs1_data[i] = execute_if.data.rs1_data[i]; end @@ -113,12 +113,15 @@ module VX_csr_unit import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0][`XLEN-1:0] wtid, gtid; - for (genvar i = 0; i < NUM_LANES; ++i) begin - if (PID_BITS != 0) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_wtid + if (PID_BITS != 0) begin : g_pid assign wtid[i] = `XLEN'(execute_if.data.pid * NUM_LANES + i); - end else begin + end else begin : g_no_pid assign wtid[i] = `XLEN'(i); end + end + + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_gtid assign gtid[i] = (`XLEN'(CORE_ID) << (`NW_BITS + `NT_BITS)) + (`XLEN'(execute_if.data.wid) << `NT_BITS) + wtid[i]; end diff --git a/hw/rtl/core/VX_dcr_data.sv b/hw/rtl/core/VX_dcr_data.sv index 4ac137547..042c87e55 100644 --- a/hw/rtl/core/VX_dcr_data.sv +++ b/hw/rtl/core/VX_dcr_data.sv @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_dcr_data import VX_gpu_pkg::*, VX_trace_pkg::*; ( +module VX_dcr_data import VX_gpu_pkg::*; ( input wire clk, input wire reset, @@ -50,9 +50,9 @@ module VX_dcr_data import VX_gpu_pkg::*, VX_trace_pkg::*; ( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (dcr_bus_if.write_valid) begin - `TRACE(1, ("%d: base-dcr: state=", $time)); + `TRACE(1, ("%t: base-dcr: state=", $time)) trace_base_dcr(1, dcr_bus_if.write_addr); - 
`TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data)); + `TRACE(1, (", data=0x%h\n", dcr_bus_if.write_data)) end end `endif diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index 9660859ce..70bb181a1 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -15,19 +15,19 @@ `ifdef EXT_F_ENABLE `define USED_IREG(x) \ - x``_r = {1'b0, ``x}; \ + x``_v = {1'b0, ``x}; \ use_``x = 1 `define USED_FREG(x) \ - x``_r = {1'b1, ``x}; \ + x``_v = {1'b1, ``x}; \ use_``x = 1 `else `define USED_IREG(x) \ - x``_r = ``x; \ + x``_v = ``x; \ use_``x = 1 `endif -module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #( +module VX_decode import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "" ) ( input wire clk, @@ -50,7 +50,7 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #( reg [`EX_BITS-1:0] ex_type; reg [`INST_OP_BITS-1:0] op_type; op_args_t op_args; - reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r; + reg [`NR_BITS-1:0] rd_v, rs1_v, rs2_v, rs3_v; reg use_rd, use_rs1, use_rs2, use_rs3; reg is_wstall; @@ -152,13 +152,13 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #( always @(*) begin - ex_type = '0; + ex_type = 'x; op_type = 'x; op_args = 'x; - rd_r = '0; - rs1_r = '0; - rs2_r = '0; - rs3_r = '0; + rd_v = '0; + rs1_v = '0; + rs2_v = '0; + rs3_v = '0; use_rd = 0; use_rs1 = 0; use_rs2 = 0; @@ -376,14 +376,16 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #( `USED_IREG (rs2); end `ifdef EXT_F_ENABLE - `INST_FMADD, - `INST_FMSUB, - `INST_FNMSUB, - `INST_FNMADD: begin + `INST_FMADD, // 7'b1000011 + `INST_FMSUB, // 7'b1000111 + `INST_FNMSUB, // 7'b1001011 + `INST_FNMADD: // 7'b1001111 + begin ex_type = `EX_FPU; - op_type = `INST_OP_BITS'({2'b11, opcode[3:2]}); + op_type = `INST_OP_BITS'({2'b00, 1'b1, opcode[3]}); op_args.fpu.frm = func3; op_args.fpu.fmt[0] = func2[0]; // float / double + op_args.fpu.fmt[1] = opcode[3] ^ opcode[2]; // SUB use_rd = 1; `USED_FREG (rd); `USED_FREG (rs1); @@ -399,9 +401,10 @@ module VX_decode 
import VX_gpu_pkg::*, VX_trace_pkg::*; #( case (func5) 5'b00000, // FADD 5'b00001, // FSUB - 5'b00010, // FMUL - 5'b00011: begin // FDIV - op_type = `INST_OP_BITS'(func5[1:0]); + 5'b00010: // FMUL + begin + op_type = `INST_OP_BITS'({2'b00, 1'b0, func5[1]}); + op_args.fpu.fmt[1] = func5[0]; // SUB `USED_FREG (rd); `USED_FREG (rs1); `USED_FREG (rs2); @@ -430,6 +433,13 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #( `USED_FREG (rs1); end `endif + 5'b00011: begin + // FDIV + op_type = `INST_OP_BITS'(`INST_FPU_DIV); + `USED_FREG (rd); + `USED_FREG (rs1); + `USED_FREG (rs2); + end 5'b01011: begin // FSQRT op_type = `INST_OP_BITS'(`INST_FPU_SQRT); @@ -527,7 +537,7 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #( end // disable write to integer register r0 - wire wb = use_rd && (rd_r != 0); + wire wb = use_rd && (rd_v != 0); VX_elastic_buffer #( .DATAW (DATAW), @@ -537,7 +547,7 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #( .reset (reset), .valid_in (fetch_if.valid), .ready_in (fetch_if.ready), - .data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_args, wb, rd_r, rs1_r, rs2_r, rs3_r}), + .data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_args, wb, rd_v, rs1_v, rs2_v, rs3_v}), .data_out ({decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3}), .valid_out (decode_if.valid), .ready_out (decode_if.ready) @@ -547,9 +557,10 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #( wire fetch_fire = fetch_if.valid && fetch_if.ready; - assign decode_sched_if.valid = fetch_fire; - assign decode_sched_if.wid = fetch_if.data.wid; - assign decode_sched_if.is_wstall = is_wstall; + assign decode_sched_if.valid = fetch_fire; + assign 
decode_sched_if.wid = fetch_if.data.wid; + assign decode_sched_if.unlock = ~is_wstall; + `ifndef L1_ENABLE assign fetch_if.ibuf_pop = decode_if.ibuf_pop; `endif @@ -557,14 +568,14 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #( `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (decode_if.valid && decode_if.ready) begin - `TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr)); + `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr)) trace_ex_type(1, decode_if.data.ex_type); - `TRACE(1, (", op=")); + `TRACE(1, (", op=")) trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args); `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, opds=%b%b%b%b", - decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, use_rd, use_rs1, use_rs2, use_rs3)); + decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, use_rd, use_rs1, use_rs2, use_rs3)) trace_op_args(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args); - `TRACE(1, (" (#%0d)\n", decode_if.data.uuid)); + `TRACE(1, (" (#%0d)\n", decode_if.data.uuid)) end end `endif diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv index 8ea3a6125..1c24fe46d 100644 --- a/hw/rtl/core/VX_dispatch.sv +++ b/hw/rtl/core/VX_dispatch.sv @@ -33,7 +33,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + `NR_BITS + (3 * `NUM_THREADS * `XLEN) + `NT_WIDTH; wire [`NUM_THREADS-1:0][`NT_WIDTH-1:0] tids; - for (genvar i = 0; i < `NUM_THREADS; ++i) begin + for (genvar i = 0; i < `NUM_THREADS; ++i) begin : g_tids assign tids[i] = `NT_WIDTH'(i); end @@ -50,23 +50,19 @@ 
module VX_dispatch import VX_gpu_pkg::*; #( `UNUSED_PIN (valid_out) ); - wire [`NUM_EX_UNITS-1:0] operands_reset; - assign operands_if.ready = operands_reset[operands_if.data.ex_type]; - - for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin - - `RESET_RELAY (buffer_reset, reset); + wire [`NUM_EX_UNITS-1:0] operands_ready_in; + assign operands_if.ready = operands_ready_in[operands_if.data.ex_type]; + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_buffers VX_elastic_buffer #( .DATAW (DATAW), .SIZE (2), - .OUT_REG (2), // 2-cycle EB for area reduction - .LUTRAM (1) + .OUT_REG (1) ) buffer ( .clk (clk), - .reset (buffer_reset), + .reset (reset), .valid_in (operands_if.valid && (operands_if.data.ex_type == `EX_BITS'(i))), - .ready_in (operands_reset[i]), + .ready_in (operands_ready_in[i]), .data_in ({ operands_if.data.uuid, operands_if.data.wis, @@ -92,7 +88,7 @@ module VX_dispatch import VX_gpu_pkg::*; #( wire operands_if_stall = operands_if.valid && ~operands_if.ready; - for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_perf_stalls always @(posedge clk) begin if (reset) begin perf_stalls_r[i] <= '0; diff --git a/hw/rtl/core/VX_dispatch_unit.sv b/hw/rtl/core/VX_dispatch_unit.sv index 618ea1221..5d37d0578 100644 --- a/hw/rtl/core/VX_dispatch_unit.sv +++ b/hw/rtl/core/VX_dispatch_unit.sv @@ -49,13 +49,12 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire [`ISSUE_WIDTH-1:0][IN_DATAW-1:0] dispatch_data; wire [`ISSUE_WIDTH-1:0] dispatch_ready; - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_dispatch_data assign dispatch_valid[i] = dispatch_if[i].valid; assign dispatch_data[i] = dispatch_if[i].data; assign dispatch_if[i].ready = dispatch_ready[i]; end - wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices; wire [BLOCK_SIZE-1:0] block_ready; wire [BLOCK_SIZE-1:0][NUM_LANES-1:0] block_tmask; wire [BLOCK_SIZE-1:0][2:0][NUM_LANES-1:0][`XLEN-1:0] block_regs; @@ 
-66,30 +65,53 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire batch_done = (& block_done); + // batch select logic + logic [BATCH_COUNT_W-1:0] batch_idx; - if (BATCH_COUNT != 1) begin + + if (BATCH_COUNT != 1) begin : g_batch_idx + wire [BATCH_COUNT_W-1:0] batch_idx_n; + wire [BATCH_COUNT-1:0] valid_batches; + for (genvar i = 0; i < BATCH_COUNT; ++i) begin : g_valid_batches + assign valid_batches[i] = | dispatch_valid[i * BLOCK_SIZE +: BLOCK_SIZE]; + end + + VX_generic_arbiter #( + .NUM_REQS (BATCH_COUNT), + .TYPE ("P") + ) batch_sel ( + .clk (clk), + .reset (reset), + .requests (valid_batches), + .grant_index (batch_idx_n), + `UNUSED_PIN (grant_onehot), + `UNUSED_PIN (grant_valid), + .grant_ready (batch_done) + ); + always @(posedge clk) begin if (reset) begin batch_idx <= '0; - end else begin - batch_idx <= batch_idx + BATCH_COUNT_W'(batch_done); + end else if (batch_done) begin + batch_idx <= batch_idx_n; end end - end else begin + end else begin : g_batch_idx_0 assign batch_idx = 0; `UNUSED_VAR (batch_done) end - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin - - wire [ISSUE_W-1:0] issue_idx = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx); - assign issue_indices[block_idx] = issue_idx; + wire [BLOCK_SIZE-1:0][ISSUE_W-1:0] issue_indices; + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_issue_indices + assign issue_indices[block_idx] = ISSUE_W'(batch_idx * BLOCK_SIZE) + ISSUE_W'(block_idx); + end - `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1)); + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_blocks + wire [ISSUE_W-1:0] issue_idx = issue_indices[block_idx]; wire valid_p, ready_p; - if (`NUM_THREADS != NUM_LANES) begin + if (`NUM_THREADS > NUM_LANES) begin : g_partial_threads reg [NUM_PACKETS-1:0] sent_mask_p; wire [PID_WIDTH-1:0] start_p_n, start_p, end_p; wire dispatch_valid_r; @@ -102,7 +124,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire fire_eop 
= fire_p && is_last_p; always @(posedge clk) begin - if (block_reset) begin + if (reset) begin sent_mask_p <= '0; is_first_p <= 1; end else begin @@ -124,8 +146,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data = dispatch_data[issue_idx][DATA_REGS_OFF + 1 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN]; wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data = dispatch_data[issue_idx][DATA_REGS_OFF + 0 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN]; - for (genvar i = 0; i < NUM_PACKETS; ++i) begin - for (genvar j = 0; j < NUM_LANES; ++j) begin + for (genvar i = 0; i < NUM_PACKETS; ++i) begin : g_per_packet_data + for (genvar j = 0; j < NUM_LANES; ++j) begin : g_j localparam k = i * NUM_LANES + j; assign per_packet_tmask[i][j] = dispatch_tmask[k]; assign per_packet_regs[i][0][j] = dispatch_rs1_data[k]; @@ -135,10 +157,12 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( end wire [NUM_PACKETS-1:0] packet_valids; - wire [NUM_PACKETS-1:0][PID_WIDTH-1:0] packet_ids; - - for (genvar i = 0; i < NUM_PACKETS; ++i) begin + for (genvar i = 0; i < NUM_PACKETS; ++i) begin : g_packet_valids assign packet_valids[i] = (| per_packet_tmask[i]); + end + + wire [NUM_PACKETS-1:0][PID_WIDTH-1:0] packet_ids; + for (genvar i = 0; i < NUM_PACKETS; ++i) begin : g_packet_ids assign packet_ids[i] = PID_WIDTH'(i); end @@ -187,13 +211,13 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( assign block_pid[block_idx] = start_p; assign block_sop[block_idx] = is_first_p; assign block_eop[block_idx] = is_last_p; - if (FANOUT_ENABLE) begin + if (FANOUT_ENABLE) begin : g_block_ready_fanout assign block_ready[block_idx] = dispatch_valid_r && ready_p && block_enable; - end else begin + end else begin : g_block_ready assign block_ready[block_idx] = ready_p && block_enable; end - assign block_done[block_idx] = ~dispatch_valid[issue_idx] || fire_eop; - end else begin + assign block_done[block_idx] = fire_eop || ~dispatch_valid[issue_idx]; + end else 
begin : g_full_threads assign valid_p = dispatch_valid[issue_idx]; assign block_tmask[block_idx] = dispatch_data[issue_idx][DATA_TMASK_OFF +: `NUM_THREADS]; assign block_regs[block_idx][0] = dispatch_data[issue_idx][DATA_REGS_OFF + 2 * `NUM_THREADS * `XLEN +: `NUM_THREADS * `XLEN]; @@ -203,29 +227,31 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( assign block_sop[block_idx] = 1'b1; assign block_eop[block_idx] = 1'b1; assign block_ready[block_idx] = ready_p; - assign block_done[block_idx] = ~valid_p || ready_p; + assign block_done[block_idx] = ready_p || ~valid_p; end wire [ISSUE_ISW_W-1:0] isw; - if (BATCH_COUNT != 1) begin - if (BLOCK_SIZE != 1) begin + if (BATCH_COUNT != 1) begin : g_isw_batch + if (BLOCK_SIZE != 1) begin : g_block assign isw = {batch_idx, BLOCK_SIZE_W'(block_idx)}; - end else begin + end else begin : g_no_block assign isw = batch_idx; end - end else begin + end else begin : g_isw assign isw = block_idx; end wire [`NW_WIDTH-1:0] block_wid = wis_to_wid(dispatch_data[issue_idx][DATA_TMASK_OFF+`NUM_THREADS +: ISSUE_WIS_W], isw); + logic [OUT_DATAW-1:0] execute_data, execute_data_w; + VX_elastic_buffer #( .DATAW (OUT_DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) buf_out ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (valid_p), .ready_in (ready_p), .data_in ({ @@ -239,17 +265,27 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #( block_pid[block_idx], block_sop[block_idx], block_eop[block_idx]}), - .data_out (execute_if[block_idx].data), + .data_out (execute_data), .valid_out (execute_if[block_idx].valid), .ready_out (execute_if[block_idx].ready) ); + + if (`NUM_THREADS != NUM_LANES) begin : g_execute_data_w_partial + assign execute_data_w = execute_data; + end else begin : g_execute_data_w_full + always @(*) begin + execute_data_w = execute_data; + execute_data_w[2:0] = {1'b0, 1'b1, 1'b1}; // default pid, sop, and eop + end + end + assign execute_if[block_idx].data = execute_data_w; end reg 
[`ISSUE_WIDTH-1:0] ready_in; always @(*) begin ready_in = 0; - for (integer i = 0; i < BLOCK_SIZE; ++i) begin - ready_in[issue_indices[i]] = block_ready[i] && block_eop[i]; + for (integer block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + ready_in[issue_indices[block_idx]] = block_ready[block_idx] && block_eop[block_idx]; end end assign dispatch_ready = ready_in; diff --git a/hw/rtl/core/VX_execute.sv b/hw/rtl/core/VX_execute.sv index ded25918c..4f66757f1 100644 --- a/hw/rtl/core/VX_execute.sv +++ b/hw/rtl/core/VX_execute.sv @@ -51,41 +51,35 @@ module VX_execute import VX_gpu_pkg::*; #( VX_fpu_csr_if fpu_csr_if[`NUM_FPU_BLOCKS](); `endif - `RESET_RELAY (alu_reset, reset); - `RESET_RELAY (lsu_reset, reset); - `RESET_RELAY (sfu_reset, reset); - VX_alu_unit #( .INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID)) ) alu_unit ( .clk (clk), - .reset (alu_reset), + .reset (reset), .dispatch_if (dispatch_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .commit_if (commit_if[`EX_ALU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .branch_ctl_if (branch_ctl_if) ); - `SCOPE_IO_SWITCH (1) + `SCOPE_IO_SWITCH (1); VX_lsu_unit #( .INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID)) ) lsu_unit ( `SCOPE_IO_BIND (0) .clk (clk), - .reset (lsu_reset), + .reset (reset), .dispatch_if (dispatch_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .commit_if (commit_if[`EX_LSU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .lsu_mem_if (lsu_mem_if) ); `ifdef EXT_F_ENABLE - `RESET_RELAY (fpu_reset, reset); - VX_fpu_unit #( .INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID)) ) fpu_unit ( .clk (clk), - .reset (fpu_reset), + .reset (reset), .dispatch_if (dispatch_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .commit_if (commit_if[`EX_FPU * `ISSUE_WIDTH +: `ISSUE_WIDTH]), .fpu_csr_if (fpu_csr_if) @@ -97,7 +91,7 @@ module VX_execute import VX_gpu_pkg::*; #( .CORE_ID (CORE_ID) ) sfu_unit ( .clk (clk), - .reset (sfu_reset), + .reset (reset), `ifdef PERF_ENABLE .mem_perf_if (mem_perf_if), .pipeline_perf_if (pipeline_perf_if), diff --git 
a/hw/rtl/core/VX_fetch.sv b/hw/rtl/core/VX_fetch.sv index 043a87939..cf862aa06 100644 --- a/hw/rtl/core/VX_fetch.sv +++ b/hw/rtl/core/VX_fetch.sv @@ -71,7 +71,7 @@ module VX_fetch import VX_gpu_pkg::*; #( // This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache requests. // This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus. wire [`NUM_WARPS-1:0] pending_ibuf_full; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_pending_reads VX_pending_size #( .SIZE (`IBUF_SIZE) ) pending_reads ( @@ -116,9 +116,9 @@ module VX_fetch import VX_gpu_pkg::*; #( .ready_out (icache_bus_if.req_ready) ); - assign icache_bus_if.req_data.atype = '0; + assign icache_bus_if.req_data.flags = '0; assign icache_bus_if.req_data.rw = 0; - assign icache_bus_if.req_data.byteen = 4'b1111; + assign icache_bus_if.req_data.byteen = '1; assign icache_bus_if.req_data.data = '0; // Icache Response @@ -131,47 +131,57 @@ module VX_fetch import VX_gpu_pkg::*; #( assign fetch_if.data.uuid = rsp_uuid; assign icache_bus_if.rsp_ready = fetch_if.ready; +`ifdef SCOPE `ifdef DBG_SCOPE_FETCH + `SCOPE_IO_SWITCH (1); wire schedule_fire = schedule_if.valid && schedule_if.ready; - wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; - VX_scope_tap #( - .SCOPE_ID (1), - .TRIGGERW (4), - .PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + - ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + - (ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH) - ) scope_tap ( - .clk (clk), - .reset (scope_reset), - .start (1'b0), - .stop (1'b0), - .triggers ({ - reset, + wire icache_bus_req_fire = icache_bus_if.req_valid && icache_bus_if.req_ready; + wire icache_bus_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready; + wire [`UUID_WIDTH-1:0] icache_bus_req_uuid = icache_bus_if.req_data.tag[ICACHE_TAG_WIDTH-1 -: `UUID_WIDTH]; + wire 
[`UUID_WIDTH-1:0] icache_bus_rsp_uuid = icache_bus_if.rsp_data.tag[ICACHE_TAG_WIDTH-1 -: `UUID_WIDTH]; + `NEG_EDGE (reset_negedge, reset); + `SCOPE_TAP_EX (0, 1, 6, 3, ( + `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + + `UUID_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH + + `UUID_WIDTH + (ICACHE_WORD_SIZE * 8) + ), { + schedule_if.valid, + schedule_if.ready, + icache_bus_if.req_valid, + icache_bus_if.req_ready, + icache_bus_if.rsp_valid, + icache_bus_if.rsp_ready + }, { schedule_fire, - icache_req_fire, - icache_rsp_fire - }), - .probes ({ + icache_bus_req_fire, + icache_bus_rsp_fire + },{ schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, - icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, - icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag - }), - .bus_in (scope_bus_in), - .bus_out (scope_bus_out) + icache_bus_req_uuid, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, + icache_bus_rsp_uuid, icache_bus_if.rsp_data.data + }, + reset_negedge, 1'b0, 4096 ); `else - `SCOPE_IO_UNUSED() + `SCOPE_IO_UNUSED(0) +`endif +`endif +`ifdef CHIPSCOPE + ila_fetch ila_fetch_inst ( + .clk (clk), + .probe0 ({schedule_if.valid, schedule_if.data, schedule_if.ready}), + .probe1 ({icache_bus_if.req_valid, icache_bus_if.req_data, icache_bus_if.req_ready}), + .probe2 ({icache_bus_if.rsp_valid, icache_bus_if.rsp_data, icache_bus_if.rsp_ready}) + ); `endif `ifdef DBG_TRACE_MEM - wire schedule_fire = schedule_if.valid && schedule_if.ready; - wire fetch_fire = fetch_if.valid && fetch_if.ready; always @(posedge clk) begin - if (schedule_fire) begin - `TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)); + if (schedule_if.valid && schedule_if.ready) begin + `TRACE(1, ("%t: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, 
{schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid)) end - if (fetch_fire) begin - `TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid)); + if (fetch_if.valid && fetch_if.ready) begin + `TRACE(1, ("%t: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid)) end end `endif diff --git a/hw/rtl/core/VX_fpu_unit.sv b/hw/rtl/core/VX_fpu_unit.sv index 496b24e29..1565f3728 100644 --- a/hw/rtl/core/VX_fpu_unit.sv +++ b/hw/rtl/core/VX_fpu_unit.sv @@ -41,7 +41,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), - .OUT_BUF (PARTIAL_BW ? 1 : 0) + .OUT_BUF (PARTIAL_BW ? 3 : 0) ) dispatch_unit ( .clk (clk), .reset (reset), @@ -53,12 +53,10 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_fpus `UNUSED_VAR (per_block_execute_if[block_idx].data.tid) `UNUSED_VAR (per_block_execute_if[block_idx].data.wb) - `RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1)); - // Store request info wire fpu_req_valid, fpu_req_ready; wire fpu_rsp_valid, fpu_rsp_ready; @@ -71,9 +69,9 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( wire [NUM_LANES-1:0] fpu_rsp_tmask; wire [`PC_BITS-1:0] fpu_rsp_PC; wire [`NR_BITS-1:0] fpu_rsp_rd; - wire [PID_WIDTH-1:0] fpu_rsp_pid; - wire fpu_rsp_sop; - wire fpu_rsp_eop; + wire [PID_WIDTH-1:0] fpu_rsp_pid, fpu_rsp_pid_u; + wire fpu_rsp_sop, fpu_rsp_sop_u; + wire fpu_rsp_eop, fpu_rsp_eop_u; wire [TAG_WIDTH-1:0] fpu_req_tag, fpu_rsp_tag; wire mdata_full; @@ -89,17 +87,30 @@ module VX_fpu_unit import 
VX_fpu_pkg::*; #( .SIZE (`FPUQ_SIZE) ) tag_store ( .clk (clk), - .reset (block_reset), + .reset (reset), .acquire_en (execute_fire), .write_addr (fpu_req_tag), .write_data ({per_block_execute_if[block_idx].data.uuid, per_block_execute_if[block_idx].data.wid, per_block_execute_if[block_idx].data.tmask, per_block_execute_if[block_idx].data.PC, per_block_execute_if[block_idx].data.rd, per_block_execute_if[block_idx].data.pid, per_block_execute_if[block_idx].data.sop, per_block_execute_if[block_idx].data.eop}), - .read_data ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_pid, fpu_rsp_sop, fpu_rsp_eop}), + .read_data ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_pid_u, fpu_rsp_sop_u, fpu_rsp_eop_u}), .read_addr (fpu_rsp_tag), .release_en (fpu_rsp_fire), .full (mdata_full), `UNUSED_PIN (empty) ); + if (PID_BITS != 0) begin : g_fpu_rsp_pid + assign fpu_rsp_pid = fpu_rsp_pid_u; + assign fpu_rsp_sop = fpu_rsp_sop_u; + assign fpu_rsp_eop = fpu_rsp_eop_u; + end else begin : g_no_fpu_rsp_pid + `UNUSED_VAR (fpu_rsp_pid_u) + `UNUSED_VAR (fpu_rsp_sop_u) + `UNUSED_VAR (fpu_rsp_eop_u) + assign fpu_rsp_pid = 0; + assign fpu_rsp_sop = 1; + assign fpu_rsp_eop = 1; + end + // resolve dynamic FRM from CSR wire [`INST_FRM_BITS-1:0] fpu_req_frm; `ASSIGN_BLOCKED_WID (fpu_csr_if[block_idx].read_wid, per_block_execute_if[block_idx].data.wid, block_idx, `NUM_FPU_BLOCKS) @@ -119,7 +130,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .OUT_BUF (PARTIAL_BW ? 1 : 3) ) fpu_dpi ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (fpu_req_valid), .mask_in (per_block_execute_if[block_idx].data.tmask), @@ -148,7 +159,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .OUT_BUF (PARTIAL_BW ? 1 : 3) ) fpu_fpnew ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (fpu_req_valid), .mask_in (per_block_execute_if[block_idx].data.tmask), @@ -177,7 +188,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .OUT_BUF (PARTIAL_BW ? 
1 : 3) ) fpu_dsp ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (fpu_req_valid), .mask_in (per_block_execute_if[block_idx].data.tmask), @@ -200,27 +211,38 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( `endif - // handle FPU response - + // handle CSR update fflags_t fpu_rsp_fflags_q; - if (PID_BITS != 0) begin + if (PID_BITS != 0) begin : g_pid fflags_t fpu_rsp_fflags_r; always @(posedge clk) begin - if (block_reset) begin + if (reset) begin fpu_rsp_fflags_r <= '0; end else if (fpu_rsp_fire) begin fpu_rsp_fflags_r <= fpu_rsp_eop ? '0 : (fpu_rsp_fflags_r | fpu_rsp_fflags); end end assign fpu_rsp_fflags_q = fpu_rsp_fflags_r | fpu_rsp_fflags; - end else begin + end else begin : g_no_pid assign fpu_rsp_fflags_q = fpu_rsp_fflags; end - assign fpu_csr_if[block_idx].write_enable = fpu_rsp_fire && fpu_rsp_eop && fpu_rsp_has_fflags; - `ASSIGN_BLOCKED_WID (fpu_csr_if[block_idx].write_wid, fpu_rsp_wid, block_idx, `NUM_FPU_BLOCKS) - assign fpu_csr_if[block_idx].write_fflags = fpu_rsp_fflags_q; + VX_fpu_csr_if fpu_csr_tmp_if(); + assign fpu_csr_tmp_if.write_enable = fpu_rsp_fire && fpu_rsp_eop && fpu_rsp_has_fflags; + `ASSIGN_BLOCKED_WID (fpu_csr_tmp_if.write_wid, fpu_rsp_wid, block_idx, `NUM_FPU_BLOCKS) + assign fpu_csr_tmp_if.write_fflags = fpu_rsp_fflags_q; + + VX_pipe_register #( + .DATAW (1 + `NW_WIDTH + $bits(fflags_t)), + .RESETW (1) + ) fpu_csr_reg ( + .clk (clk), + .reset (reset), + .enable (1'b1), + .data_in ({fpu_csr_tmp_if.write_enable, fpu_csr_tmp_if.write_wid, fpu_csr_tmp_if.write_fflags}), + .data_out ({fpu_csr_if[block_idx].write_enable, fpu_csr_if[block_idx].write_wid, fpu_csr_if[block_idx].write_fflags}) + ); // send response @@ -229,7 +251,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #( .SIZE (0) ) rsp_buf ( .clk (clk), - .reset (block_reset), + .reset (reset), .valid_in (fpu_rsp_valid), .ready_in (fpu_rsp_ready), .data_in ({fpu_rsp_uuid, fpu_rsp_wid, fpu_rsp_tmask, fpu_rsp_PC, fpu_rsp_rd, fpu_rsp_result, fpu_rsp_pid, fpu_rsp_sop, 
fpu_rsp_eop}), diff --git a/hw/rtl/core/VX_gather_unit.sv b/hw/rtl/core/VX_gather_unit.sv index 293495eba..284d5c167 100644 --- a/hw/rtl/core/VX_gather_unit.sv +++ b/hw/rtl/core/VX_gather_unit.sv @@ -41,17 +41,17 @@ module VX_gather_unit import VX_gpu_pkg::*; #( wire [BLOCK_SIZE-1:0] commit_in_ready; wire [BLOCK_SIZE-1:0][ISSUE_ISW_W-1:0] commit_in_isw; - for (genvar i = 0; i < BLOCK_SIZE; ++i) begin + for (genvar i = 0; i < BLOCK_SIZE; ++i) begin : g_commit_in assign commit_in_valid[i] = commit_in_if[i].valid; assign commit_in_data[i] = commit_in_if[i].data; assign commit_in_if[i].ready = commit_in_ready[i]; - if (BLOCK_SIZE != `ISSUE_WIDTH) begin - if (BLOCK_SIZE != 1) begin + if (BLOCK_SIZE != `ISSUE_WIDTH) begin : g_commit_in_isw_partial + if (BLOCK_SIZE != 1) begin : g_block assign commit_in_isw[i] = {commit_in_data[i][DATA_WIS_OFF+BLOCK_SIZE_W +: (ISSUE_ISW_W-BLOCK_SIZE_W)], BLOCK_SIZE_W'(i)}; - end else begin + end else begin : g_no_block assign commit_in_isw[i] = commit_in_data[i][DATA_WIS_OFF +: ISSUE_ISW_W]; end - end else begin + end else begin : g_commit_in_isw_full assign commit_in_isw[i] = BLOCK_SIZE_W'(i); end end @@ -70,11 +70,12 @@ module VX_gather_unit import VX_gpu_pkg::*; #( commit_out_data[commit_in_isw[i]] = commit_in_data[i]; end end - for (genvar i = 0; i < BLOCK_SIZE; ++i) begin + + for (genvar i = 0; i < BLOCK_SIZE; ++i) begin : g_commit_in_ready assign commit_in_ready[i] = commit_out_ready[commit_in_isw[i]]; end - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin: g_out_bufs VX_commit_if #( .NUM_LANES (NUM_LANES) ) commit_tmp_if(); @@ -94,31 +95,31 @@ module VX_gather_unit import VX_gpu_pkg::*; #( .ready_out (commit_tmp_if.ready) ); - logic [`NUM_THREADS-1:0] commit_tmask_r; - logic [`NUM_THREADS-1:0][`XLEN-1:0] commit_data_r; - if (PID_BITS != 0) begin + logic [`NUM_THREADS-1:0] commit_tmask_w; + logic [`NUM_THREADS-1:0][`XLEN-1:0] commit_data_w; + if (PID_BITS != 0) begin : 
g_commit_data_with_pid always @(*) begin - commit_tmask_r = '0; - commit_data_r = 'x; + commit_tmask_w = '0; + commit_data_w = 'x; for (integer j = 0; j < NUM_LANES; ++j) begin - commit_tmask_r[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.tmask[j]; - commit_data_r[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.data[j]; + commit_tmask_w[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.tmask[j]; + commit_data_w[commit_tmp_if.data.pid * NUM_LANES + j] = commit_tmp_if.data.data[j]; end end - end else begin - assign commit_tmask_r = commit_tmp_if.data.tmask; - assign commit_data_r = commit_tmp_if.data.data; + end else begin : g_commit_data_no_pid + assign commit_tmask_w = commit_tmp_if.data.tmask; + assign commit_data_w = commit_tmp_if.data.data; end assign commit_out_if[i].valid = commit_tmp_if.valid; assign commit_out_if[i].data = { commit_tmp_if.data.uuid, commit_tmp_if.data.wid, - commit_tmask_r, + commit_tmask_w, commit_tmp_if.data.PC, commit_tmp_if.data.wb, commit_tmp_if.data.rd, - commit_data_r, + commit_data_w, 1'b0, // PID commit_tmp_if.data.sop, commit_tmp_if.data.eop diff --git a/hw/rtl/core/VX_ibuffer.sv b/hw/rtl/core/VX_ibuffer.sv index e8edf64c7..e1a9457de 100644 --- a/hw/rtl/core/VX_ibuffer.sv +++ b/hw/rtl/core/VX_ibuffer.sv @@ -35,7 +35,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #( wire [PER_ISSUE_WARPS-1:0] ibuf_ready_in; assign decode_if.ready = ibuf_ready_in[decode_if.data.wid]; - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_instr_bufs VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`IBUF_SIZE), diff --git a/hw/rtl/core/VX_ipdom_stack.sv b/hw/rtl/core/VX_ipdom_stack.sv index 0ec05cbae..ded232f30 100644 --- a/hw/rtl/core/VX_ipdom_stack.sv +++ b/hw/rtl/core/VX_ipdom_stack.sv @@ -48,9 +48,9 @@ module VX_ipdom_stack #( empty_r <= 1; full_r <= 0; end else begin - `ASSERT(~push || ~full, ("runtime error: writing to a full stack!")); - `ASSERT(~pop || 
~empty, ("runtime error: reading an empty stack!")); - `ASSERT(~push || ~pop, ("runtime error: push and pop in same cycle not supported!")); + `ASSERT(~push || ~full, ("%t: runtime error: writing to a full stack!", $time)); + `ASSERT(~pop || ~empty, ("%t: runtime error: reading an empty stack!", $time)); + `ASSERT(~push || ~pop, ("%t: runtime error: push and pop in same cycle not supported!", $time)); if (push) begin rd_ptr <= wr_ptr; wr_ptr <= wr_ptr + ADDRW'(1); diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv index 1480e6649..84bcc0072 100644 --- a/hw/rtl/core/VX_issue.sv +++ b/hw/rtl/core/VX_issue.sv @@ -29,16 +29,17 @@ module VX_issue import VX_gpu_pkg::*; #( VX_writeback_if.slave writeback_if [`ISSUE_WIDTH], VX_dispatch_if.master dispatch_if [`NUM_EX_UNITS * `ISSUE_WIDTH] ); + `STATIC_ASSERT ((`ISSUE_WIDTH <= `NUM_WARPS), ("invalid parameter")) `ifdef PERF_ENABLE issue_perf_t per_issue_perf [`ISSUE_WIDTH]; `PERF_COUNTER_ADD (issue_perf, per_issue_perf, ibf_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) `PERF_COUNTER_ADD (issue_perf, per_issue_perf, scb_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) `PERF_COUNTER_ADD (issue_perf, per_issue_perf, opd_stalls, `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) - for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_issue_perf_units_uses `PERF_COUNTER_ADD (issue_perf, per_issue_perf, units_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) end - for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin : g_issue_perf_sfu_uses `PERF_COUNTER_ADD (issue_perf, per_issue_perf, sfu_uses[i], `PERF_CTR_BITS, `ISSUE_WIDTH, (`ISSUE_WIDTH > 2)) end `endif @@ -49,9 +50,9 @@ module VX_issue import VX_gpu_pkg::*; #( wire [`ISSUE_WIDTH-1:0] decode_ready_in; assign decode_if.ready = decode_ready_in[decode_isw]; - `SCOPE_IO_SWITCH (`ISSUE_WIDTH) + `SCOPE_IO_SWITCH (`ISSUE_WIDTH); - for (genvar 
issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : issue_slices + for (genvar issue_id = 0; issue_id < `ISSUE_WIDTH; ++issue_id) begin : g_issue_slices VX_decode_if #( .NUM_WARPS (PER_ISSUE_WARPS) ) per_issue_decode_if(); @@ -76,15 +77,13 @@ module VX_issue import VX_gpu_pkg::*; #( assign decode_if.ibuf_pop[issue_id * PER_ISSUE_WARPS +: PER_ISSUE_WARPS] = per_issue_decode_if.ibuf_pop; `endif - `RESET_RELAY (slice_reset, reset); - VX_issue_slice #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, issue_id)), .ISSUE_ID (issue_id) ) issue_slice ( `SCOPE_IO_BIND(issue_id) .clk (clk), - .reset (slice_reset), + .reset (reset), `ifdef PERF_ENABLE .issue_perf (per_issue_perf[issue_id]), `endif @@ -94,7 +93,7 @@ module VX_issue import VX_gpu_pkg::*; #( ); // Assign transposed dispatch_if - for (genvar ex_id = 0; ex_id < `NUM_EX_UNITS; ++ex_id) begin + for (genvar ex_id = 0; ex_id < `NUM_EX_UNITS; ++ex_id) begin : g_dispatch_if `ASSIGN_VX_IF(dispatch_if[ex_id * `ISSUE_WIDTH + issue_id], per_issue_dispatch_if[ex_id]); end end diff --git a/hw/rtl/core/VX_issue_slice.sv b/hw/rtl/core/VX_issue_slice.sv index 03b91b5fe..f287525c7 100644 --- a/hw/rtl/core/VX_issue_slice.sv +++ b/hw/rtl/core/VX_issue_slice.sv @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( +module VX_issue_slice import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "", parameter ISSUE_ID = 0 ) ( @@ -36,16 +36,11 @@ module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( VX_scoreboard_if scoreboard_if(); VX_operands_if operands_if(); - `RESET_RELAY (ibuf_reset, reset); - `RESET_RELAY (scoreboard_reset, reset); - `RESET_RELAY (operands_reset, reset); - `RESET_RELAY (dispatch_reset, reset); - VX_ibuffer #( .INSTANCE_ID ($sformatf("%s-ibuffer", INSTANCE_ID)) ) ibuffer ( .clk (clk), - .reset (ibuf_reset), + .reset (reset), `ifdef PERF_ENABLE .perf_stalls (issue_perf.ibf_stalls), `endif @@ -57,7 +52,7 @@ module VX_issue_slice import VX_gpu_pkg::*, 
VX_trace_pkg::*; #( .INSTANCE_ID ($sformatf("%s-scoreboard", INSTANCE_ID)) ) scoreboard ( .clk (clk), - .reset (scoreboard_reset), + .reset (reset), `ifdef PERF_ENABLE .perf_stalls (issue_perf.scb_stalls), .perf_units_uses(issue_perf.units_uses), @@ -72,7 +67,7 @@ module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( .INSTANCE_ID ($sformatf("%s-operands", INSTANCE_ID)) ) operands ( .clk (clk), - .reset (operands_reset), + .reset (reset), `ifdef PERF_ENABLE .perf_stalls (issue_perf.opd_stalls), `endif @@ -85,7 +80,7 @@ module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( .INSTANCE_ID ($sformatf("%s-dispatch", INSTANCE_ID)) ) dispatch ( .clk (clk), - .reset (dispatch_reset), + .reset (reset), `ifdef PERF_ENABLE `UNUSED_PIN (perf_stalls), `endif @@ -93,65 +88,86 @@ module VX_issue_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( .dispatch_if (dispatch_if) ); +`ifdef SCOPE `ifdef DBG_SCOPE_ISSUE - wire operands_if_fire = operands_if.valid && operands_if.ready; - wire operands_if_not_ready = ~operands_if.ready; - wire writeback_if_valid = writeback_if.valid; - VX_scope_tap #( - .SCOPE_ID (2), - .TRIGGERW (4), - .PROBEW (`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + - 1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) + - `UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1) - ) scope_tap ( - .clk (clk), - .reset (scope_reset), - .start (1'b0), - .stop (1'b0), - .triggers ({ - reset, - operands_if_fire, - operands_if_not_ready, - writeback_if_valid - }), - .probes ({ + `SCOPE_IO_SWITCH (1); + wire decode_fire = decode_if.valid && decode_if.ready; + wire operands_fire = operands_if.valid && operands_if.ready; + `NEG_EDGE (reset_negedge, reset); + `SCOPE_TAP_EX (0, 2, 4, 3, ( + `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS * 4 + + `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS + (3 * `XLEN) + + `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `NR_BITS + 
(`NUM_THREADS * `XLEN) + 1 + ), { + decode_if.valid, + decode_if.ready, + operands_if.valid, + operands_if.ready + }, { + decode_fire, + operands_fire, + writeback_if.valid // ack-free + }, { + decode_if.data.uuid, + decode_if.data.wid, + decode_if.data.tmask, + decode_if.data.PC, + decode_if.data.ex_type, + decode_if.data.op_type, + decode_if.data.wb, + decode_if.data.rd, + decode_if.data.rs1, + decode_if.data.rs2, + decode_if.data.rs3, operands_if.data.uuid, + operands_if.data.wis, operands_if.data.tmask, + operands_if.data.PC, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.wb, operands_if.data.rd, - operands_if.data.rs1_data, - operands_if.data.rs2_data, - operands_if.data.rs3_data, + operands_if.data.rs1_data[0], + operands_if.data.rs2_data[0], + operands_if.data.rs3_data[0], writeback_if.data.uuid, + writeback_if.data.wis, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.data, writeback_if.data.eop - }), - .bus_in (scope_bus_in), - .bus_out (scope_bus_out) + }, + reset_negedge, 1'b0, 4096 ); `else - `SCOPE_IO_UNUSED() + `SCOPE_IO_UNUSED(0) +`endif +`endif +`ifdef CHIPSCOPE + ila_issue ila_issue_inst ( + .clk (clk), + .probe0 ({decode_if.valid, decode_if.data, decode_if.ready}), + .probe1 ({scoreboard_if.valid, scoreboard_if.data, scoreboard_if.ready}), + .probe2 ({operands_if.valid, operands_if.data, operands_if.ready}), + .probe3 ({writeback_if.valid, writeback_if.data}) + ); `endif `ifdef DBG_TRACE_PIPELINE always @(posedge clk) begin if (operands_if.valid && operands_if.ready) begin - `TRACE(1, ("%d: %s wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})); + `TRACE(1, ("%t: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0})) trace_ex_type(1, operands_if.data.ex_type); - `TRACE(1, (", op=")); + `TRACE(1, (", op=")) trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, 
operands_if.data.op_args); - `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd)); - `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS); - `TRACE(1, (", rs2_data=")); - `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS); - `TRACE(1, (", rs3_data=")); - `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS); + `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd)) + `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS) + `TRACE(1, (", rs2_data=")) + `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS) + `TRACE(1, (", rs3_data=")) + `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS) trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args); - `TRACE(1, (" (#%0d)\n", operands_if.data.uuid)); + `TRACE(1, (" (#%0d)\n", operands_if.data.uuid)) end end `endif diff --git a/hw/rtl/core/VX_issue_top.sv b/hw/rtl/core/VX_issue_top.sv index 0166cf770..2d81ee044 100644 --- a/hw/rtl/core/VX_issue_top.sv +++ b/hw/rtl/core/VX_issue_top.sv @@ -80,7 +80,7 @@ module VX_issue_top import VX_gpu_pkg::*; #( assign decode_if.data.rs3 = decode_rs3; assign decode_ready = decode_if.ready; - for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin : g_writeback_if assign writeback_if[i].valid = writeback_valid[i]; assign writeback_if[i].data.uuid = writeback_uuid[i]; assign writeback_if[i].data.wis = writeback_wis[i]; @@ -92,7 +92,7 @@ module VX_issue_top import VX_gpu_pkg::*; #( assign writeback_if[i].data.eop = writeback_eop[i]; end - for (genvar i = 0; i < `NUM_EX_UNITS * `ISSUE_WIDTH; ++i) begin + for (genvar i = 0; i < `NUM_EX_UNITS * `ISSUE_WIDTH; ++i) begin : g_dispatch_if assign dispatch_valid[i] = dispatch_if[i].valid; assign dispatch_uuid[i] = dispatch_if[i].data.uuid; assign 
dispatch_wis[i] = dispatch_if[i].data.wis; @@ -113,6 +113,13 @@ module VX_issue_top import VX_gpu_pkg::*; #( issue_perf_t issue_perf = '0; `endif +`ifdef SCOPE + wire [0:0] scope_reset_w = 1'b0; + wire [0:0] scope_bus_in_w = 1'b0; + wire [0:0] scope_bus_out_w; + `UNUSED_VAR (scope_bus_out_w) +`endif + VX_issue #( .INSTANCE_ID (INSTANCE_ID) ) issue ( diff --git a/hw/rtl/core/VX_lmem_unit.sv b/hw/rtl/core/VX_lmem_unit.sv deleted file mode 100644 index accb7a586..000000000 --- a/hw/rtl/core/VX_lmem_unit.sv +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -`include "VX_define.vh" - -module VX_lmem_unit import VX_gpu_pkg::*; #( - parameter `STRING INSTANCE_ID = "" -) ( - input wire clk, - input wire reset, - -`ifdef PERF_ENABLE - output cache_perf_t cache_perf, -`endif - - VX_lsu_mem_if.slave lsu_mem_in_if [`NUM_LSU_BLOCKS], - VX_lsu_mem_if.master lsu_mem_out_if [`NUM_LSU_BLOCKS] -); - `STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter")) - `STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter")) - - localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `ADDR_TYPE_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; - localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; - localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE); - - VX_lsu_mem_if #( - .NUM_LANES (`NUM_LSU_LANES), - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH) - ) lsu_switch_if[`NUM_LSU_BLOCKS](); - - `RESET_RELAY_EX (block_reset, reset, `NUM_LSU_BLOCKS, 1); - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - - wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; - for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin - assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.atype[j][`ADDR_TYPE_LOCAL]; - end - - wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask); - wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask); - - wire req_global_ready; - wire req_local_ready; - - VX_elastic_buffer #( - .DATAW (REQ_DATAW), - .SIZE (2), - .OUT_REG (1) - ) req_global_buf ( - .clk (clk), - .reset (block_reset[i]), - .valid_in (lsu_mem_in_if[i].req_valid && is_addr_global), - .data_in ({ - lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask, - lsu_mem_in_if[i].req_data.rw, - lsu_mem_in_if[i].req_data.byteen, - lsu_mem_in_if[i].req_data.addr, - lsu_mem_in_if[i].req_data.atype, - lsu_mem_in_if[i].req_data.data, - lsu_mem_in_if[i].req_data.tag - }), - 
.ready_in (req_global_ready), - .valid_out (lsu_mem_out_if[i].req_valid), - .data_out ({ - lsu_mem_out_if[i].req_data.mask, - lsu_mem_out_if[i].req_data.rw, - lsu_mem_out_if[i].req_data.byteen, - lsu_mem_out_if[i].req_data.addr, - lsu_mem_out_if[i].req_data.atype, - lsu_mem_out_if[i].req_data.data, - lsu_mem_out_if[i].req_data.tag - }), - .ready_out (lsu_mem_out_if[i].req_ready) - ); - - VX_elastic_buffer #( - .DATAW (REQ_DATAW), - .SIZE (0), - .OUT_REG (0) - ) req_local_buf ( - .clk (clk), - .reset (block_reset[i]), - .valid_in (lsu_mem_in_if[i].req_valid && is_addr_local), - .data_in ({ - lsu_mem_in_if[i].req_data.mask & is_addr_local_mask, - lsu_mem_in_if[i].req_data.rw, - lsu_mem_in_if[i].req_data.byteen, - lsu_mem_in_if[i].req_data.addr, - lsu_mem_in_if[i].req_data.atype, - lsu_mem_in_if[i].req_data.data, - lsu_mem_in_if[i].req_data.tag - }), - .ready_in (req_local_ready), - .valid_out (lsu_switch_if[i].req_valid), - .data_out ({ - lsu_switch_if[i].req_data.mask, - lsu_switch_if[i].req_data.rw, - lsu_switch_if[i].req_data.byteen, - lsu_switch_if[i].req_data.addr, - lsu_switch_if[i].req_data.atype, - lsu_switch_if[i].req_data.data, - lsu_switch_if[i].req_data.tag - }), - .ready_out (lsu_switch_if[i].req_ready) - ); - - assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global) - || (req_local_ready && is_addr_local); - - VX_stream_arb #( - .NUM_INPUTS (2), - .DATAW (RSP_DATAW), - .ARBITER ("R"), - .OUT_BUF (1) - ) rsp_arb ( - .clk (clk), - .reset (block_reset[i]), - .valid_in ({ - lsu_switch_if[i].rsp_valid, - lsu_mem_out_if[i].rsp_valid - }), - .ready_in ({ - lsu_switch_if[i].rsp_ready, - lsu_mem_out_if[i].rsp_ready - }), - .data_in ({ - lsu_switch_if[i].rsp_data, - lsu_mem_out_if[i].rsp_data - }), - .data_out (lsu_mem_in_if[i].rsp_data), - .valid_out (lsu_mem_in_if[i].rsp_valid), - .ready_out (lsu_mem_in_if[i].rsp_ready), - `UNUSED_PIN (sel_out) - ); - end - - VX_mem_bus_if #( - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH) - ) 
lmem_bus_if[LSU_NUM_REQS](); - - for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin - VX_mem_bus_if #( - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH) - ) lmem_bus_tmp_if[`NUM_LSU_LANES](); - - VX_lsu_adapter #( - .NUM_LANES (`NUM_LSU_LANES), - .DATA_SIZE (LSU_WORD_SIZE), - .TAG_WIDTH (LSU_TAG_WIDTH), - .TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH), - .ARBITER ("P"), - .REQ_OUT_BUF (3), - .RSP_OUT_BUF (0) - ) lsu_adapter ( - .clk (clk), - .reset (block_reset[i]), - .lsu_mem_if (lsu_switch_if[i]), - .mem_bus_if (lmem_bus_tmp_if) - ); - - for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin - `ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], lmem_bus_tmp_if[j]); - end - end - - `RESET_RELAY (lmem_reset, reset); - - VX_local_mem #( - .INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)), - .SIZE (1 << `LMEM_LOG_SIZE), - .NUM_REQS (LSU_NUM_REQS), - .NUM_BANKS (`LMEM_NUM_BANKS), - .WORD_SIZE (LSU_WORD_SIZE), - .ADDR_WIDTH (LMEM_ADDR_WIDTH), - .UUID_WIDTH (`UUID_WIDTH), - .TAG_WIDTH (LSU_TAG_WIDTH), - .OUT_BUF (3) - ) local_mem ( - .clk (clk), - .reset (lmem_reset), - `ifdef PERF_ENABLE - .cache_perf (cache_perf), - `endif - .mem_bus_if (lmem_bus_if) - ); - -endmodule diff --git a/hw/rtl/core/VX_lsu_slice.sv b/hw/rtl/core/VX_lsu_slice.sv index 8c685fca2..1f39ab5a7 100644 --- a/hw/rtl/core/VX_lsu_slice.sv +++ b/hw/rtl/core/VX_lsu_slice.sv @@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( +module VX_lsu_slice import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "" ) ( `SCOPE_IO_DECL @@ -59,25 +59,25 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( wire req_is_fence, rsp_is_fence; wire [NUM_LANES-1:0][`XLEN-1:0] full_addr; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_full_addr assign full_addr[i] = execute_if.data.rs1_data[i] + `SEXT(`XLEN, execute_if.data.op_args.lsu.offset); end // address type calculation - wire 
[NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_atype; - for (genvar i = 0; i < NUM_LANES; ++i) begin + wire [NUM_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags; + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_flags wire [MEM_ADDRW-1:0] block_addr = full_addr[i][MEM_ASHIFT +: MEM_ADDRW]; // is I/O address wire [MEM_ADDRW-1:0] io_addr_start = MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT); wire [MEM_ADDRW-1:0] io_addr_end = MEM_ADDRW'(`XLEN'(`IO_END_ADDR) >> MEM_ASHIFT); - assign mem_req_atype[i][`ADDR_TYPE_FLUSH] = req_is_fence; - assign mem_req_atype[i][`ADDR_TYPE_IO] = (block_addr >= io_addr_start) && (block_addr < io_addr_end); + assign mem_req_flags[i][`MEM_REQ_FLAG_FLUSH] = req_is_fence; + assign mem_req_flags[i][`MEM_REQ_FLAG_IO] = (block_addr >= io_addr_start) && (block_addr < io_addr_end); `ifdef LMEM_ENABLE // is local memory address wire [MEM_ADDRW-1:0] lmem_addr_start = MEM_ADDRW'(`XLEN'(`LMEM_BASE_ADDR) >> MEM_ASHIFT); wire [MEM_ADDRW-1:0] lmem_addr_end = MEM_ADDRW'((`XLEN'(`LMEM_BASE_ADDR) + `XLEN'(1 << `LMEM_LOG_SIZE)) >> MEM_ASHIFT); - assign mem_req_atype[i][`ADDR_TYPE_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end); + assign mem_req_flags[i][`MEM_REQ_FLAG_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end); `endif end @@ -102,8 +102,6 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( wire mem_req_fire = mem_req_valid && mem_req_ready; wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready; - `UNUSED_VAR (mem_req_fire) - `UNUSED_VAR (mem_rsp_fire) wire mem_rsp_sop_pkt, mem_rsp_eop_pkt; wire no_rsp_buf_valid, no_rsp_buf_ready; @@ -151,49 +149,49 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] req_align; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_addr assign req_align[i] = full_addr[i][REQ_ASHIFT-1:0]; assign mem_req_addr[i] = 
full_addr[i][`MEM_ADDR_WIDTH-1:REQ_ASHIFT]; end // byte enable formatting - for (genvar i = 0; i < NUM_LANES; ++i) begin - reg [LSU_WORD_SIZE-1:0] mem_req_byteen_r; + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_byteen_w + reg [LSU_WORD_SIZE-1:0] mem_req_byteen_w; always @(*) begin - mem_req_byteen_r = '0; + mem_req_byteen_w = '0; case (`INST_LSU_WSIZE(execute_if.data.op_type)) 0: begin // 8-bit - mem_req_byteen_r[req_align[i]] = 1'b1; + mem_req_byteen_w[req_align[i]] = 1'b1; end 1: begin // 16 bit - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1; - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:1], 1'b0}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:1], 1'b1}] = 1'b1; end `ifdef XLEN_64 2: begin // 32 bit - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1; - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1; - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1; - mem_req_byteen_r[{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b00}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b01}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b10}] = 1'b1; + mem_req_byteen_w[{req_align[i][REQ_ASHIFT-1:2], 2'b11}] = 1'b1; end `endif // 3: 64 bit - default : mem_req_byteen_r = {LSU_WORD_SIZE{1'b1}}; + default : mem_req_byteen_w = {LSU_WORD_SIZE{1'b1}}; endcase end - assign mem_req_byteen[i] = mem_req_byteen_r; + assign mem_req_byteen[i] = mem_req_byteen_w; end // memory misalignment not supported! 
- for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_missalign wire lsu_req_fire = execute_if.valid && execute_if.ready; `RUNTIME_ASSERT((~lsu_req_fire || ~execute_if.data.tmask[i] || req_is_fence || (full_addr[i] % (1 << `INST_LSU_WSIZE(execute_if.data.op_type))) == 0), - ("misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)", - execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid)); + ("%t: misaligned memory access, wid=%0d, PC=0x%0h, addr=0x%0h, wsize=%0d! (#%0d)", + $time, execute_if.data.wid, {execute_if.data.PC, 1'b0}, full_addr[i], `INST_LSU_WSIZE(execute_if.data.op_type), execute_if.data.uuid)) end // store data formatting - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_req_data always @(*) begin mem_req_data[i] = execute_if.data.rs2_data[i]; case (req_align[i]) @@ -215,7 +213,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( wire [LSUQ_SIZEW-1:0] pkt_waddr, pkt_raddr; - if (PID_BITS != 0) begin + if (PID_BITS != 0) begin : g_pids reg [`LSUQ_IN_SIZE-1:0][PID_BITS:0] pkt_ctr; reg [`LSUQ_IN_SIZE-1:0] pkt_sop, pkt_eop; @@ -271,10 +269,10 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( assign mem_rsp_sop_pkt = pkt_sop[pkt_raddr]; assign mem_rsp_eop_pkt = mem_rsp_eop_fire && pkt_eop[pkt_raddr] && (pkt_ctr[pkt_raddr] == 1); - `RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("allocator full!")) - `RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("Oops!")) + `RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("%t: allocator full!", $time)) + `RUNTIME_ASSERT(~mem_req_rd_sop_fire || 0 == pkt_ctr[pkt_waddr], ("%t: oops! 
broken sop request!", $time)) `UNUSED_VAR (mem_rsp_sop) - end else begin + end else begin : g_no_pids assign pkt_waddr = 0; assign mem_rsp_sop_pkt = mem_rsp_sop; assign mem_rsp_eop_pkt = mem_rsp_eop; @@ -300,7 +298,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( wire [NUM_LANES-1:0] lsu_mem_req_mask; wire [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] lsu_mem_req_byteen; wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] lsu_mem_req_addr; - wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] lsu_mem_req_atype; + wire [NUM_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] lsu_mem_req_flags; wire [NUM_LANES-1:0][(LSU_WORD_SIZE*8)-1:0] lsu_mem_req_data; wire [LSU_TAG_WIDTH-1:0] lsu_mem_req_tag; wire lsu_mem_req_ready; @@ -311,16 +309,14 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( wire [LSU_TAG_WIDTH-1:0] lsu_mem_rsp_tag; wire lsu_mem_rsp_ready; - `RESET_RELAY (mem_scheduler_reset, reset); - VX_mem_scheduler #( - .INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)), + .INSTANCE_ID ($sformatf("%s-memsched", INSTANCE_ID)), .CORE_REQS (NUM_LANES), .MEM_CHANNELS(NUM_LANES), .WORD_SIZE (LSU_WORD_SIZE), .LINE_SIZE (LSU_WORD_SIZE), .ADDR_WIDTH (LSU_ADDR_WIDTH), - .ATYPE_WIDTH (`ADDR_TYPE_WIDTH), + .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), .TAG_WIDTH (TAG_WIDTH), .CORE_QUEUE_SIZE (`LSUQ_IN_SIZE), .MEM_QUEUE_SIZE (`LSUQ_OUT_SIZE), @@ -330,7 +326,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( .CORE_OUT_BUF(0) ) mem_scheduler ( .clk (clk), - .reset (mem_scheduler_reset), + .reset (reset), // Input request .core_req_valid (mem_req_valid), @@ -338,12 +334,12 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( .core_req_mask (mem_req_mask), .core_req_byteen(mem_req_byteen), .core_req_addr (mem_req_addr), - .core_req_atype (mem_req_atype), + .core_req_flags (mem_req_flags), .core_req_data (mem_req_data), .core_req_tag (mem_req_tag), .core_req_ready (mem_req_ready), `UNUSED_PIN (core_req_empty), - `UNUSED_PIN (core_req_sent), + `UNUSED_PIN 
(core_req_wr_notify), // Output response .core_rsp_valid (mem_rsp_valid), @@ -360,7 +356,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( .mem_req_mask (lsu_mem_req_mask), .mem_req_byteen (lsu_mem_req_byteen), .mem_req_addr (lsu_mem_req_addr), - .mem_req_atype (lsu_mem_req_atype), + .mem_req_flags (lsu_mem_req_flags), .mem_req_data (lsu_mem_req_data), .mem_req_tag (lsu_mem_req_tag), .mem_req_ready (lsu_mem_req_ready), @@ -378,7 +374,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( assign lsu_mem_if.req_data.rw = lsu_mem_req_rw; assign lsu_mem_if.req_data.byteen = lsu_mem_req_byteen; assign lsu_mem_if.req_data.addr = lsu_mem_req_addr; - assign lsu_mem_if.req_data.atype = lsu_mem_req_atype; + assign lsu_mem_if.req_data.flags = lsu_mem_req_flags; assign lsu_mem_if.req_data.data = lsu_mem_req_data; assign lsu_mem_if.req_data.tag = lsu_mem_req_tag; assign lsu_mem_req_ready = lsu_mem_if.req_ready; @@ -426,7 +422,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( `endif `endif - for (genvar i = 0; i < NUM_LANES; i++) begin + for (genvar i = 0; i < NUM_LANES; i++) begin : g_rsp_data `ifdef XLEN_64 wire [63:0] rsp_data64 = mem_rsp_data[i]; wire [31:0] rsp_data32 = (rsp_align[i][2] ? 
mem_rsp_data[i][63:32] : mem_rsp_data[i][31:0]); @@ -483,6 +479,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( .valid_out (commit_no_rsp_if.valid), .ready_out (commit_no_rsp_if.ready) ); + assign commit_no_rsp_if.data.rd = '0; assign commit_no_rsp_if.data.wb = 1'b0; assign commit_no_rsp_if.data.data = commit_rsp_if.data.data; // arbiter MUX optimization @@ -507,51 +504,70 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #( `ifdef DBG_TRACE_MEM always @(posedge clk) begin if (execute_if.valid && fence_lock) begin - `TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID)); + `TRACE(1, ("%t: *** %s fence wait\n", $time, INSTANCE_ID)) end if (mem_req_fire) begin if (mem_req_rw) begin - `TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); - `TRACE(1, (", atype=")); - `TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES); - `TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen)); - `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES); - `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid)); + `TRACE(1, ("%t: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES) + `TRACE(1, (", flags=")) + `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES) + `TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen)) + `TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES) + `TRACE(1, (", sop=%b, eop=%b, tag=0x%0h (#%0d)\n", execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) end else begin - `TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES); - `TRACE(1, (", atype=")); - `TRACE_ARRAY1D(1, "%b", mem_req_atype, 
NUM_LANES); - `TRACE(1, (", byteen=0x%0h, rd=%0d, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, mem_req_tag, execute_if.data.uuid)); + `TRACE(1, ("%t: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES) + `TRACE(1, (", flags=")) + `TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES) + `TRACE(1, (", byteen=0x%0h, rd=%0d, sop=%b, eop=%b, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, execute_if.data.sop, execute_if.data.eop, mem_req_tag, execute_if.data.uuid)) end end if (mem_rsp_fire) begin - `TRACE(1, ("%d: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", - $time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop)); - `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES); - `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid)); + `TRACE(1, ("%t: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=", + $time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop)) + `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES) + `TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid)) end end `endif +`ifdef SCOPE `ifdef DBG_SCOPE_LSU - VX_scope_tap #( - .SCOPE_ID (3), - .TRIGGERW (3), - .PROBEW (1 + NUM_LANES*(`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE*8) + `UUID_WIDTH + NUM_LANES*LSU_WORD_SIZE*8 + `UUID_WIDTH) - ) scope_tap ( - .clk (clk), - .reset (scope_reset), - .start (1'b0), - .stop (1'b0), - .triggers({reset, mem_req_fire, mem_rsp_fire}), - .probes ({mem_req_rw, full_addr, mem_req_byteen, mem_req_data, execute_if.data.uuid, rsp_data, rsp_uuid}), - .bus_in (scope_bus_in), - .bus_out(scope_bus_out) + `SCOPE_IO_SWITCH (1); + `NEG_EDGE (reset_negedge, reset); + `SCOPE_TAP_EX (0, 3, 4, 2, ( + 1 + NUM_LANES * (`XLEN + LSU_WORD_SIZE + LSU_WORD_SIZE * 8) + `UUID_WIDTH + NUM_LANES * LSU_WORD_SIZE * 8 + `UUID_WIDTH + ), { + 
mem_req_valid, + mem_req_ready, + mem_rsp_valid, + mem_rsp_ready + }, { + mem_req_fire, + mem_rsp_fire + }, { + mem_req_rw, + full_addr, + mem_req_byteen, + mem_req_data, + execute_if.data.uuid, + rsp_data, + rsp_uuid + }, + reset_negedge, 1'b0, 4096 ); `else - `SCOPE_IO_UNUSED() + `SCOPE_IO_UNUSED(0) +`endif +`endif +`ifdef CHIPSCOPE + ila_lsu ila_lsu_inst ( + .clk (clk), + .probe0 ({execute_if.valid, execute_if.data, execute_if.ready}), + .probe1 ({lsu_mem_if.req_valid, lsu_mem_if.req_data, lsu_mem_if.req_ready}), + .probe2 ({lsu_mem_if.rsp_valid, lsu_mem_if.rsp_data, lsu_mem_if.rsp_ready}) + ); `endif endmodule diff --git a/hw/rtl/core/VX_lsu_unit.sv b/hw/rtl/core/VX_lsu_unit.sv index d40f5fcfb..6e9e2081c 100644 --- a/hw/rtl/core/VX_lsu_unit.sv +++ b/hw/rtl/core/VX_lsu_unit.sv @@ -31,9 +31,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( localparam BLOCK_SIZE = `NUM_LSU_BLOCKS; localparam NUM_LANES = `NUM_LSU_LANES; -`ifdef SCOPE `SCOPE_IO_SWITCH (BLOCK_SIZE); -`endif VX_execute_if #( .NUM_LANES (NUM_LANES) @@ -42,7 +40,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), - .OUT_BUF (1) + .OUT_BUF (3) ) dispatch_unit ( .clk (clk), .reset (reset), @@ -54,16 +52,13 @@ module VX_lsu_unit import VX_gpu_pkg::*; #( .NUM_LANES (NUM_LANES) ) per_block_commit_if[BLOCK_SIZE](); - for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_slices - - `RESET_RELAY (slice_reset, reset); - + for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_lsus VX_lsu_slice #( .INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx)) ) lsu_slice( `SCOPE_IO_BIND (block_idx) .clk (clk), - .reset (slice_reset), + .reset (reset), .execute_if (per_block_execute_if[block_idx]), .commit_if (per_block_commit_if[block_idx]), .lsu_mem_if (lsu_mem_if[block_idx]) diff --git a/hw/rtl/core/VX_mem_unit.sv b/hw/rtl/core/VX_mem_unit.sv new file mode 100644 index 000000000..c02e99b29 --- /dev/null +++ 
b/hw/rtl/core/VX_mem_unit.sv @@ -0,0 +1,221 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +module VX_mem_unit import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "" +) ( + input wire clk, + input wire reset, + +`ifdef PERF_ENABLE + output cache_perf_t lmem_perf, +`endif + + VX_lsu_mem_if.slave lsu_mem_if [`NUM_LSU_BLOCKS], + VX_mem_bus_if.master dcache_bus_if [DCACHE_NUM_REQS] +); + VX_lsu_mem_if #( + .NUM_LANES (`NUM_LSU_LANES), + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lsu_dcache_if[`NUM_LSU_BLOCKS](); + +`ifdef LMEM_ENABLE + + `STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter")) + `STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter")) + + localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE); + + VX_lsu_mem_if #( + .NUM_LANES (`NUM_LSU_LANES), + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lsu_lmem_if[`NUM_LSU_BLOCKS](); + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lmem_switches + VX_lmem_switch #( + .REQ0_OUT_BUF (3), + .REQ1_OUT_BUF (0), + .RSP_OUT_BUF (1), + .ARBITER ("P") + ) lmem_switch ( + .clk (clk), + .reset (reset), + .lsu_in_if (lsu_mem_if[i]), + .global_out_if(lsu_dcache_if[i]), + .local_out_if (lsu_lmem_if[i]) + ); + end + + VX_mem_bus_if #( + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lmem_bus_if[LSU_NUM_REQS](); + + for (genvar 
i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lmem_adapters + VX_mem_bus_if #( + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lmem_bus_tmp_if[`NUM_LSU_LANES](); + + VX_lsu_adapter #( + .NUM_LANES (`NUM_LSU_LANES), + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH), + .TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH), + .ARBITER ("P"), + .REQ_OUT_BUF (3), + .RSP_OUT_BUF (0) + ) lmem_adapter ( + .clk (clk), + .reset (reset), + .lsu_mem_if (lsu_lmem_if[i]), + .mem_bus_if (lmem_bus_tmp_if) + ); + + for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin : g_lmem_bus_if + `ASSIGN_VX_MEM_BUS_IF (lmem_bus_if[i * `NUM_LSU_LANES + j], lmem_bus_tmp_if[j]); + end + end + + VX_local_mem #( + .INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)), + .SIZE (1 << `LMEM_LOG_SIZE), + .NUM_REQS (LSU_NUM_REQS), + .NUM_BANKS (`LMEM_NUM_BANKS), + .WORD_SIZE (LSU_WORD_SIZE), + .ADDR_WIDTH (LMEM_ADDR_WIDTH), + .UUID_WIDTH (`UUID_WIDTH), + .TAG_WIDTH (LSU_TAG_WIDTH), + .OUT_BUF (3) + ) local_mem ( + .clk (clk), + .reset (reset), + `ifdef PERF_ENABLE + .lmem_perf (lmem_perf), + `endif + .mem_bus_if (lmem_bus_if) + ); + +`else + +`ifdef PERF_ENABLE + assign lmem_perf = '0; +`endif + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_dcache_if + `ASSIGN_VX_MEM_BUS_IF (lsu_dcache_if[i], lsu_mem_if[i]); + end + +`endif + + VX_lsu_mem_if #( + .NUM_LANES (DCACHE_CHANNELS), + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH) + ) dcache_coalesced_if[`NUM_LSU_BLOCKS](); + + if (LSU_WORD_SIZE != DCACHE_WORD_SIZE) begin : g_enabled + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_coalescers + VX_mem_coalescer #( + .INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)), + .NUM_REQS (`NUM_LSU_LANES), + .DATA_IN_SIZE (LSU_WORD_SIZE), + .DATA_OUT_SIZE (DCACHE_WORD_SIZE), + .ADDR_WIDTH (LSU_ADDR_WIDTH), + .FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH), + .TAG_WIDTH (LSU_TAG_WIDTH), + .UUID_WIDTH (`UUID_WIDTH), + .QUEUE_SIZE (`LSUQ_OUT_SIZE) + ) mem_coalescer ( + .clk (clk), + 
.reset (reset), + + // Input request + .in_req_valid (lsu_dcache_if[i].req_valid), + .in_req_mask (lsu_dcache_if[i].req_data.mask), + .in_req_rw (lsu_dcache_if[i].req_data.rw), + .in_req_byteen (lsu_dcache_if[i].req_data.byteen), + .in_req_addr (lsu_dcache_if[i].req_data.addr), + .in_req_flags (lsu_dcache_if[i].req_data.flags), + .in_req_data (lsu_dcache_if[i].req_data.data), + .in_req_tag (lsu_dcache_if[i].req_data.tag), + .in_req_ready (lsu_dcache_if[i].req_ready), + + // Input response + .in_rsp_valid (lsu_dcache_if[i].rsp_valid), + .in_rsp_mask (lsu_dcache_if[i].rsp_data.mask), + .in_rsp_data (lsu_dcache_if[i].rsp_data.data), + .in_rsp_tag (lsu_dcache_if[i].rsp_data.tag), + .in_rsp_ready (lsu_dcache_if[i].rsp_ready), + + // Output request + .out_req_valid (dcache_coalesced_if[i].req_valid), + .out_req_mask (dcache_coalesced_if[i].req_data.mask), + .out_req_rw (dcache_coalesced_if[i].req_data.rw), + .out_req_byteen (dcache_coalesced_if[i].req_data.byteen), + .out_req_addr (dcache_coalesced_if[i].req_data.addr), + .out_req_flags (dcache_coalesced_if[i].req_data.flags), + .out_req_data (dcache_coalesced_if[i].req_data.data), + .out_req_tag (dcache_coalesced_if[i].req_data.tag), + .out_req_ready (dcache_coalesced_if[i].req_ready), + + // Output response + .out_rsp_valid (dcache_coalesced_if[i].rsp_valid), + .out_rsp_mask (dcache_coalesced_if[i].rsp_data.mask), + .out_rsp_data (dcache_coalesced_if[i].rsp_data.data), + .out_rsp_tag (dcache_coalesced_if[i].rsp_data.tag), + .out_rsp_ready (dcache_coalesced_if[i].rsp_ready) + ); + end + + end else begin : g_passthru + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_dcache_coalesced_if + `ASSIGN_VX_MEM_BUS_IF (dcache_coalesced_if[i], lsu_dcache_if[i]); + end + + end + + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_dcache_adapters + + VX_mem_bus_if #( + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH) + ) dcache_bus_tmp_if[DCACHE_CHANNELS](); + + VX_lsu_adapter #( + .NUM_LANES 
(DCACHE_CHANNELS), + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH), + .TAG_SEL_BITS (DCACHE_TAG_WIDTH - `UUID_WIDTH), + .ARBITER ("P"), + .REQ_OUT_BUF (0), + .RSP_OUT_BUF (0) + ) dcache_adapter ( + .clk (clk), + .reset (reset), + .lsu_mem_if (dcache_coalesced_if[i]), + .mem_bus_if (dcache_bus_tmp_if) + ); + + for (genvar j = 0; j < DCACHE_CHANNELS; ++j) begin : g_dcache_bus_if + `ASSIGN_VX_MEM_BUS_IF (dcache_bus_if[i * DCACHE_CHANNELS + j], dcache_bus_tmp_if[j]); + end + + end + +endmodule diff --git a/hw/rtl/core/VX_mem_unit_top.sv b/hw/rtl/core/VX_mem_unit_top.sv new file mode 100644 index 000000000..17786a09b --- /dev/null +++ b/hw/rtl/core/VX_mem_unit_top.sv @@ -0,0 +1,127 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_define.vh" + +module VX_mem_unit_top import VX_gpu_pkg::*; #( + parameter `STRING INSTANCE_ID = "", + parameter LSU_WORD_WIDTH = LSU_WORD_SIZE * 8 +) ( + // Clock + input wire clk, + input wire reset, + + // LSU memory request + input wire [`NUM_LSU_BLOCKS-1:0] lsu_req_valid, + input wire [`NUM_LSU_BLOCKS-1:0] lsu_req_rw, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0] lsu_req_mask, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_SIZE-1:0] lsu_req_byteen, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_ADDR_WIDTH-1:0] lsu_req_addr, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] lsu_req_flags, + input wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_WIDTH-1:0] lsu_req_data, + input wire [`NUM_LSU_BLOCKS-1:0][LSU_TAG_WIDTH-1:0] lsu_req_tag, + output wire [`NUM_LSU_BLOCKS-1:0] lsu_req_ready, + + // LSU memory response + output wire [`NUM_LSU_BLOCKS-1:0] lsu_rsp_valid, + output wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0] lsu_rsp_mask, + output wire [`NUM_LSU_BLOCKS-1:0][`NUM_LSU_LANES-1:0][LSU_WORD_WIDTH-1:0] lsu_rsp_data, + output wire [`NUM_LSU_BLOCKS-1:0][LSU_TAG_WIDTH-1:0] lsu_rsp_tag, + input wire [`NUM_LSU_BLOCKS-1:0] lsu_rsp_ready, + + // Memory request + output wire [DCACHE_NUM_REQS-1:0] mem_req_valid, + output wire [DCACHE_NUM_REQS-1:0] mem_req_rw, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] mem_req_byteen, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] mem_req_addr, + output wire [DCACHE_NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] mem_req_data, + output wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] mem_req_tag, + input wire [DCACHE_NUM_REQS-1:0] mem_req_ready, + + // Memory response + input wire [DCACHE_NUM_REQS-1:0] mem_rsp_valid, + input wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] mem_rsp_data, + input wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] 
mem_rsp_tag, + output wire [DCACHE_NUM_REQS-1:0] mem_rsp_ready +); + VX_lsu_mem_if #( + .NUM_LANES (`NUM_LSU_LANES), + .DATA_SIZE (LSU_WORD_SIZE), + .TAG_WIDTH (LSU_TAG_WIDTH) + ) lsu_mem_if[`NUM_LSU_BLOCKS](); + + // LSU memory request + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_mem_req + assign lsu_mem_if[i].req_valid = lsu_req_valid[i]; + assign lsu_mem_if[i].req_data.rw = lsu_req_rw[i]; + assign lsu_mem_if[i].req_data.mask = lsu_req_mask[i]; + assign lsu_mem_if[i].req_data.byteen = lsu_req_byteen[i]; + assign lsu_mem_if[i].req_data.addr = lsu_req_addr[i]; + assign lsu_mem_if[i].req_data.flags = lsu_req_flags[i]; + assign lsu_mem_if[i].req_data.data = lsu_req_data[i]; + assign lsu_mem_if[i].req_data.tag = lsu_req_tag[i]; + assign lsu_req_ready[i] = lsu_mem_if[i].req_ready; + end + + // LSU memory response + for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin : g_lsu_rsp + assign lsu_rsp_valid[i] = lsu_mem_if[i].rsp_valid; + assign lsu_rsp_mask[i] = lsu_mem_if[i].rsp_data.mask; + assign lsu_rsp_data[i] = lsu_mem_if[i].rsp_data.data; + assign lsu_rsp_tag[i] = lsu_mem_if[i].rsp_data.tag; + assign lsu_mem_if[i].rsp_ready = lsu_rsp_ready[i]; + end + + VX_mem_bus_if #( + .DATA_SIZE (DCACHE_WORD_SIZE), + .TAG_WIDTH (DCACHE_TAG_WIDTH) + ) mem_bus_if[DCACHE_NUM_REQS](); + + // memory request + for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin : g_mem_req + assign mem_req_valid[i] = mem_bus_if[i].req_valid; + assign mem_req_rw[i] = mem_bus_if[i].req_data.rw; + assign mem_req_byteen[i] = mem_bus_if[i].req_data.byteen; + assign mem_req_addr[i] = mem_bus_if[i].req_data.addr; + assign mem_req_flags[i] = mem_bus_if[i].req_data.flags; + assign mem_req_data[i] = mem_bus_if[i].req_data.data; + assign mem_req_tag[i] = mem_bus_if[i].req_data.tag; + assign mem_bus_if[i].req_ready = mem_req_ready[i]; + end + + // memory response + for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin : g_mem_bus_rsp + assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i]; + assign 
mem_bus_if[i].rsp_data.tag = mem_rsp_tag[i]; + assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i]; + assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready; + end + +`ifdef PERF_ENABLE + cache_perf_t lmem_perf = '0; +`endif + + VX_mem_unit #( + .INSTANCE_ID (INSTANCE_ID) + ) mem_unit ( + .clk (clk), + .reset (reset), + `ifdef PERF_ENABLE + .lmem_perf (lmem_perf), + `endif + .lsu_mem_if (lsu_mem_if), + .dcache_bus_if (mem_bus_if) + ); + +endmodule diff --git a/hw/rtl/core/VX_operands.sv b/hw/rtl/core/VX_operands.sv index e3df0c1fa..42a91e4c2 100644 --- a/hw/rtl/core/VX_operands.sv +++ b/hw/rtl/core/VX_operands.sv @@ -23,7 +23,7 @@ module VX_operands import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "", parameter NUM_BANKS = 4, - parameter OUT_BUF = 4 // using 2-cycle EB for area reduction + parameter OUT_BUF = 3 ) ( input wire clk, input wire reset, @@ -37,15 +37,15 @@ module VX_operands import VX_gpu_pkg::*; #( VX_operands_if.master operands_if ); `UNUSED_SPARAM (INSTANCE_ID) - localparam NUM_SRC_REGS = 3; - localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_REGS); + localparam NUM_SRC_OPDS = 3; + localparam REQ_SEL_BITS = `CLOG2(NUM_SRC_OPDS); localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS); localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS; localparam META_DATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS + `UUID_WIDTH; localparam REGS_DATAW = `XLEN * `NUM_THREADS; - localparam DATAW = META_DATAW + NUM_SRC_REGS * REGS_DATAW; + localparam DATAW = META_DATAW + NUM_SRC_OPDS * REGS_DATAW; localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS); localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS; localparam XLEN_SIZE = `XLEN / 8; @@ -53,55 +53,56 @@ module VX_operands import VX_gpu_pkg::*; #( `UNUSED_VAR (writeback_if.data.sop) - wire [NUM_SRC_REGS-1:0] src_valid; - wire [NUM_SRC_REGS-1:0] req_in_valid, req_in_ready; - wire 
[NUM_SRC_REGS-1:0][PER_BANK_ADDRW-1:0] req_in_data; - wire [NUM_SRC_REGS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; + wire [NUM_SRC_OPDS-1:0] src_valid; + wire [NUM_SRC_OPDS-1:0] req_valid_in, req_ready_in; + wire [NUM_SRC_OPDS-1:0][PER_BANK_ADDRW-1:0] req_data_in; + wire [NUM_SRC_OPDS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; wire [NUM_BANKS-1:0] gpr_rd_valid, gpr_rd_ready; wire [NUM_BANKS-1:0] gpr_rd_valid_st1, gpr_rd_valid_st2; wire [NUM_BANKS-1:0][PER_BANK_ADDRW-1:0] gpr_rd_addr, gpr_rd_addr_st1; - wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st1, gpr_rd_data_st2; + wire [NUM_BANKS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data_st2; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] gpr_rd_req_idx, gpr_rd_req_idx_st1, gpr_rd_req_idx_st2; + wire pipe_ready_in; wire pipe_valid_st1, pipe_ready_st1; wire pipe_valid_st2, pipe_ready_st2; wire [META_DATAW-1:0] pipe_data, pipe_data_st1, pipe_data_st2; - reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_n; - wire [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data_st1, src_data_st2; + reg [NUM_SRC_OPDS-1:0][(`NUM_THREADS * `XLEN)-1:0] src_data_st2, src_data_m_st2; - reg [NUM_SRC_REGS-1:0] data_fetched_n; - wire [NUM_SRC_REGS-1:0] data_fetched_st1; + reg [NUM_SRC_OPDS-1:0] data_fetched_st1; reg has_collision_n; wire has_collision_st1; - wire [NUM_SRC_REGS-1:0][`NR_BITS-1:0] src_regs = {scoreboard_if.data.rs3, - scoreboard_if.data.rs2, - scoreboard_if.data.rs1}; + wire [NUM_SRC_OPDS-1:0][`NR_BITS-1:0] src_opds; + assign src_opds = {scoreboard_if.data.rs3, scoreboard_if.data.rs2, scoreboard_if.data.rs1}; - for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin - if (ISSUE_WIS != 0) begin - assign req_in_data[i] = {src_regs[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis}; - end else begin - assign req_in_data[i] = src_regs[i][`NR_BITS-1:BANK_SEL_BITS]; + for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_req_data_in + if (ISSUE_WIS != 0) begin : g_wis + assign req_data_in[i] = 
{src_opds[i][`NR_BITS-1:BANK_SEL_BITS], scoreboard_if.data.wis}; + end else begin : g_no_wis + assign req_data_in[i] = src_opds[i][`NR_BITS-1:BANK_SEL_BITS]; end - if (NUM_BANKS != 1) begin - assign req_bank_idx[i] = src_regs[i][BANK_SEL_BITS-1:0]; - end else begin + end + + for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_req_bank_idx + if (NUM_BANKS != 1) begin : g_multibanks + assign req_bank_idx[i] = src_opds[i][BANK_SEL_BITS-1:0]; + end else begin : g_singlebank assign req_bank_idx[i] = '0; end end - for (genvar i = 0; i < NUM_SRC_REGS; ++i) begin - assign src_valid[i] = (src_regs[i] != 0) && ~data_fetched_st1[i]; + for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_src_valid + assign src_valid[i] = (src_opds[i] != 0) && ~data_fetched_st1[i]; end - assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid; + assign req_valid_in = {NUM_SRC_OPDS{scoreboard_if.valid}} & src_valid; VX_stream_xbar #( - .NUM_INPUTS (NUM_SRC_REGS), + .NUM_INPUTS (NUM_SRC_OPDS), .NUM_OUTPUTS (NUM_BANKS), .DATAW (PER_BANK_ADDRW), .ARBITER ("P"), // use priority arbiter @@ -111,29 +112,22 @@ module VX_operands import VX_gpu_pkg::*; #( .clk (clk), .reset (reset), `UNUSED_PIN(collisions), - .valid_in (req_in_valid), - .data_in (req_in_data), + .valid_in (req_valid_in), + .data_in (req_data_in), .sel_in (req_bank_idx), - .ready_in (req_in_ready), + .ready_in (req_ready_in), .valid_out (gpr_rd_valid), .data_out (gpr_rd_addr), .sel_out (gpr_rd_req_idx), .ready_out (gpr_rd_ready) ); - wire pipe_in_ready = pipe_ready_st1 || ~pipe_valid_st1; - - assign gpr_rd_ready = {NUM_BANKS{pipe_in_ready}}; - - assign scoreboard_if.ready = pipe_in_ready && ~has_collision_n; - - wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1; - wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2; + assign gpr_rd_ready = {NUM_BANKS{pipe_ready_in}}; always @(*) begin has_collision_n = 0; - for (integer i = 0; i < NUM_SRC_REGS; ++i) begin - for (integer j = 1; j < (NUM_SRC_REGS-i); ++j) begin + for 
(integer i = 0; i < NUM_SRC_OPDS; ++i) begin + for (integer j = 1; j < (NUM_SRC_OPDS-i); ++j) begin has_collision_n |= src_valid[i] && src_valid[j+i] && (req_bank_idx[i] == req_bank_idx[j+i]); @@ -141,14 +135,7 @@ module VX_operands import VX_gpu_pkg::*; #( end end - always @(*) begin - data_fetched_n = data_fetched_st1; - if (scoreboard_if.ready) begin - data_fetched_n = '0; - end else begin - data_fetched_n = data_fetched_st1 | req_in_ready; - end - end + wire [NUM_SRC_OPDS-1:0] req_fire_in = req_valid_in & req_ready_in; assign pipe_data = { scoreboard_if.data.wis, @@ -162,61 +149,74 @@ module VX_operands import VX_gpu_pkg::*; #( scoreboard_if.data.uuid }; - VX_pipe_register #( - .DATAW (1 + NUM_SRC_REGS + NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)), - .RESETW (1 + NUM_SRC_REGS) + assign scoreboard_if.ready = pipe_ready_in && ~has_collision_n; + + wire pipe_fire_st1 = pipe_valid_st1 && pipe_ready_st1; + wire pipe_fire_st2 = pipe_valid_st2 && pipe_ready_st2; + + VX_pipe_buffer #( + .DATAW (NUM_BANKS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)) ) pipe_reg1 ( .clk (clk), .reset (reset), - .enable (pipe_in_ready), - .data_in ({scoreboard_if.valid, data_fetched_n, gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}), - .data_out ({pipe_valid_st1, data_fetched_st1, gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}) + .valid_in (scoreboard_if.valid), + .ready_in (pipe_ready_in), + .data_in ({gpr_rd_valid, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}), + .data_out ({gpr_rd_valid_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1}), + .valid_out(pipe_valid_st1), + .ready_out(pipe_ready_st1) ); - assign pipe_ready_st1 = pipe_ready_st2 || ~pipe_valid_st2; - - assign src_data_st1 = pipe_fire_st2 ? 
'0 : src_data_n; + always @(posedge clk) begin + if (reset || scoreboard_if.ready) begin + data_fetched_st1 <= 0; + end else begin + data_fetched_st1 <= data_fetched_st1 | req_fire_in; + end + end wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1; - `RESET_RELAY (pipe2_reset, reset); // needed for pipe_reg2's wide RESETW - - VX_pipe_register #( - .DATAW (1 + NUM_SRC_REGS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH), - .RESETW (1 + NUM_SRC_REGS * REGS_DATAW) + VX_pipe_buffer #( + .DATAW (NUM_BANKS + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH) ) pipe_reg2 ( .clk (clk), - .reset (pipe2_reset), - .enable (pipe_ready_st1), - .data_in ({pipe_valid2_st1, src_data_st1, gpr_rd_valid_st1, gpr_rd_data_st1, pipe_data_st1, gpr_rd_req_idx_st1}), - .data_out ({pipe_valid_st2, src_data_st2, gpr_rd_valid_st2, gpr_rd_data_st2, pipe_data_st2, gpr_rd_req_idx_st2}) + .reset (reset), + .valid_in (pipe_valid2_st1), + .ready_in (pipe_ready_st1), + .data_in ({gpr_rd_valid_st1, pipe_data_st1, gpr_rd_req_idx_st1}), + .data_out ({gpr_rd_valid_st2, pipe_data_st2, gpr_rd_req_idx_st2}), + .valid_out(pipe_valid_st2), + .ready_out(pipe_ready_st2) ); always @(*) begin - src_data_n = src_data_st2; + src_data_m_st2 = src_data_st2; for (integer b = 0; b < NUM_BANKS; ++b) begin if (gpr_rd_valid_st2[b]) begin - src_data_n[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b]; + src_data_m_st2[gpr_rd_req_idx_st2[b]] = gpr_rd_data_st2[b]; end end end + always @(posedge clk) begin + if (reset || pipe_fire_st2) begin + src_data_st2 <= 0; + end else begin + src_data_st2 <= src_data_m_st2; + end + end + VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), - .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (1) + .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) out_buf ( .clk (clk), .reset (reset), .valid_in (pipe_valid_st2), .ready_in (pipe_ready_st2), - .data_in ({ - pipe_data_st2, - src_data_n[0], - src_data_n[1], - src_data_n[2] - }), + .data_in 
({pipe_data_st2, src_data_m_st2}), .data_out ({ operands_if.data.wis, operands_if.data.tmask, @@ -227,57 +227,47 @@ module VX_operands import VX_gpu_pkg::*; #( operands_if.data.op_args, operands_if.data.rd, operands_if.data.uuid, - operands_if.data.rs1_data, + operands_if.data.rs3_data, operands_if.data.rs2_data, - operands_if.data.rs3_data + operands_if.data.rs1_data }), .valid_out (operands_if.valid), .ready_out (operands_if.ready) ); wire [PER_BANK_ADDRW-1:0] gpr_wr_addr; - if (ISSUE_WIS != 0) begin + if (ISSUE_WIS != 0) begin : g_gpr_wr_addr assign gpr_wr_addr = {writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS], writeback_if.data.wis}; - end else begin + end else begin : g_gpr_wr_addr_no_wis assign gpr_wr_addr = writeback_if.data.rd[`NR_BITS-1:BANK_SEL_BITS]; end wire [BANK_SEL_WIDTH-1:0] gpr_wr_bank_idx; - if (NUM_BANKS != 1) begin + if (NUM_BANKS != 1) begin : g_gpr_wr_bank_idx assign gpr_wr_bank_idx = writeback_if.data.rd[BANK_SEL_BITS-1:0]; - end else begin + end else begin : g_gpr_wr_bank_idx_0 assign gpr_wr_bank_idx = '0; end - `ifdef GPR_RESET - reg wr_enabled = 0; - always @(posedge clk) begin - if (reset) begin - wr_enabled <= 1; - end - end - `else - wire wr_enabled = 1; - `endif - - for (genvar b = 0; b < NUM_BANKS; ++b) begin + for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_gpr_rams wire gpr_wr_enabled; - if (BANK_SEL_BITS != 0) begin - assign gpr_wr_enabled = wr_enabled - && writeback_if.valid + if (BANK_SEL_BITS != 0) begin : g_gpr_wr_enabled_multibanks + assign gpr_wr_enabled = writeback_if.valid && (gpr_wr_bank_idx == BANK_SEL_BITS'(b)); - end else begin - assign gpr_wr_enabled = wr_enabled && writeback_if.valid; + end else begin : g_gpr_wr_enabled + assign gpr_wr_enabled = writeback_if.valid; end wire [BYTEENW-1:0] wren; - for (genvar i = 0; i < `NUM_THREADS; ++i) begin + for (genvar i = 0; i < `NUM_THREADS; ++i) begin : g_wren assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}}; end VX_dp_ram #( .DATAW (REGS_DATAW), 
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS), + .OUT_REG (1), + .READ_ENABLE (1), .WRENW (BYTEENW), `ifdef GPR_RESET .RESET_RAM (1), @@ -292,7 +282,7 @@ module VX_operands import VX_gpu_pkg::*; #( .waddr (gpr_wr_addr), .wdata (writeback_if.data.data), .raddr (gpr_rd_addr_st1[b]), - .rdata (gpr_rd_data_st1[b]) + .rdata (gpr_rd_data_st2[b]) ); end @@ -302,7 +292,7 @@ module VX_operands import VX_gpu_pkg::*; #( if (reset) begin collisions_r <= '0; end else begin - collisions_r <= collisions_r + `PERF_CTR_BITS'(scoreboard_if.valid && pipe_in_ready && has_collision_n); + collisions_r <= collisions_r + `PERF_CTR_BITS'(scoreboard_if.valid && pipe_ready_in && has_collision_n); end end assign perf_stalls = collisions_r; diff --git a/hw/rtl/core/VX_pe_switch.sv b/hw/rtl/core/VX_pe_switch.sv new file mode 100644 index 000000000..163d76c64 --- /dev/null +++ b/hw/rtl/core/VX_pe_switch.sv @@ -0,0 +1,92 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_define.vh" + +module VX_pe_switch import VX_gpu_pkg::*; #( + parameter PE_COUNT = 0, + parameter NUM_LANES = 0, + parameter REQ_OUT_BUF = 0, + parameter RSP_OUT_BUF = 0, + parameter `STRING ARBITER = "R", + parameter PE_SEL_BITS = `CLOG2(PE_COUNT) +) ( + input wire clk, + input wire reset, + input wire [`UP(PE_SEL_BITS)-1:0] pe_sel, + VX_execute_if.slave execute_in_if, + VX_commit_if.master commit_out_if, + VX_execute_if.master execute_out_if[PE_COUNT], + VX_commit_if .slave commit_in_if[PE_COUNT] +); + localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); + localparam PID_WIDTH = `UP(PID_BITS); + localparam REQ_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `INST_ALU_BITS + $bits(op_args_t) + 1 + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1; + localparam RSP_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1; + + wire [PE_COUNT-1:0] pe_req_valid; + wire [PE_COUNT-1:0][REQ_DATAW-1:0] pe_req_data; + wire [PE_COUNT-1:0] pe_req_ready; + + VX_stream_switch #( + .DATAW (REQ_DATAW), + .NUM_OUTPUTS (PE_COUNT), + .OUT_BUF (REQ_OUT_BUF) + ) req_switch ( + .clk (clk), + .reset (reset), + .sel_in (pe_sel), + .valid_in (execute_in_if.valid), + .ready_in (execute_in_if.ready), + .data_in (execute_in_if.data), + .data_out (pe_req_data), + .valid_out (pe_req_valid), + .ready_out (pe_req_ready) + ); + + for (genvar i = 0; i < PE_COUNT; ++i) begin : g_execute_out_if + assign execute_out_if[i].valid = pe_req_valid[i]; + assign execute_out_if[i].data = pe_req_data[i]; + assign pe_req_ready[i] = execute_out_if[i].ready; + end + + /////////////////////////////////////////////////////////////////////////// + + wire [PE_COUNT-1:0] pe_rsp_valid; + wire [PE_COUNT-1:0][RSP_DATAW-1:0] pe_rsp_data; + wire [PE_COUNT-1:0] pe_rsp_ready; + + for (genvar i = 0; i < PE_COUNT; ++i) begin : g_commit_in_if + assign pe_rsp_valid[i] = commit_in_if[i].valid; + assign pe_rsp_data[i] = 
commit_in_if[i].data; + assign commit_in_if[i].ready = pe_rsp_ready[i]; + end + + VX_stream_arb #( + .NUM_INPUTS (PE_COUNT), + .DATAW (RSP_DATAW), + .ARBITER (ARBITER), + .OUT_BUF (RSP_OUT_BUF) + ) rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in (pe_rsp_valid), + .ready_in (pe_rsp_ready), + .data_in (pe_rsp_data), + .data_out (commit_out_if.data), + .valid_out (commit_out_if.valid), + .ready_out (commit_out_if.ready), + `UNUSED_PIN (sel_out) + ); + +endmodule diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 71a74c6ac..9b49ae268 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -78,7 +78,7 @@ module VX_schedule import VX_gpu_pkg::*; #( wire [`NUM_ALU_BLOCKS-1:0][`NW_WIDTH-1:0] branch_wid; wire [`NUM_ALU_BLOCKS-1:0] branch_taken; wire [`NUM_ALU_BLOCKS-1:0][`PC_BITS-1:0] branch_dest; - for (genvar i = 0; i < `NUM_ALU_BLOCKS; ++i) begin + for (genvar i = 0; i < `NUM_ALU_BLOCKS; ++i) begin : g_branch_init assign branch_valid[i] = branch_ctl_if[i].valid; assign branch_wid[i] = branch_ctl_if[i].wid; assign branch_taken[i] = branch_ctl_if[i].taken; @@ -189,7 +189,7 @@ module VX_schedule import VX_gpu_pkg::*; #( end // decode unlock - if (decode_sched_if.valid && ~decode_sched_if.is_wstall) begin + if (decode_sched_if.valid && decode_sched_if.unlock) begin stalled_warps_n[decode_sched_if.wid] = 0; end @@ -289,13 +289,11 @@ module VX_schedule import VX_gpu_pkg::*; #( // split/join handling - `RESET_RELAY (split_join_reset, reset); - VX_split_join #( .INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID)) ) split_join ( .clk (clk), - .reset (split_join_reset), + .reset (reset), .valid (warp_ctl_if.valid), .wid (warp_ctl_if.wid), .split (warp_ctl_if.split), @@ -324,7 +322,7 @@ module VX_schedule import VX_gpu_pkg::*; #( ); wire [`NUM_WARPS-1:0][(`NUM_THREADS + `PC_BITS)-1:0] schedule_data; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_schedule_data assign 
schedule_data[i] = {thread_masks[i], warp_pcs[i]}; end @@ -333,67 +331,50 @@ module VX_schedule import VX_gpu_pkg::*; #( schedule_data[schedule_wid][(`NUM_THREADS + `PC_BITS)-5:0] }; -`ifndef NDEBUG - localparam GNW_WIDTH = `LOG2UP(`NUM_CLUSTERS * `NUM_CORES * `NUM_WARPS); - reg [`UUID_WIDTH-1:0] instr_uuid; - wire [GNW_WIDTH-1:0] g_wid = (GNW_WIDTH'(CORE_ID) << `NW_BITS) + GNW_WIDTH'(schedule_wid); -`ifdef SV_DPI - always @(posedge clk) begin - if (reset) begin - instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(1, 32'd0)); - end else if (schedule_fire) begin - instr_uuid <= `UUID_WIDTH'(dpi_uuid_gen(0, 32'(g_wid))); - end - end -`else - wire [GNW_WIDTH+16-1:0] w_uuid = {g_wid, 16'(schedule_pc)}; - always @(*) begin - instr_uuid = `UUID_WIDTH'(w_uuid); - end -`endif + wire [`UUID_WIDTH-1:0] instr_uuid; +`ifdef UUID_ENABLE + VX_uuid_gen #( + .CORE_ID (CORE_ID), + .UUID_WIDTH (`UUID_WIDTH) + ) uuid_gen ( + .clk (clk), + .reset (reset), + .incr (schedule_fire), + .wid (schedule_wid), + .uuid (instr_uuid) + ); `else - wire [`UUID_WIDTH-1:0] instr_uuid = '0; + assign instr_uuid = '0; `endif VX_elastic_buffer #( - .DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH) + .DATAW (`NUM_THREADS + `PC_BITS + `NW_WIDTH + `UUID_WIDTH), + .SIZE (2), // need to buffer out ready_in + .OUT_REG (1) // should be registered for BRAM acces in fetch unit ) out_buf ( .clk (clk), .reset (reset), .valid_in (schedule_valid), .ready_in (schedule_ready), - .data_in ({schedule_tmask, schedule_pc, schedule_wid}), - .data_out ({schedule_if.data.tmask, schedule_if.data.PC, schedule_if.data.wid}), + .data_in ({schedule_tmask, schedule_pc, schedule_wid, instr_uuid}), + .data_out ({schedule_if.data.tmask, schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.uuid}), .valid_out (schedule_if.valid), .ready_out (schedule_if.ready) ); - assign schedule_if.data.uuid = instr_uuid; - // Track pending instructions per warp - reg [`NUM_WARPS-1:0] per_warp_incr; - always @(*) begin - per_warp_incr = 0; - if 
(schedule_if_fire) begin - per_warp_incr[schedule_if.data.wid] = 1; - end - end - wire [`NUM_WARPS-1:0] pending_warp_empty; wire [`NUM_WARPS-1:0] pending_warp_alm_empty; - `RESET_RELAY_EX (pending_instr_reset, reset, `NUM_WARPS, `MAX_FANOUT); - - for (genvar i = 0; i < `NUM_WARPS; ++i) begin - + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_pending_sizes VX_pending_size #( .SIZE (4096), .ALM_EMPTY (1) ) counter ( .clk (clk), - .reset (pending_instr_reset[i]), - .incr (per_warp_incr[i]), + .reset (reset), + .incr (schedule_if_fire && (schedule_if.data.wid == `NW_WIDTH'(i))), .decr (commit_sched_if.committed_warps[i]), .empty (pending_warp_empty[i]), .alm_empty (pending_warp_alm_empty[i]), @@ -422,7 +403,7 @@ module VX_schedule import VX_gpu_pkg::*; #( timeout_ctr <= '0; timeout_enable <= 0; end else begin - if (decode_sched_if.valid && ~decode_sched_if.is_wstall) begin + if (decode_sched_if.valid && decode_sched_if.unlock) begin timeout_enable <= 1; end if (timeout_enable && active_warps !=0 && active_warps == stalled_warps) begin diff --git a/hw/rtl/core/VX_scoreboard.sv b/hw/rtl/core/VX_scoreboard.sv index 9b3a146c6..1fe9a7f44 100644 --- a/hw/rtl/core/VX_scoreboard.sv +++ b/hw/rtl/core/VX_scoreboard.sv @@ -30,6 +30,8 @@ module VX_scoreboard import VX_gpu_pkg::*; #( VX_scoreboard_if.master scoreboard_if ); `UNUSED_SPARAM (INSTANCE_ID) + localparam NUM_SRC_OPDS = 3; + localparam NUM_OPDS = NUM_SRC_OPDS + 1; localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + (`NR_BITS * 4) + 1; VX_ibuffer_if staging_if [PER_ISSUE_WARPS](); @@ -64,13 +66,13 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(PER_ISSUE_WARPS, `MAX_FANOUT)); wire [PER_ISSUE_WARPS-1:0] stg_valid_in; - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_stg_valid_in assign stg_valid_in[w] = staging_if[w].valid; end wire 
perf_stall_per_cycle = (|stg_valid_in) && ~(|(stg_valid_in & operands_ready)); - always @(posedge clk) begin + always @(posedge clk) begin : g_perf_stalls if (reset) begin perf_stalls <= '0; end else begin @@ -78,7 +80,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end end - for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin : g_perf_units_uses always @(posedge clk) begin if (reset) begin perf_units_uses[i] <= '0; @@ -88,7 +90,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end end - for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin + for (genvar i = 0; i < `NUM_SFU_UNITS; ++i) begin : g_perf_sfu_uses always @(posedge clk) begin if (reset) begin perf_sfu_uses[i] <= '0; @@ -99,10 +101,9 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end `endif - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin - VX_elastic_buffer #( - .DATAW (DATAW), - .SIZE (1) + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_stanging_bufs + VX_pipe_buffer #( + .DATAW (DATAW) ) stanging_buf ( .clk (clk), .reset (reset), @@ -115,10 +116,10 @@ module VX_scoreboard import VX_gpu_pkg::*; #( ); end - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_scoreboard reg [`NUM_REGS-1:0] inuse_regs; - reg [3:0] operands_busy, operands_busy_n; + reg [NUM_OPDS-1:0] operands_busy, operands_busy_n; wire ibuffer_fire = ibuffer_if[w].valid && ibuffer_if[w].ready; @@ -128,6 +129,10 @@ module VX_scoreboard import VX_gpu_pkg::*; #( && (writeback_if.data.wis == ISSUE_WIS_W'(w)) && writeback_if.data.eop; + wire [NUM_OPDS-1:0][`NR_BITS-1:0] ibuf_opds, stg_opds; + assign ibuf_opds = {ibuffer_if[w].data.rs3, ibuffer_if[w].data.rs2, ibuffer_if[w].data.rs1, ibuffer_if[w].data.rd}; + assign stg_opds = {staging_if[w].data.rs3, staging_if[w].data.rs2, staging_if[w].data.rs1, staging_if[w].data.rd}; + `ifdef PERF_ENABLE reg [`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units; reg [`NUM_REGS-1:0][`SFU_WIDTH-1:0] 
inuse_sfu; @@ -135,29 +140,11 @@ module VX_scoreboard import VX_gpu_pkg::*; #( always @(*) begin perf_inuse_units_per_cycle[w] = '0; perf_inuse_sfu_per_cycle[w] = '0; - if (staging_if[w].valid) begin - if (operands_busy[0]) begin - perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rd]] = 1; - if (inuse_units[staging_if[w].data.rd] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rd]] = 1; - end - end - if (operands_busy[1]) begin - perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs1]] = 1; - if (inuse_units[staging_if[w].data.rs1] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs1]] = 1; - end - end - if (operands_busy[2]) begin - perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs2]] = 1; - if (inuse_units[staging_if[w].data.rs2] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs2]] = 1; - end - end - if (operands_busy[3]) begin - perf_inuse_units_per_cycle[w][inuse_units[staging_if[w].data.rs3]] = 1; - if (inuse_units[staging_if[w].data.rs3] == `EX_SFU) begin - perf_inuse_sfu_per_cycle[w][inuse_sfu[staging_if[w].data.rs3]] = 1; + for (integer i = 0; i < NUM_OPDS; ++i) begin + if (staging_if[w].valid && operands_busy[i]) begin + perf_inuse_units_per_cycle[w][inuse_units[stg_opds[i]]] = 1; + if (inuse_units[stg_opds[i]] == `EX_SFU) begin + perf_inuse_sfu_per_cycle[w][inuse_sfu[stg_opds[i]]] = 1; end end end @@ -165,56 +152,24 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `endif always @(*) begin - operands_busy_n = operands_busy; - if (ibuffer_fire) begin - operands_busy_n = { - inuse_regs[ibuffer_if[w].data.rs3], - inuse_regs[ibuffer_if[w].data.rs2], - inuse_regs[ibuffer_if[w].data.rs1], - inuse_regs[ibuffer_if[w].data.rd] - }; - end - if (writeback_fire) begin + for (integer i = 0; i < NUM_OPDS; ++i) begin + operands_busy_n[i] = operands_busy[i]; if (ibuffer_fire) begin - if (writeback_if.data.rd == ibuffer_if[w].data.rd) begin - 
operands_busy_n[0] = 0; - end - if (writeback_if.data.rd == ibuffer_if[w].data.rs1) begin - operands_busy_n[1] = 0; - end - if (writeback_if.data.rd == ibuffer_if[w].data.rs2) begin - operands_busy_n[2] = 0; - end - if (writeback_if.data.rd == ibuffer_if[w].data.rs3) begin - operands_busy_n[3] = 0; - end - end else begin - if (writeback_if.data.rd == staging_if[w].data.rd) begin - operands_busy_n[0] = 0; - end - if (writeback_if.data.rd == staging_if[w].data.rs1) begin - operands_busy_n[1] = 0; - end - if (writeback_if.data.rd == staging_if[w].data.rs2) begin - operands_busy_n[2] = 0; - end - if (writeback_if.data.rd == staging_if[w].data.rs3) begin - operands_busy_n[3] = 0; - end + operands_busy_n[i] = inuse_regs[ibuf_opds[i]]; end - end - if (staging_fire && staging_if[w].data.wb) begin - if (staging_if[w].data.rd == ibuffer_if[w].data.rd) begin - operands_busy_n[0] = 1; - end - if (staging_if[w].data.rd == ibuffer_if[w].data.rs1) begin - operands_busy_n[1] = 1; - end - if (staging_if[w].data.rd == ibuffer_if[w].data.rs2) begin - operands_busy_n[2] = 1; + if (writeback_fire) begin + if (ibuffer_fire) begin + if (writeback_if.data.rd == ibuf_opds[i]) begin + operands_busy_n[i] = 0; + end + end else begin + if (writeback_if.data.rd == stg_opds[i]) begin + operands_busy_n[i] = 0; + end + end end - if (staging_if[w].data.rd == ibuffer_if[w].data.rs3) begin - operands_busy_n[3] = 1; + if (staging_fire && staging_if[w].data.wb && staging_if[w].data.rd == ibuf_opds[i]) begin + operands_busy_n[i] = 1; end end end @@ -251,9 +206,9 @@ module VX_scoreboard import VX_gpu_pkg::*; #( end else begin if (staging_if[w].valid && ~staging_if[w].ready) begin `ifdef DBG_TRACE_PIPELINE - `TRACE(3, ("%d: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", + `TRACE(3, ("%t: *** %s-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n", $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr, - operands_busy, 
staging_if[w].data.uuid)); + operands_busy, staging_if[w].data.uuid)) `endif timeout_ctr <= timeout_ctr + 1; end else if (ibuffer_fire) begin @@ -265,11 +220,11 @@ module VX_scoreboard import VX_gpu_pkg::*; #( `RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT), ("%t: *** %s timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)", $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr, - operands_busy, staging_if[w].data.uuid)); + operands_busy, staging_if[w].data.uuid)) `RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if.data.rd] != 0, ("%t: *** %s invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)", - $time, INSTANCE_ID, w, {writeback_if.data.PC, 1'b0}, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.uuid)); + $time, INSTANCE_ID, w, {writeback_if.data.PC, 1'b0}, writeback_if.data.tmask, writeback_if.data.rd, writeback_if.data.uuid)) `endif end @@ -278,23 +233,20 @@ module VX_scoreboard import VX_gpu_pkg::*; #( wire [PER_ISSUE_WARPS-1:0][DATAW-1:0] arb_data_in; wire [PER_ISSUE_WARPS-1:0] arb_ready_in; - for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin + for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_arb_data_in assign arb_valid_in[w] = staging_if[w].valid && operands_ready[w]; assign arb_data_in[w] = staging_if[w].data; assign staging_if[w].ready = arb_ready_in[w] && operands_ready[w]; end - `RESET_RELAY (arb_reset, reset); - VX_stream_arb #( .NUM_INPUTS (PER_ISSUE_WARPS), .DATAW (DATAW), - .ARBITER ("F"), - .LUTRAM (1), - .OUT_BUF (4) // using 2-cycle EB for area reduction + .ARBITER ("C"), + .OUT_BUF (3) ) out_arb ( .clk (clk), - .reset (arb_reset), + .reset (reset), .valid_in (arb_valid_in), .ready_in (arb_ready_in), .data_in (arb_data_in), diff --git a/hw/rtl/core/VX_sfu_unit.sv b/hw/rtl/core/VX_sfu_unit.sv index 5ef4211d0..5af6211f6 100644 --- a/hw/rtl/core/VX_sfu_unit.sv +++ b/hw/rtl/core/VX_sfu_unit.sv @@ -41,24 +41,25 @@ module VX_sfu_unit import VX_gpu_pkg::*; 
#( VX_warp_ctl_if.master warp_ctl_if ); `UNUSED_SPARAM (INSTANCE_ID) - localparam BLOCK_SIZE = 1; - localparam NUM_LANES = `NUM_SFU_LANES; - localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); - localparam PID_WIDTH = `UP(PID_BITS); - - localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `PC_BITS + PID_WIDTH + 1 + 1; - localparam RSP_ARB_SIZE = 1 + 1; - localparam RSP_ARB_IDX_WCTL = 0; - localparam RSP_ARB_IDX_CSRS = 1; + localparam BLOCK_SIZE = 1; + localparam NUM_LANES = `NUM_SFU_LANES; + localparam PE_COUNT = 2; + localparam PE_SEL_BITS = `CLOG2(PE_COUNT); + localparam PE_IDX_WCTL = 0; + localparam PE_IDX_CSRS = 1; VX_execute_if #( .NUM_LANES (NUM_LANES) ) per_block_execute_if[BLOCK_SIZE](); + VX_commit_if #( + .NUM_LANES (NUM_LANES) + ) per_block_commit_if[BLOCK_SIZE](); + VX_dispatch_unit #( .BLOCK_SIZE (BLOCK_SIZE), .NUM_LANES (NUM_LANES), - .OUT_BUF (1) + .OUT_BUF (3) ) dispatch_unit ( .clk (clk), .reset (reset), @@ -66,61 +67,58 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( .execute_if (per_block_execute_if) ); - wire [RSP_ARB_SIZE-1:0] rsp_arb_valid_in; - wire [RSP_ARB_SIZE-1:0] rsp_arb_ready_in; - wire [RSP_ARB_SIZE-1:0][RSP_ARB_DATAW-1:0] rsp_arb_data_in; - - // Warp control block VX_execute_if #( .NUM_LANES (NUM_LANES) - ) wctl_execute_if(); + ) pe_execute_if[PE_COUNT](); + VX_commit_if#( .NUM_LANES (NUM_LANES) - ) wctl_commit_if(); + ) pe_commit_if[PE_COUNT](); - assign wctl_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_WCTL(per_block_execute_if[0].data.op_type); - assign wctl_execute_if.data = per_block_execute_if[0].data; + reg [PE_SEL_BITS-1:0] pe_select; + always @(*) begin + pe_select = PE_IDX_WCTL; + if (`INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type)) + pe_select = PE_IDX_CSRS; + end - `RESET_RELAY (wctl_reset, reset); + VX_pe_switch #( + .PE_COUNT (PE_COUNT), + .NUM_LANES (NUM_LANES), + .ARBITER ("R"), + .REQ_OUT_BUF(0), + .RSP_OUT_BUF(3) + ) pe_switch ( + .clk 
(clk), + .reset (reset), + .pe_sel (pe_select), + .execute_in_if (per_block_execute_if[0]), + .commit_out_if (per_block_commit_if[0]), + .execute_out_if (pe_execute_if), + .commit_in_if (pe_commit_if) + ); VX_wctl_unit #( .INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)), .NUM_LANES (NUM_LANES) ) wctl_unit ( .clk (clk), - .reset (wctl_reset), - .execute_if (wctl_execute_if), + .reset (reset), + .execute_if (pe_execute_if[PE_IDX_WCTL]), .warp_ctl_if(warp_ctl_if), - .commit_if (wctl_commit_if) + .commit_if (pe_commit_if[PE_IDX_WCTL]) ); - assign rsp_arb_valid_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.valid; - assign rsp_arb_data_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.data; - assign wctl_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_WCTL]; - - // CSR unit - VX_execute_if #( - .NUM_LANES (NUM_LANES) - ) csr_execute_if(); - VX_commit_if #( - .NUM_LANES (NUM_LANES) - ) csr_commit_if(); - - assign csr_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type); - assign csr_execute_if.data = per_block_execute_if[0].data; - - `RESET_RELAY (csr_reset, reset); - VX_csr_unit #( .INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)), .CORE_ID (CORE_ID), .NUM_LANES (NUM_LANES) ) csr_unit ( .clk (clk), - .reset (csr_reset), + .reset (reset), .base_dcrs (base_dcrs), - .execute_if (csr_execute_if), + .execute_if (pe_execute_if[PE_IDX_CSRS]), `ifdef PERF_ENABLE .mem_perf_if (mem_perf_if), @@ -133,47 +131,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( .sched_csr_if (sched_csr_if), .commit_csr_if (commit_csr_if), - .commit_if (csr_commit_if) - ); - - assign rsp_arb_valid_in[RSP_ARB_IDX_CSRS] = csr_commit_if.valid; - assign rsp_arb_data_in[RSP_ARB_IDX_CSRS] = csr_commit_if.data; - assign csr_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_CSRS]; - - // can accept new request? 
- - reg sfu_req_ready; - always @(*) begin - case (per_block_execute_if[0].data.op_type) - `INST_SFU_CSRRW, - `INST_SFU_CSRRS, - `INST_SFU_CSRRC: sfu_req_ready = csr_execute_if.ready; - default: sfu_req_ready = wctl_execute_if.ready; - endcase - end - assign per_block_execute_if[0].ready = sfu_req_ready; - - // response arbitration - - VX_commit_if #( - .NUM_LANES (NUM_LANES) - ) arb_commit_if[BLOCK_SIZE](); - - VX_stream_arb #( - .NUM_INPUTS (RSP_ARB_SIZE), - .DATAW (RSP_ARB_DATAW), - .ARBITER ("R"), - .OUT_BUF (3) - ) rsp_arb ( - .clk (clk), - .reset (reset), - .valid_in (rsp_arb_valid_in), - .ready_in (rsp_arb_ready_in), - .data_in (rsp_arb_data_in), - .data_out (arb_commit_if[0].data), - .valid_out (arb_commit_if[0].valid), - .ready_out (arb_commit_if[0].ready), - `UNUSED_PIN (sel_out) + .commit_if (pe_commit_if[PE_IDX_CSRS]) ); VX_gather_unit #( @@ -181,9 +139,9 @@ module VX_sfu_unit import VX_gpu_pkg::*; #( .NUM_LANES (NUM_LANES), .OUT_BUF (3) ) gather_unit ( - .clk (clk), - .reset (reset), - .commit_in_if (arb_commit_if), + .clk (clk), + .reset (reset), + .commit_in_if (per_block_commit_if), .commit_out_if (commit_if) ); diff --git a/hw/rtl/core/VX_split_join.sv b/hw/rtl/core/VX_split_join.sv index 7f887e602..7955437a6 100644 --- a/hw/rtl/core/VX_split_join.sv +++ b/hw/rtl/core/VX_split_join.sv @@ -45,16 +45,14 @@ module VX_split_join import VX_gpu_pkg::*; #( wire ipdom_push = valid && split.valid && split.is_dvg; wire ipdom_pop = valid && sjoin.valid && sjoin_is_dvg; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin - - `RESET_RELAY (ipdom_reset, reset); - + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_ipdom_stacks VX_ipdom_stack #( .WIDTH (`NUM_THREADS+`PC_BITS), - .DEPTH (`DV_STACK_SIZE) + .DEPTH (`DV_STACK_SIZE), + .OUT_REG (0) ) ipdom_stack ( .clk (clk), - .reset (ipdom_reset), + .reset (reset), .q0 (ipdom_q0), .q1 (ipdom_q1), .d (ipdom_data[i]), diff --git a/hw/rtl/core/VX_trace_pkg.sv b/hw/rtl/core/VX_trace_pkg.sv deleted file mode 100644 index 
b4eae96fe..000000000 --- a/hw/rtl/core/VX_trace_pkg.sv +++ /dev/null @@ -1,399 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`ifndef VX_TRACE_PKG_VH -`define VX_TRACE_PKG_VH - -`include "VX_define.vh" - -package VX_trace_pkg; - -`ifdef SIMULATION - -`ifdef SV_DPI - import "DPI-C" function void dpi_trace(input int level, input string format /*verilator sformat*/); -`endif - - import VX_gpu_pkg::*; - - task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type); - case (ex_type) - `EX_ALU: `TRACE(level, ("ALU")); - `EX_LSU: `TRACE(level, ("LSU")); - `EX_FPU: `TRACE(level, ("FPU")); - `EX_SFU: `TRACE(level, ("SFU")); - default: `TRACE(level, ("?")); - endcase - endtask - - task trace_ex_op(input int level, - input [`EX_BITS-1:0] ex_type, - input [`INST_OP_BITS-1:0] op_type, - input VX_gpu_pkg::op_args_t op_args - ); - case (ex_type) - `EX_ALU: begin - case (op_args.alu.xtype) - `ALU_TYPE_ARITH: begin - if (op_args.alu.is_w) begin - if (op_args.alu.use_imm) begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDIW")); - `INST_ALU_SLL: `TRACE(level, ("SLLIW")); - `INST_ALU_SRL: `TRACE(level, ("SRLIW")); - `INST_ALU_SRA: `TRACE(level, ("SRAIW")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDW")); - `INST_ALU_SUB: `TRACE(level, ("SUBW")); - `INST_ALU_SLL: `TRACE(level, ("SLLW")); - `INST_ALU_SRL: `TRACE(level, 
("SRLW")); - `INST_ALU_SRA: `TRACE(level, ("SRAW")); - default: `TRACE(level, ("?")); - endcase - end - end else begin - if (op_args.alu.use_imm) begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADDI")); - `INST_ALU_SLL: `TRACE(level, ("SLLI")); - `INST_ALU_SRL: `TRACE(level, ("SRLI")); - `INST_ALU_SRA: `TRACE(level, ("SRAI")); - `INST_ALU_SLT: `TRACE(level, ("SLTI")); - `INST_ALU_SLTU: `TRACE(level, ("SLTIU")); - `INST_ALU_XOR: `TRACE(level, ("XORI")); - `INST_ALU_OR: `TRACE(level, ("ORI")); - `INST_ALU_AND: `TRACE(level, ("ANDI")); - `INST_ALU_LUI: `TRACE(level, ("LUI")); - `INST_ALU_AUIPC: `TRACE(level, ("AUIPC")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_ALU_BITS'(op_type)) - `INST_ALU_ADD: `TRACE(level, ("ADD")); - `INST_ALU_SUB: `TRACE(level, ("SUB")); - `INST_ALU_SLL: `TRACE(level, ("SLL")); - `INST_ALU_SRL: `TRACE(level, ("SRL")); - `INST_ALU_SRA: `TRACE(level, ("SRA")); - `INST_ALU_SLT: `TRACE(level, ("SLT")); - `INST_ALU_SLTU: `TRACE(level, ("SLTU")); - `INST_ALU_XOR: `TRACE(level, ("XOR")); - `INST_ALU_OR: `TRACE(level, ("OR")); - `INST_ALU_AND: `TRACE(level, ("AND")); - `INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ")); - `INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ")); - default: `TRACE(level, ("?")); - endcase - end - end - end - `ALU_TYPE_BRANCH: begin - case (`INST_BR_BITS'(op_type)) - `INST_BR_EQ: `TRACE(level, ("BEQ")); - `INST_BR_NE: `TRACE(level, ("BNE")); - `INST_BR_LT: `TRACE(level, ("BLT")); - `INST_BR_GE: `TRACE(level, ("BGE")); - `INST_BR_LTU: `TRACE(level, ("BLTU")); - `INST_BR_GEU: `TRACE(level, ("BGEU")); - `INST_BR_JAL: `TRACE(level, ("JAL")); - `INST_BR_JALR: `TRACE(level, ("JALR")); - `INST_BR_ECALL: `TRACE(level, ("ECALL")); - `INST_BR_EBREAK:`TRACE(level, ("EBREAK")); - `INST_BR_URET: `TRACE(level, ("URET")); - `INST_BR_SRET: `TRACE(level, ("SRET")); - `INST_BR_MRET: `TRACE(level, ("MRET")); - default: `TRACE(level, ("?")); - endcase - end - `ALU_TYPE_MULDIV: begin - if 
(op_args.alu.is_w) begin - case (`INST_M_BITS'(op_type)) - `INST_M_MUL: `TRACE(level, ("MULW")); - `INST_M_DIV: `TRACE(level, ("DIVW")); - `INST_M_DIVU: `TRACE(level, ("DIVUW")); - `INST_M_REM: `TRACE(level, ("REMW")); - `INST_M_REMU: `TRACE(level, ("REMUW")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_M_BITS'(op_type)) - `INST_M_MUL: `TRACE(level, ("MUL")); - `INST_M_MULH: `TRACE(level, ("MULH")); - `INST_M_MULHSU:`TRACE(level, ("MULHSU")); - `INST_M_MULHU: `TRACE(level, ("MULHU")); - `INST_M_DIV: `TRACE(level, ("DIV")); - `INST_M_DIVU: `TRACE(level, ("DIVU")); - `INST_M_REM: `TRACE(level, ("REM")); - `INST_M_REMU: `TRACE(level, ("REMU")); - default: `TRACE(level, ("?")); - endcase - end - end - default: `TRACE(level, ("?")); - endcase - end - `EX_LSU: begin - if (op_args.lsu.is_float) begin - case (`INST_LSU_BITS'(op_type)) - `INST_LSU_LW: `TRACE(level, ("FLW")); - `INST_LSU_LD: `TRACE(level, ("FLD")); - `INST_LSU_SW: `TRACE(level, ("FSW")); - `INST_LSU_SD: `TRACE(level, ("FSD")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (`INST_LSU_BITS'(op_type)) - `INST_LSU_LB: `TRACE(level, ("LB")); - `INST_LSU_LH: `TRACE(level, ("LH")); - `INST_LSU_LW: `TRACE(level, ("LW")); - `INST_LSU_LD: `TRACE(level, ("LD")); - `INST_LSU_LBU:`TRACE(level, ("LBU")); - `INST_LSU_LHU:`TRACE(level, ("LHU")); - `INST_LSU_LWU:`TRACE(level, ("LWU")); - `INST_LSU_SB: `TRACE(level, ("SB")); - `INST_LSU_SH: `TRACE(level, ("SH")); - `INST_LSU_SW: `TRACE(level, ("SW")); - `INST_LSU_SD: `TRACE(level, ("SD")); - `INST_LSU_FENCE:`TRACE(level,("FENCE")); - default: `TRACE(level, ("?")); - endcase - end - end - `EX_FPU: begin - case (`INST_FPU_BITS'(op_type)) - `INST_FPU_ADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FADD.D")); - else - `TRACE(level, ("FADD.S")); - end - `INST_FPU_SUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSUB.D")); - else - `TRACE(level, ("FSUB.S")); - end - `INST_FPU_MUL: begin - if (op_args.fpu.fmt[0]) - 
`TRACE(level, ("FMUL.D")); - else - `TRACE(level, ("FMUL.S")); - end - `INST_FPU_DIV: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FDIV.D")); - else - `TRACE(level, ("FDIV.S")); - end - `INST_FPU_SQRT: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FSQRT.D")); - else - `TRACE(level, ("FSQRT.S")); - end - `INST_FPU_MADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMADD.D")); - else - `TRACE(level, ("FMADD.S")); - end - `INST_FPU_MSUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FMSUB.D")); - else - `TRACE(level, ("FMSUB.S")); - end - `INST_FPU_NMADD: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMADD.D")); - else - `TRACE(level, ("FNMADD.S")); - end - `INST_FPU_NMSUB: begin - if (op_args.fpu.fmt[0]) - `TRACE(level, ("FNMSUB.D")); - else - `TRACE(level, ("FNMSUB.S")); - end - `INST_FPU_CMP: begin - if (op_args.fpu.fmt[0]) begin - case (op_args.fpu.frm[1:0]) - 0: `TRACE(level, ("FLE.D")); - 1: `TRACE(level, ("FLT.D")); - 2: `TRACE(level, ("FEQ.D")); - default: `TRACE(level, ("?")); - endcase - end else begin - case (op_args.fpu.frm[1:0]) - 0: `TRACE(level, ("FLE.S")); - 1: `TRACE(level, ("FLT.S")); - 2: `TRACE(level, ("FEQ.S")); - default: `TRACE(level, ("?")); - endcase - end - end - `INST_FPU_F2F: begin - if (op_args.fpu.fmt[0]) begin - `TRACE(level, ("FCVT.D.S")); - end else begin - `TRACE(level, ("FCVT.S.D")); - end - end - `INST_FPU_F2I: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.L.D")); - end else begin - `TRACE(level, ("FCVT.W.D")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.L.S")); - end else begin - `TRACE(level, ("FCVT.W.S")); - end - end - end - `INST_FPU_F2U: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.LU.D")); - end else begin - `TRACE(level, ("FCVT.WU.D")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.LU.S")); - end else begin - `TRACE(level, ("FCVT.WU.S")); - end 
- end - end - `INST_FPU_I2F: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.D.L")); - end else begin - `TRACE(level, ("FCVT.D.W")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.S.L")); - end else begin - `TRACE(level, ("FCVT.S.W")); - end - end - end - `INST_FPU_U2F: begin - if (op_args.fpu.fmt[0]) begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.D.LU")); - end else begin - `TRACE(level, ("FCVT.D.WU")); - end - end else begin - if (op_args.fpu.fmt[1]) begin - `TRACE(level, ("FCVT.S.LU")); - end else begin - `TRACE(level, ("FCVT.S.WU")); - end - end - end - `INST_FPU_MISC: begin - if (op_args.fpu.fmt[0]) begin - case (op_args.fpu.frm) - 0: `TRACE(level, ("FSGNJ.D")); - 1: `TRACE(level, ("FSGNJN.D")); - 2: `TRACE(level, ("FSGNJX.D")); - 3: `TRACE(level, ("FCLASS.D")); - 4: `TRACE(level, ("FMV.X.D")); - 5: `TRACE(level, ("FMV.D.X")); - 6: `TRACE(level, ("FMIN.D")); - 7: `TRACE(level, ("FMAX.D")); - endcase - end else begin - case (op_args.fpu.frm) - 0: `TRACE(level, ("FSGNJ.S")); - 1: `TRACE(level, ("FSGNJN.S")); - 2: `TRACE(level, ("FSGNJX.S")); - 3: `TRACE(level, ("FCLASS.S")); - 4: `TRACE(level, ("FMV.X.S")); - 5: `TRACE(level, ("FMV.S.X")); - 6: `TRACE(level, ("FMIN.S")); - 7: `TRACE(level, ("FMAX.S")); - endcase - end - end - default: `TRACE(level, ("?")); - endcase - end - `EX_SFU: begin - case (`INST_SFU_BITS'(op_type)) - `INST_SFU_TMC: `TRACE(level, ("TMC")); - `INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN")); - `INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end - `INST_SFU_JOIN: `TRACE(level, ("JOIN")); - `INST_SFU_BAR: `TRACE(level, ("BAR")); - `INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end - `INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end - `INST_SFU_CSRRS: begin if (op_args.csr.use_imm) 
`TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end - `INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end - default: `TRACE(level, ("?")); - endcase - end - default: `TRACE(level, ("?")); - endcase - endtask - - task trace_op_args(input int level, - input [`EX_BITS-1:0] ex_type, - input [`INST_OP_BITS-1:0] op_type, - input VX_gpu_pkg::op_args_t op_args - ); - case (ex_type) - `EX_ALU: begin - `TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm)); - end - `EX_LSU: begin - `TRACE(level, (", offset=0x%0h", op_args.lsu.offset)); - end - `EX_FPU: begin - `TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm)); - end - `EX_SFU: begin - if (`INST_SFU_IS_CSR(op_type)) begin - `TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm)); - end - end - default:; - endcase - endtask - - task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr); - case (addr) - `VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0")); - `VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1")); - `VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0")); - `VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1")); - `VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS")); - default: `TRACE(level, ("?")); - endcase - endtask - -`endif - -endpackage - -`endif // VX_TRACE_PKG_VH diff --git a/hw/rtl/core/VX_uuid_gen.sv b/hw/rtl/core/VX_uuid_gen.sv new file mode 100644 index 000000000..cbde9091d --- /dev/null +++ b/hw/rtl/core/VX_uuid_gen.sv @@ -0,0 +1,44 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +module VX_uuid_gen import VX_gpu_pkg::*; #( + parameter CORE_ID = 0, + parameter UUID_WIDTH = 48 +) ( + input wire clk, + input wire reset, + input wire incr, + input wire [`NW_WIDTH-1:0] wid, + output wire [UUID_WIDTH-1:0] uuid +); + localparam GNW_WIDTH = UUID_WIDTH - 32; + reg [31:0] uuid_cntrs [0:`NUM_WARPS-1]; + reg [`NUM_WARPS-1:0] has_uuid_cntrs; + + always @(posedge clk) begin + if (reset) begin + has_uuid_cntrs <= '0; + end else if (incr) begin + has_uuid_cntrs[wid] <= 1; + end + if (incr) begin + uuid_cntrs[wid] <= has_uuid_cntrs[wid] ? (uuid_cntrs[wid] + 1) : 1; + end + end + + wire [GNW_WIDTH-1:0] g_wid = (GNW_WIDTH'(CORE_ID) << `NW_BITS) + GNW_WIDTH'(wid); + assign uuid = {g_wid, (has_uuid_cntrs[wid] ? 
uuid_cntrs[wid] : 0)}; + +endmodule diff --git a/hw/rtl/core/VX_wctl_unit.sv b/hw/rtl/core/VX_wctl_unit.sv index 132f679d4..bb85b70c9 100644 --- a/hw/rtl/core/VX_wctl_unit.sv +++ b/hw/rtl/core/VX_wctl_unit.sv @@ -50,9 +50,9 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( wire is_bar = (execute_if.data.op_type == `INST_SFU_BAR); wire [`UP(LANE_BITS)-1:0] tid; - if (LANE_BITS != 0) begin + if (LANE_BITS != 0) begin : g_tid assign tid = execute_if.data.tid[0 +: LANE_BITS]; - end else begin + end else begin : g_no_tid assign tid = 0; end @@ -63,7 +63,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( wire not_pred = execute_if.data.op_args.wctl.is_neg; wire [NUM_LANES-1:0] taken; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_taken assign taken[i] = (execute_if.data.rs1_data[i][0] ^ not_pred); end @@ -131,7 +131,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( // wspawn wire [`NUM_WARPS-1:0] wspawn_wmask; - for (genvar i = 0; i < `NUM_WARPS; ++i) begin + for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_wspawn_wmask assign wspawn_wmask[i] = (i < rs1_data[`NW_BITS:0]) && (i != execute_if.data.wid); end assign wspawn.valid = is_wspawn; @@ -162,7 +162,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( assign warp_ctl_if.sjoin = sjoin_r; assign warp_ctl_if.barrier = barrier_r; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_commit_if assign commit_if.data.data[i] = `XLEN'(dvstack_ptr); end diff --git a/hw/rtl/fpu/VX_fcvt_unit.sv b/hw/rtl/fpu/VX_fcvt_unit.sv index b5b7b1690..5756a25ed 100644 --- a/hw/rtl/fpu/VX_fcvt_unit.sv +++ b/hw/rtl/fpu/VX_fcvt_unit.sv @@ -1,17 +1,17 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// Modified port of cast module from fpnew Libray +// Modified port of cast module from fpnew Libray // reference: https://github.com/pulp-platform/fpnew `include "VX_fpu_define.vh" @@ -22,7 +22,8 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( parameter LATENCY = 1, parameter INT_WIDTH = 32, parameter MAN_BITS = 23, - parameter EXP_BITS = 8 + parameter EXP_BITS = 8, + parameter OUT_REG = 0 ) ( input wire clk, input wire reset, @@ -35,10 +36,10 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( input wire is_signed, input wire [31:0] dataa, - output wire [31:0] result, + output wire [31:0] result, output wire [`FP_FLAGS_BITS-1:0] fflags -); +); // Constants localparam EXP_BIAS = 2**(EXP_BITS-1)-1; @@ -55,11 +56,11 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( localparam FMT_SHIFT_COMPENSATION = S_MAN_WIDTH - 1 - MAN_BITS; localparam NUM_FP_STICKY = 2 * S_MAN_WIDTH - MAN_BITS - 1; // removed mantissa, 1. and R localparam NUM_INT_STICKY = 2 * S_MAN_WIDTH - INT_WIDTH; // removed int and R - + // Input processing - - fclass_t fclass; - VX_fp_classifier #( + + fclass_t fclass; + VX_fp_classifier #( .EXP_BITS (EXP_BITS), .MAN_BITS (MAN_BITS) ) fp_classifier ( @@ -69,9 +70,9 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( ); wire [S_MAN_WIDTH-1:0] input_mant; - wire [S_EXP_WIDTH-1:0] input_exp; + wire [S_EXP_WIDTH-1:0] input_exp; wire input_sign; - + wire i2f_sign = dataa[INT_WIDTH-1]; wire f2i_sign = dataa[INT_WIDTH-1] && is_signed; wire [S_MAN_WIDTH-1:0] f2i_mantissa = f2i_sign ? 
(-dataa) : dataa; @@ -81,7 +82,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( assign input_sign = is_itof ? f2i_sign : i2f_sign; // Pipeline stage0 - + wire is_itof_s0; wire is_signed_s0; wire [2:0] rnd_mode_s0; @@ -92,7 +93,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (1 + `INST_FRM_BITS + 1 + $bits(fclass_t) + 1 + S_EXP_WIDTH + S_MAN_WIDTH), - .DEPTH (LATENCY > 2) + .DEPTH (LATENCY > 1) ) pipe_reg0 ( .clk (clk), .reset (reset), @@ -100,7 +101,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( .data_in ({is_itof, is_signed, frm, fclass, input_sign, input_exp, input_mant}), .data_out ({is_itof_s0, is_signed_s0, rnd_mode_s0, fclass_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0}) ); - + // Normalization wire [LZC_RESULT_WIDTH-1:0] renorm_shamt_s0; // renormalization shift amount @@ -113,12 +114,12 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( .data_out (renorm_shamt_s0), .valid_out (mant_is_nonzero_s0) ); - + wire mant_is_zero_s0 = ~mant_is_nonzero_s0; - wire [S_MAN_WIDTH-1:0] input_mant_n_s0; // normalized input mantissa + wire [S_MAN_WIDTH-1:0] input_mant_n_s0; // normalized input mantissa wire [S_EXP_WIDTH-1:0] input_exp_n_s0; // unbiased true exponent - + // Realign input mantissa, append zeroes if destination is wider assign input_mant_n_s0 = encoded_mant_s0 << renorm_shamt_s0; @@ -140,7 +141,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (1 + `INST_FRM_BITS + 1 + $bits(fclass_t) + 1 + 1 + S_MAN_WIDTH + S_EXP_WIDTH), - .DEPTH (LATENCY > 1) + .DEPTH (LATENCY > 2) ) pipe_reg1 ( .clk (clk), .reset (reset), @@ -169,30 +170,30 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( wire of_before_round_s1 = overflow; // Pipeline stage2 - + wire is_itof_s2; wire is_signed_s2; wire [2:0] rnd_mode_s2; - fclass_t fclass_s2; + fclass_t fclass_s2; wire mant_is_zero_s2; wire input_sign_s2; wire [2*S_MAN_WIDTH:0] destination_mant_s2; wire [EXP_BITS-1:0] final_exp_s2; wire of_before_round_s2; - + VX_pipe_register 
#( .DATAW (1 + 1 + `INST_FRM_BITS + $bits(fclass_t) + 1 + 1 + (2*S_MAN_WIDTH+1) + EXP_BITS + 1), - .DEPTH (LATENCY > 3) + .DEPTH (LATENCY > 0) ) pipe_reg2 ( .clk (clk), .reset (reset), .enable (enable), .data_in ({is_itof_s1, is_signed_s1, rnd_mode_s1, fclass_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}), .data_out ({is_itof_s2, is_signed_s2, rnd_mode_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2}) - ); - + ); + // Rouding and classification - + wire [MAN_BITS-1:0] final_mant_s2; // mantissa after adjustments wire [INT_WIDTH-1:0] final_int_s2; // integer shifted in position wire [1:0] f2i_round_sticky_bits_s2, i2f_round_sticky_bits_s2; @@ -237,20 +238,20 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( wire is_itof_s3; wire is_signed_s3; - fclass_t fclass_s3; + fclass_t fclass_s3; wire mant_is_zero_s3; wire input_sign_s3; wire rounded_sign_s3; wire [INT_WIDTH-1:0] rounded_abs_s3; - wire of_before_round_s3; + wire of_before_round_s3; wire f2i_round_has_sticky_s3; wire i2f_round_has_sticky_s3; - `UNUSED_VAR (fclass_s3) + `UNUSED_VAR (fclass_s3) VX_pipe_register #( .DATAW (1 + 1 + $bits(fclass_t) + 1 + 1 + 32 + 1 + 1 + 1 + 1), - .DEPTH (LATENCY > 4) + .DEPTH (LATENCY > 3) ) pipe_reg3 ( .clk (clk), .reset (reset), @@ -258,7 +259,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( .data_in ({is_itof_s2, is_signed_s2, fclass_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2, f2i_round_has_sticky_s2, i2f_round_has_sticky_s2}), .data_out ({is_itof_s3, is_signed_s3, fclass_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3, f2i_round_has_sticky_s3, i2f_round_has_sticky_s3}) ); - + // Assemble regular result, nan box short ones. Int zeroes need to be detected wire [INT_WIDTH-1:0] fmt_result_s3 = mant_is_zero_s3 ? 
0 : {rounded_sign_s3, rounded_abs_s3[EXP_BITS+MAN_BITS-1:0]}; @@ -278,18 +279,18 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( f2i_special_result_s3[INT_WIDTH-2:0] = 2**(INT_WIDTH-1) - 1; // alone yields 2**(31)-1 f2i_special_result_s3[INT_WIDTH-1] = ~is_signed_s3; // for unsigned casts yields 2**31 end - end + end // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned) - wire f2i_result_is_special_s3 = fclass_s3.is_nan + wire f2i_result_is_special_s3 = fclass_s3.is_nan | fclass_s3.is_inf | of_before_round_s3 | (input_sign_s3 & ~is_signed_s3 & ~rounded_int_res_zero_s3); - + fflags_t f2i_special_status_s3; fflags_t i2f_status_s3, f2i_status_s3; fflags_t tmp_fflags_s3; - + // All integer special cases are invalid assign f2i_special_status_s3 = {1'b1, 4'h0}; @@ -306,7 +307,7 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (32 + `FP_FLAGS_BITS), - .DEPTH (LATENCY > 0) + .DEPTH (OUT_REG) ) pipe_reg4 ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fncp_unit.sv b/hw/rtl/fpu/VX_fncp_unit.sv index a0876dcd7..27836fcbc 100644 --- a/hw/rtl/fpu/VX_fncp_unit.sv +++ b/hw/rtl/fpu/VX_fncp_unit.sv @@ -1,17 +1,17 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-// Modified port of noncomp module from fpnew Libray +// Modified port of noncomp module from fpnew Libray // reference: https://github.com/pulp-platform/fpnew `include "VX_fpu_define.vh" @@ -19,9 +19,10 @@ `ifdef FPU_DSP module VX_fncp_unit import VX_fpu_pkg::*; #( - parameter LATENCY = 2, + parameter LATENCY = 1, parameter EXP_BITS = 8, - parameter MAN_BITS = 23 + parameter MAN_BITS = 23, + parameter OUT_REG = 0 ) ( input wire clk, input wire reset, @@ -33,10 +34,10 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( input wire [31:0] dataa, input wire [31:0] datab, - output wire [31:0] result, + output wire [31:0] result, output wire [`FP_FLAGS_BITS-1:0] fflags -); +); localparam NEG_INF = 32'h00000001, NEG_NORM = 32'h00000002, NEG_SUBNORM = 32'h00000004, @@ -55,15 +56,15 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( wire a_smaller, ab_equal; // Setup - assign a_sign = dataa[31]; + assign a_sign = dataa[31]; assign a_exponent = dataa[30:23]; assign a_mantissa = dataa[22:0]; - assign b_sign = datab[31]; + assign b_sign = datab[31]; assign b_exponent = datab[30:23]; assign b_mantissa = datab[22:0]; - VX_fp_classifier #( + VX_fp_classifier #( .EXP_BITS (EXP_BITS), .MAN_BITS (MAN_BITS) ) fp_class_a ( @@ -72,7 +73,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( .clss_o (a_fclass) ); - VX_fp_classifier #( + VX_fp_classifier #( .EXP_BITS (EXP_BITS), .MAN_BITS (MAN_BITS) ) fp_class_b ( @@ -82,7 +83,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( ); assign a_smaller = (dataa < datab) ^ (a_sign || b_sign); - assign ab_equal = (dataa == datab) + assign ab_equal = (dataa == datab) || (a_fclass.is_zero && b_fclass.is_zero); // +0 == -0 // Pipeline stage0 @@ -101,54 +102,54 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (4 + 2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fclass_t) + 1 + 1), - .DEPTH (LATENCY > 1) + .DEPTH (LATENCY > 0) ) pipe_reg0 ( .clk (clk), .reset (reset), .enable (enable), .data_in ({op_mod, dataa, datab, a_sign, b_sign, a_exponent, 
a_mantissa, a_fclass, b_fclass, a_smaller, ab_equal}), .data_out ({op_mod_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_fclass_s0, b_fclass_s0, a_smaller_s0, ab_equal_s0}) - ); + ); // FCLASS reg [31:0] fclass_mask_s0; // generate a 10-bit mask for integer reg - always @(*) begin + always @(*) begin if (a_fclass_s0.is_normal) begin fclass_mask_s0 = a_sign_s0 ? NEG_NORM : POS_NORM; - end + end else if (a_fclass_s0.is_inf) begin fclass_mask_s0 = a_sign_s0 ? NEG_INF : POS_INF; - end + end else if (a_fclass_s0.is_zero) begin fclass_mask_s0 = a_sign_s0 ? NEG_ZERO : POS_ZERO; - end + end else if (a_fclass_s0.is_subnormal) begin fclass_mask_s0 = a_sign_s0 ? NEG_SUBNORM : POS_SUBNORM; - end + end else if (a_fclass_s0.is_nan) begin fclass_mask_s0 = {22'h0, a_fclass_s0.is_quiet, a_fclass_s0.is_signaling, 8'h0}; - end - else begin + end + else begin fclass_mask_s0 = QUT_NAN; end end - // Min/Max + // Min/Max reg [31:0] fminmax_res_s0; always @(*) begin if (a_fclass_s0.is_nan && b_fclass_s0.is_nan) fminmax_res_s0 = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN - else if (a_fclass_s0.is_nan) + else if (a_fclass_s0.is_nan) fminmax_res_s0 = datab_s0; - else if (b_fclass_s0.is_nan) + else if (b_fclass_s0.is_nan) fminmax_res_s0 = dataa_s0; - else begin + else begin // FMIN, FMAX fminmax_res_s0 = (op_mod_s0[0] ^ a_smaller_s0) ? 
dataa_s0 : datab_s0; end end - // Sign injection + // Sign injection reg [31:0] fsgnj_res_s0; // result of sign injection always @(*) begin case (op_mod_s0[1:0]) @@ -158,12 +159,12 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( endcase end - // Comparison + // Comparison reg fcmp_res_s0; // result of comparison reg fcmp_fflags_NV_s0; // comparison fflags always @(*) begin case (op_mod_s0[1:0]) - 0: begin // LE + 0: begin // LE if (a_fclass_s0.is_nan || b_fclass_s0.is_nan) begin fcmp_res_s0 = 0; fcmp_fflags_NV_s0 = 1; @@ -179,12 +180,12 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( end else begin fcmp_res_s0 = (a_smaller_s0 & ~ab_equal_s0); fcmp_fflags_NV_s0 = 0; - end + end end 2: begin // EQ if (a_fclass_s0.is_nan || b_fclass_s0.is_nan) begin fcmp_res_s0 = 0; - fcmp_fflags_NV_s0 = a_fclass_s0.is_signaling | b_fclass_s0.is_signaling; + fcmp_fflags_NV_s0 = a_fclass_s0.is_signaling | b_fclass_s0.is_signaling; end else begin fcmp_res_s0 = ab_equal_s0; fcmp_fflags_NV_s0 = 0; @@ -192,7 +193,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( end default: begin fcmp_res_s0 = 'x; - fcmp_fflags_NV_s0 = 'x; + fcmp_fflags_NV_s0 = 'x; end endcase end @@ -216,7 +217,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( // FMV result_s0 = dataa_s0; fflags_NV_s0 = 0; - end + end 6,7: begin // MIN/MAX result_s0 = fminmax_res_s0; @@ -229,7 +230,7 @@ module VX_fncp_unit import VX_fpu_pkg::*; #( VX_pipe_register #( .DATAW (32 + 1), - .DEPTH (LATENCY > 0) + .DEPTH (OUT_REG) ) pipe_reg1 ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fpu/VX_fpu_cvt.sv b/hw/rtl/fpu/VX_fpu_cvt.sv index 37a2ab419..2d0d52753 100644 --- a/hw/rtl/fpu/VX_fpu_cvt.sv +++ b/hw/rtl/fpu/VX_fpu_cvt.sv @@ -46,56 +46,68 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #( input wire ready_out, output wire valid_out ); - `UNUSED_VAR (frm) + localparam DATAW = 32 + `INST_FRM_BITS + 1 + 1; + + wire [NUM_LANES-1:0][DATAW-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; 
fflags_t [NUM_LANES-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][31:0] pe_data_in; + wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in + assign data_in[i][0 +: 32] = dataa[i]; + assign data_in[i][32 +: `INST_FRM_BITS] = frm; + assign data_in[i][32 + `INST_FRM_BITS +: 1] = is_itof; + assign data_in[i][32 + `INST_FRM_BITS + 1 +: 1] = is_signed; + end + VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FCVT), - .DATA_IN_WIDTH(32), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), .valid_in (valid_in), - .data_in (dataa), + .data_in (data_in), .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), .ready_out (ready_out) ); - for (genvar i = 0; i < NUM_LANES; ++i) begin + `UNUSED_VAR (pe_data_in) + + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fcvt_units VX_fcvt_unit #( - .LATENCY (`LATENCY_FCVT) + .LATENCY (`LATENCY_FCVT), + .OUT_REG (1) ) fcvt_unit ( .clk (clk), .reset (reset), .enable (pe_enable), - .frm (frm), - .is_itof (is_itof), - .is_signed (is_signed), + .frm (pe_data_in[0][32 +: `INST_FRM_BITS]), + .is_itof (pe_data_in[0][32 + `INST_FRM_BITS +: 1]), + .is_signed (pe_data_in[0][32 + `INST_FRM_BITS + 1 +: 1]), .dataa (pe_data_in[i][0 +: 32]), .result (pe_data_out[i][0 +: 32]), .fflags 
(pe_data_out[i][32 +: `FP_FLAGS_BITS]) diff --git a/hw/rtl/fpu/VX_fpu_div.sv b/hw/rtl/fpu/VX_fpu_div.sv index 81fc8f022..2238307a6 100644 --- a/hw/rtl/fpu/VX_fpu_div.sv +++ b/hw/rtl/fpu/VX_fpu_div.sv @@ -44,31 +44,33 @@ module VX_fpu_div import VX_fpu_pkg::*; #( output wire valid_out, input wire ready_out ); - `UNUSED_VAR (frm) + localparam DATAW = 2 * 32 + `INST_FRM_BITS; + + wire [NUM_LANES-1:0][DATAW-1:0] data_in; - wire [NUM_LANES-1:0][2*32-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][2*32-1:0] pe_data_in; + wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in assign data_in[i][0 +: 32] = dataa[i]; assign data_in[i][32 +: 32] = datab[i]; + assign data_in[i][64 +: `INST_FRM_BITS] = frm; end VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FDIV), - .DATA_IN_WIDTH(2*32), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 
1 : 0) + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), @@ -77,15 +79,17 @@ module VX_fpu_div import VX_fpu_pkg::*; #( .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), .ready_out (ready_out) ); - for (genvar i = 0; i < NUM_LANES; ++i) begin + `UNUSED_VAR (pe_data_in) + + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end @@ -94,7 +98,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs acl_fdiv fdiv ( .clk (clk), .areset (1'b0), @@ -112,7 +116,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `elsif VIVADO - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs wire [3:0] tuser; xil_fdiv fdiv ( .aclk (clk), @@ -134,7 +138,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fdivs reg [63:0] r; `UNUSED_VAR (r) fflags_t f; @@ -143,9 +147,9 @@ module VX_fpu_div import VX_fpu_pkg::*; #( dpi_fdiv ( pe_enable, int'(0), - {32'hffffffff, pe_data_in[i][0 +: 32]}, - {32'hffffffff, pe_data_in[i][32 +: 32]}, - frm, + {32'hffffffff, pe_data_in[i][0 +: 32]}, // a + {32'hffffffff, pe_data_in[i][32 +: 32]}, // b + pe_data_in[0][64 +: `INST_FRM_BITS], // frm r, f ); diff --git a/hw/rtl/fpu/VX_fpu_dpi.sv b/hw/rtl/fpu/VX_fpu_dpi.sv index 781b5b88e..e900e105c 100644 --- a/hw/rtl/fpu/VX_fpu_dpi.sv +++ b/hw/rtl/fpu/VX_fpu_dpi.sv @@ -76,7 +76,6 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( reg is_fadd, is_fsub, is_fmul, is_fmadd, is_fmsub, is_fnmadd, is_fnmsub; reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, 
is_ftou, is_f2f; - reg dst_fmt, int_fmt; reg [NUM_LANES-1:0][63:0] operands [3]; @@ -88,7 +87,8 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( end end - `UNUSED_VAR (fmt) + wire f_fmt = fmt[0]; + wire i_fmt = fmt[1]; always @(*) begin is_fadd = 0; @@ -106,25 +106,11 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( is_ftou = 0; is_f2f = 0; - dst_fmt = 0; - int_fmt = 0; - - `ifdef FLEN_64 - dst_fmt = fmt[0]; - `endif - - `ifdef XLEN_64 - int_fmt = fmt[1]; - `endif - case (op_type) - `INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = 1; end - `INST_FPU_SUB: begin core_select = FPU_FMA; is_fsub = 1; end + `INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = ~i_fmt; is_fsub = i_fmt; end + `INST_FPU_MADD: begin core_select = FPU_FMA; is_fmadd = ~i_fmt; is_fmsub = i_fmt; end + `INST_FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = ~i_fmt; is_fnmsub = i_fmt; end `INST_FPU_MUL: begin core_select = FPU_FMA; is_fmul = 1; end - `INST_FPU_MADD: begin core_select = FPU_FMA; is_fmadd = 1; end - `INST_FPU_MSUB: begin core_select = FPU_FMA; is_fmsub = 1; end - `INST_FPU_NMADD: begin core_select = FPU_FMA; is_fnmadd = 1; end - `INST_FPU_NMSUB: begin core_select = FPU_FMA; is_fnmsub = 1; end `INST_FPU_DIV: begin core_select = FPU_DIVSQRT; is_div = 1; end `INST_FPU_SQRT: begin core_select = FPU_DIVSQRT; end `INST_FPU_CMP: begin core_select = FPU_NCP; is_fcmp = 1; end @@ -138,7 +124,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( end generate - begin : fma + begin : g_fma reg [NUM_LANES-1:0][`XLEN-1:0] result_fma; reg [NUM_LANES-1:0][63:0] result_fadd; @@ -164,13 +150,13 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_fadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]); - dpi_fsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]); - dpi_fmul (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fmul[i], 
fflags_fmul[i]); - dpi_fmadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmadd[i], fflags_fmadd[i]); - dpi_fmsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmsub[i], fflags_fmsub[i]); - dpi_fnmadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmadd[i], fflags_fnmadd[i]); - dpi_fnmsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmsub[i], fflags_fnmsub[i]); + dpi_fadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]); + dpi_fsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]); + dpi_fmul (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fmul[i], fflags_fmul[i]); + dpi_fmadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmadd[i], fflags_fmadd[i]); + dpi_fmsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fmsub[i], fflags_fmsub[i]); + dpi_fnmadd (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmadd[i], fflags_fnmadd[i]); + dpi_fnmsub (fma_fire, int'(f_fmt), operands[0][i], operands[1][i], operands[2][i], frm, result_fnmsub[i], fflags_fnmsub[i]); result_fma[i] = is_fadd ? result_fadd[i][`XLEN-1:0] : is_fsub ? 
result_fsub[i][`XLEN-1:0] : @@ -214,7 +200,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( endgenerate generate - begin : fdiv + begin : g_fdiv reg [NUM_LANES-1:0][`XLEN-1:0] result_fdiv_r; reg [NUM_LANES-1:0][63:0] result_fdiv; @@ -226,7 +212,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_fdiv (fdiv_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]); + dpi_fdiv (fdiv_fire, int'(f_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]); result_fdiv_r[i] = result_fdiv[i][`XLEN-1:0]; end end @@ -253,7 +239,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( endgenerate generate - begin : fsqrt + begin : g_fsqrt reg [NUM_LANES-1:0][`XLEN-1:0] result_fsqrt_r; reg [NUM_LANES-1:0][63:0] result_fsqrt; @@ -265,7 +251,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_fsqrt (fsqrt_fire, int'(dst_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]); + dpi_fsqrt (fsqrt_fire, int'(f_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]); result_fsqrt_r[i] = result_fsqrt[i][`XLEN-1:0]; end end @@ -292,7 +278,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( endgenerate generate - begin : fcvt + begin : g_fcvt reg [NUM_LANES-1:0][`XLEN-1:0] result_fcvt; reg [NUM_LANES-1:0][63:0] result_itof; @@ -313,11 +299,11 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_itof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]); - dpi_utof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]); - dpi_ftoi (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]); - dpi_ftou (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]); - dpi_f2f (fcvt_fire, int'(dst_fmt), 
operands[0][i], result_f2f[i]); + dpi_itof (fcvt_fire, int'(f_fmt), int'(i_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]); + dpi_utof (fcvt_fire, int'(f_fmt), int'(i_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]); + dpi_ftoi (fcvt_fire, int'(i_fmt), int'(f_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]); + dpi_ftou (fcvt_fire, int'(i_fmt), int'(f_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]); + dpi_f2f (fcvt_fire, int'(f_fmt), operands[0][i], result_f2f[i]); result_fcvt[i] = is_itof ? result_itof[i][`XLEN-1:0] : is_utof ? result_utof[i][`XLEN-1:0] : @@ -356,7 +342,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( endgenerate generate - begin : fncp + begin : g_fncp reg [NUM_LANES-1:0][`XLEN-1:0] result_fncp; reg [NUM_LANES-1:0][63:0] result_fclss; @@ -384,17 +370,17 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( always @(*) begin for (integer i = 0; i < NUM_LANES; ++i) begin - dpi_fclss (fncp_fire, int'(dst_fmt), operands[0][i], result_fclss[i]); - dpi_fle (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]); - dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]); - dpi_feq (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]); - dpi_fmin (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]); - dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]); - dpi_fsgnj (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnj[i]); - dpi_fsgnjn (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]); - dpi_fsgnjx (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]); - result_fmvx[i] = dst_fmt ? operands[0][i] : 64'($signed(operands[0][i][31:0])); // sign-extension - result_fmvf[i] = dst_fmt ? 
operands[0][i] : (operands[0][i] | 64'hffffffff00000000); // nan-boxing + dpi_fclss (fncp_fire, int'(f_fmt), operands[0][i], result_fclss[i]); + dpi_fle (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]); + dpi_flt (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]); + dpi_feq (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]); + dpi_fmin (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]); + dpi_fmax (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]); + dpi_fsgnj (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnj[i]); + dpi_fsgnjn (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]); + dpi_fsgnjx (fncp_fire, int'(f_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]); + result_fmvx[i] = f_fmt ? operands[0][i] : 64'($signed(operands[0][i][31:0])); // sign-extension + result_fmvf[i] = f_fmt ? 
operands[0][i] : (operands[0][i] | 64'hffffffff00000000); // nan-boxing end end @@ -444,7 +430,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( VX_stream_arb #( .NUM_INPUTS (2), .DATAW (RSP_DATAW), - .ARBITER ("R"), + .ARBITER ("P"), .OUT_BUF (0) ) div_sqrt_arb ( .clk (clk), @@ -463,14 +449,14 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #( wire [NUM_FPC-1:0][RSP_DATAW-1:0] per_core_data_out; - for (genvar i = 0; i < NUM_FPC; ++i) begin + for (genvar i = 0; i < NUM_FPC; ++i) begin : g_per_core_data_out assign per_core_data_out[i] = {per_core_result[i], per_core_has_fflags[i], per_core_fflags[i], per_core_tag_out[i]}; end VX_stream_arb #( .NUM_INPUTS (NUM_FPC), .DATAW (RSP_DATAW), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (OUT_BUF) ) rsp_arb ( .clk (clk), diff --git a/hw/rtl/fpu/VX_fpu_dsp.sv b/hw/rtl/fpu/VX_fpu_dsp.sv index ad398dcd7..af75c8a75 100644 --- a/hw/rtl/fpu/VX_fpu_dsp.sv +++ b/hw/rtl/fpu/VX_fpu_dsp.sv @@ -51,68 +51,39 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( localparam FPU_DIVSQRT = 1; localparam FPU_CVT = 2; localparam FPU_NCP = 3; - localparam NUM_FPC = 4; - localparam FPC_BITS = `LOG2UP(NUM_FPC); + localparam NUM_FPCORES = 4; + localparam FPCORES_BITS = `LOG2UP(NUM_FPCORES); + localparam REQ_DATAW = NUM_LANES + TAG_WIDTH + `INST_FPU_BITS + `INST_FMT_BITS + `INST_FRM_BITS + 3 * (NUM_LANES * 32); localparam RSP_DATAW = (NUM_LANES * 32) + 1 + $bits(fflags_t) + TAG_WIDTH; `UNUSED_VAR (fmt) - wire [NUM_FPC-1:0] per_core_ready_in; - wire [NUM_FPC-1:0][NUM_LANES-1:0][31:0] per_core_result; - wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out; - wire [NUM_FPC-1:0] per_core_ready_out; - wire [NUM_FPC-1:0] per_core_valid_out; - wire [NUM_FPC-1:0] per_core_has_fflags; - fflags_t [NUM_FPC-1:0] per_core_fflags; - - wire div_ready_in, sqrt_ready_in; - wire [NUM_LANES-1:0][31:0] div_result, sqrt_result; - wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out; - wire div_ready_out, sqrt_ready_out; - wire div_valid_out, sqrt_valid_out; - wire div_has_fflags, 
sqrt_has_fflags; - fflags_t div_fflags, sqrt_fflags; - - reg [FPC_BITS-1:0] core_select; - reg is_madd, is_sub, is_neg, is_div, is_itof, is_signed; - - always @(*) begin - is_madd = 0; - is_sub = 0; - is_neg = 0; - is_div = 0; - is_itof = 0; - is_signed = 0; - case (op_type) - `INST_FPU_ADD: begin core_select = FPU_FMA; end - `INST_FPU_SUB: begin core_select = FPU_FMA; is_sub = 1; end - `INST_FPU_MUL: begin core_select = FPU_FMA; is_neg = 1; end - `INST_FPU_MADD: begin core_select = FPU_FMA; is_madd = 1; end - `INST_FPU_MSUB: begin core_select = FPU_FMA; is_madd = 1; is_sub = 1; end - `INST_FPU_NMADD: begin core_select = FPU_FMA; is_madd = 1; is_neg = 1; end - `INST_FPU_NMSUB: begin core_select = FPU_FMA; is_madd = 1; is_sub = 1; is_neg = 1; end - `INST_FPU_DIV: begin core_select = FPU_DIVSQRT; is_div = 1; end - `INST_FPU_SQRT: begin core_select = FPU_DIVSQRT; end - `INST_FPU_F2I: begin core_select = FPU_CVT; is_signed = 1; end - `INST_FPU_F2U: begin core_select = FPU_CVT; end - `INST_FPU_I2F: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end - `INST_FPU_U2F: begin core_select = FPU_CVT; is_itof = 1; end - default: begin core_select = FPU_NCP; end - endcase - end - - `RESET_RELAY (fma_reset, reset); - `RESET_RELAY (div_reset, reset); - `RESET_RELAY (sqrt_reset, reset); - `RESET_RELAY (cvt_reset, reset); - `RESET_RELAY (ncp_reset, reset); + wire [NUM_FPCORES-1:0] per_core_valid_in; + wire [NUM_FPCORES-1:0][REQ_DATAW-1:0] per_core_data_in; + wire [NUM_FPCORES-1:0] per_core_ready_in; + + wire [NUM_FPCORES-1:0][NUM_LANES-1:0] per_core_mask_in; + wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_in; + wire [NUM_FPCORES-1:0][`INST_FPU_BITS-1:0] per_core_op_type; + wire [NUM_FPCORES-1:0][`INST_FMT_BITS-1:0] per_core_fmt; + wire [NUM_FPCORES-1:0][`INST_FRM_BITS-1:0] per_core_frm; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_dataa; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_datab; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] 
per_core_datac; + + wire [NUM_FPCORES-1:0] per_core_valid_out; + wire [NUM_FPCORES-1:0][NUM_LANES-1:0][31:0] per_core_result; + wire [NUM_FPCORES-1:0][TAG_WIDTH-1:0] per_core_tag_out; + wire [NUM_FPCORES-1:0] per_core_has_fflags; + fflags_t [NUM_FPCORES-1:0] per_core_fflags; + wire [NUM_FPCORES-1:0] per_core_ready_out; wire [NUM_LANES-1:0][31:0] dataa_s; wire [NUM_LANES-1:0][31:0] datab_s; wire [NUM_LANES-1:0][31:0] datac_s; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data assign dataa_s[i] = dataa[i][31:0]; assign datab_s[i] = datab[i][31:0]; assign datac_s[i] = datac[i][31:0]; @@ -122,23 +93,60 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_VAR (datab) `UNUSED_VAR (datac) + // Decode fpu core type + wire [FPCORES_BITS-1:0] core_select = op_type[3:2]; + + VX_stream_switch #( + .DATAW (REQ_DATAW), + .NUM_OUTPUTS (NUM_FPCORES) + ) req_switch ( + .clk (clk), + .reset (reset), + .sel_in (core_select), + .valid_in (valid_in), + .ready_in (ready_in), + .data_in ({mask_in, tag_in, fmt, frm, dataa_s, datab_s, datac_s, op_type}), + .data_out (per_core_data_in), + .valid_out (per_core_valid_in), + .ready_out (per_core_ready_in) + ); + + for (genvar i = 0; i < NUM_FPCORES; ++i) begin : g_per_core_data_in + assign { + per_core_mask_in[i], + per_core_tag_in[i], + per_core_fmt[i], + per_core_frm[i], + per_core_dataa[i], + per_core_datab[i], + per_core_datac[i], + per_core_op_type[i] + } = per_core_data_in[i]; + end + + // FMA core /////////////////////////////////////////////////////////////// + + wire is_madd = per_core_op_type[FPU_FMA][1]; + wire is_neg = per_core_op_type[FPU_FMA][0]; + wire is_sub = per_core_fmt[FPU_FMA][1]; + VX_fpu_fma #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH) ) fpu_fma ( .clk (clk), - .reset (fma_reset), - .valid_in (valid_in && (core_select == FPU_FMA)), + .reset (reset), + .valid_in (per_core_valid_in[FPU_FMA]), .ready_in (per_core_ready_in[FPU_FMA]), - .mask_in (mask_in), - .tag_in 
(tag_in), - .frm (frm), + .mask_in (per_core_mask_in[FPU_FMA]), + .tag_in (per_core_tag_in[FPU_FMA]), + .frm (per_core_frm[FPU_FMA]), .is_madd (is_madd), .is_sub (is_sub), .is_neg (is_neg), - .dataa (dataa_s), - .datab (datab_s), - .datac (datac_s), + .dataa (per_core_dataa[FPU_FMA]), + .datab (per_core_datab[FPU_FMA]), + .datac (per_core_datac[FPU_FMA]), .has_fflags (per_core_has_fflags[FPU_FMA]), .fflags (per_core_fflags[FPU_FMA]), .result (per_core_result[FPU_FMA]), @@ -147,25 +155,99 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .valid_out (per_core_valid_out[FPU_FMA]) ); + // Div/Sqrt cores ///////////////////////////////////////////////////////// + + wire [1:0] div_sqrt_valid_in; + wire [1:0][REQ_DATAW-1:0] div_sqrt_data_in; + wire [1:0] div_sqrt_ready_in; + + wire [1:0][NUM_LANES-1:0] div_sqrt_mask_in; + wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_in; + wire [1:0][`INST_FPU_BITS-1:0] div_sqrt_op_type; + wire [1:0][`INST_FMT_BITS-1:0] div_sqrt_fmt; + wire [1:0][`INST_FRM_BITS-1:0] div_sqrt_frm; + wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_dataa; + wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_datab; + wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_datac; + + wire [1:0] div_sqrt_valid_out; + wire [1:0][NUM_LANES-1:0][31:0] div_sqrt_result; + wire [1:0][TAG_WIDTH-1:0] div_sqrt_tag_out; + wire [1:0] div_sqrt_has_fflags; + fflags_t [1:0] div_sqrt_fflags; + wire [1:0] div_sqrt_ready_out; + + wire div_sqrt_valid_tmp_in; + wire [REQ_DATAW-1:0] div_sqrt_data_tmp_in; + wire div_sqrt_ready_tmp_in; + + VX_elastic_buffer #( + .DATAW (REQ_DATAW) + ) div_sqrt_req_buffer ( + .clk (clk), + .reset (reset), + .valid_in (per_core_valid_in[FPU_DIVSQRT]), + .ready_in (per_core_ready_in[FPU_DIVSQRT]), + .data_in (per_core_data_in[FPU_DIVSQRT]), + .data_out (div_sqrt_data_tmp_in), + .valid_out (div_sqrt_valid_tmp_in), + .ready_out (div_sqrt_ready_tmp_in) + ); + + wire is_sqrt = div_sqrt_data_tmp_in[0]; // op_type[0] + + VX_stream_switch #( + .DATAW (REQ_DATAW), + .NUM_OUTPUTS (2) + ) 
div_sqrt_req_switch ( + .clk (clk), + .reset (reset), + .sel_in (is_sqrt), + .valid_in (div_sqrt_valid_tmp_in), + .ready_in (div_sqrt_ready_tmp_in), + .data_in (div_sqrt_data_tmp_in), + .data_out (div_sqrt_data_in), + .valid_out (div_sqrt_valid_in), + .ready_out (div_sqrt_ready_in) + ); + + for (genvar i = 0; i < 2; ++i) begin : g_div_sqrt_data_in + assign { + div_sqrt_mask_in[i], + div_sqrt_tag_in[i], + div_sqrt_fmt[i], + div_sqrt_frm[i], + div_sqrt_dataa[i], + div_sqrt_datab[i], + div_sqrt_datac[i], + div_sqrt_op_type[i] + } = div_sqrt_data_in[i]; + end + + `UNUSED_VAR (div_sqrt_op_type) + `UNUSED_VAR (div_sqrt_fmt) + `UNUSED_VAR (div_sqrt_datab) + `UNUSED_VAR (div_sqrt_datac) + VX_fpu_div #( .NUM_LANES (NUM_LANES), .TAG_WIDTH (TAG_WIDTH) ) fpu_div ( .clk (clk), - .reset (div_reset), - .valid_in (valid_in && (core_select == FPU_DIVSQRT) && is_div), - .ready_in (div_ready_in), - .mask_in (mask_in), - .tag_in (tag_in), - .frm (frm), - .dataa (dataa_s), - .datab (datab_s), - .has_fflags (div_has_fflags), - .fflags (div_fflags), - .result (div_result), - .tag_out (div_tag_out), - .valid_out (div_valid_out), - .ready_out (div_ready_out) + .reset (reset), + .valid_in (div_sqrt_valid_in[0]), + .ready_in (div_sqrt_ready_in[0]), + .mask_in (div_sqrt_mask_in[0]), + .tag_in (div_sqrt_tag_in[0]), + .frm (div_sqrt_frm[0]), + .dataa (div_sqrt_dataa[0]), + .datab (div_sqrt_datab[0]), + .has_fflags (div_sqrt_has_fflags[0]), + .fflags (div_sqrt_fflags[0]), + .result (div_sqrt_result[0]), + .tag_out (div_sqrt_tag_out[0]), + .valid_out (div_sqrt_valid_out[0]), + .ready_out (div_sqrt_ready_out[0]) ); VX_fpu_sqrt #( @@ -173,38 +255,74 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .TAG_WIDTH (TAG_WIDTH) ) fpu_sqrt ( .clk (clk), - .reset (sqrt_reset), - .valid_in (valid_in && (core_select == FPU_DIVSQRT) && ~is_div), - .ready_in (sqrt_ready_in), - .mask_in (mask_in), - .tag_in (tag_in), - .frm (frm), - .dataa (dataa_s), - .has_fflags (sqrt_has_fflags), - .fflags (sqrt_fflags), - 
.result (sqrt_result), - .tag_out (sqrt_tag_out), - .valid_out (sqrt_valid_out), - .ready_out (sqrt_ready_out) + .reset (reset), + .valid_in (div_sqrt_valid_in[1]), + .ready_in (div_sqrt_ready_in[1]), + .mask_in (div_sqrt_mask_in[1]), + .tag_in (div_sqrt_tag_in[1]), + .frm (div_sqrt_frm[1]), + .dataa (div_sqrt_dataa[1]), + .has_fflags (div_sqrt_has_fflags[1]), + .fflags (div_sqrt_fflags[1]), + .result (div_sqrt_result[1]), + .tag_out (div_sqrt_tag_out[1]), + .valid_out (div_sqrt_valid_out[1]), + .ready_out (div_sqrt_ready_out[1]) ); + wire [1:0][RSP_DATAW-1:0] div_sqrt_arb_data_in; + for (genvar i = 0; i < 2; ++i) begin : g_div_sqrt_arb_data_in + assign div_sqrt_arb_data_in[i] = { + div_sqrt_result[i], + div_sqrt_has_fflags[i], + div_sqrt_fflags[i], + div_sqrt_tag_out[i] + }; + end + + VX_stream_arb #( + .NUM_INPUTS (2), + .DATAW (RSP_DATAW), + .ARBITER ("P"), + .OUT_BUF (0) + ) div_sqrt_rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in (div_sqrt_valid_out), + .ready_in (div_sqrt_ready_out), + .data_in (div_sqrt_arb_data_in), + .data_out ({ + per_core_result[FPU_DIVSQRT], + per_core_has_fflags[FPU_DIVSQRT], + per_core_fflags[FPU_DIVSQRT], + per_core_tag_out[FPU_DIVSQRT] + }), + .valid_out (per_core_valid_out[FPU_DIVSQRT]), + .ready_out (per_core_ready_out[FPU_DIVSQRT]), + `UNUSED_PIN (sel_out) + ); + + // CVT core /////////////////////////////////////////////////////////////// + + wire is_itof = per_core_op_type[FPU_CVT][1]; + wire is_signed = ~per_core_op_type[FPU_CVT][0]; wire cvt_ret_int_in = ~is_itof; wire cvt_ret_int_out; VX_fpu_cvt #( .NUM_LANES (NUM_LANES), - .TAG_WIDTH (TAG_WIDTH+1) + .TAG_WIDTH (1+TAG_WIDTH) ) fpu_cvt ( .clk (clk), - .reset (cvt_reset), - .valid_in (valid_in && (core_select == FPU_CVT)), + .reset (reset), + .valid_in (per_core_valid_in[FPU_CVT]), .ready_in (per_core_ready_in[FPU_CVT]), - .mask_in (mask_in), - .tag_in ({cvt_ret_int_in, tag_in}), - .frm (frm), + .mask_in (per_core_mask_in[FPU_CVT]), + .tag_in ({cvt_ret_int_in, 
per_core_tag_in[FPU_CVT]}), + .frm (per_core_frm[FPU_CVT]), .is_itof (is_itof), .is_signed (is_signed), - .dataa (dataa_s), + .dataa (per_core_dataa[FPU_CVT]), .has_fflags (per_core_has_fflags[FPU_CVT]), .fflags (per_core_fflags[FPU_CVT]), .result (per_core_result[FPU_CVT]), @@ -213,12 +331,14 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .ready_out (per_core_ready_out[FPU_CVT]) ); - wire ncp_ret_int_in = (op_type == `INST_FPU_CMP) - || `INST_FPU_IS_CLASS(op_type, frm) - || `INST_FPU_IS_MVXW(op_type, frm); + // NCP core /////////////////////////////////////////////////////////////// + + wire ncp_ret_int_in = (per_core_op_type[FPU_NCP] == `INST_FPU_CMP) + || `INST_FPU_IS_CLASS(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]) + || `INST_FPU_IS_MVXW(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]); wire ncp_ret_int_out; - wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(op_type, frm); + wire ncp_ret_sext_in = `INST_FPU_IS_MVXW(per_core_op_type[FPU_NCP], per_core_frm[FPU_NCP]); wire ncp_ret_sext_out; VX_fpu_ncp #( @@ -226,15 +346,15 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( .TAG_WIDTH (TAG_WIDTH+2) ) fpu_ncp ( .clk (clk), - .reset (ncp_reset), - .valid_in (valid_in && (core_select == FPU_NCP)), + .reset (reset), + .valid_in (per_core_valid_in[FPU_NCP]), .ready_in (per_core_ready_in[FPU_NCP]), - .mask_in (mask_in), - .tag_in ({ncp_ret_sext_in, ncp_ret_int_in, tag_in}), - .op_type (op_type), - .frm (frm), - .dataa (dataa_s), - .datab (datab_s), + .mask_in (per_core_mask_in[FPU_NCP]), + .tag_in ({ncp_ret_sext_in, ncp_ret_int_in, per_core_tag_in[FPU_NCP]}), + .op_type (per_core_op_type[FPU_NCP]), + .frm (per_core_frm[FPU_NCP]), + .dataa (per_core_dataa[FPU_NCP]), + .datab (per_core_datab[FPU_NCP]), .result (per_core_result[FPU_NCP]), .has_fflags (per_core_has_fflags[FPU_NCP]), .fflags (per_core_fflags[FPU_NCP]), @@ -245,37 +365,10 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( /////////////////////////////////////////////////////////////////////////// - assign 
per_core_ready_in[FPU_DIVSQRT] = is_div ? div_ready_in : sqrt_ready_in; - - VX_stream_arb #( - .NUM_INPUTS (2), - .DATAW (RSP_DATAW), - .ARBITER ("R"), - .OUT_BUF (0) - ) div_sqrt_arb ( - .clk (clk), - .reset (reset), - .valid_in ({sqrt_valid_out, div_valid_out}), - .ready_in ({sqrt_ready_out, div_ready_out}), - .data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out}, - {div_result, div_has_fflags, div_fflags, div_tag_out}}), - .data_out ({ - per_core_result[FPU_DIVSQRT], - per_core_has_fflags[FPU_DIVSQRT], - per_core_fflags[FPU_DIVSQRT], - per_core_tag_out[FPU_DIVSQRT] - }), - .valid_out (per_core_valid_out[FPU_DIVSQRT]), - .ready_out (per_core_ready_out[FPU_DIVSQRT]), - `UNUSED_PIN (sel_out) - ); - - /////////////////////////////////////////////////////////////////////////// - - reg [NUM_FPC-1:0][RSP_DATAW+2-1:0] per_core_data_out; + reg [NUM_FPCORES-1:0][RSP_DATAW+2-1:0] per_core_data_out; always @(*) begin - for (integer i = 0; i < NUM_FPC; ++i) begin + for (integer i = 0; i < NUM_FPCORES; ++i) begin per_core_data_out[i][RSP_DATAW+1:2] = { per_core_result[i], per_core_has_fflags[i], @@ -294,9 +387,9 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_VAR (op_ret_int_out) VX_stream_arb #( - .NUM_INPUTS (NUM_FPC), + .NUM_INPUTS (NUM_FPCORES), .DATAW (RSP_DATAW + 2), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (OUT_BUF) ) rsp_arb ( .clk (clk), @@ -310,25 +403,22 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #( `UNUSED_PIN (sel_out) ); - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result `ifdef FPU_RV64F - reg [`XLEN-1:0] result_r; + reg [`XLEN-1:0] result_w; always @(*) begin case (op_ret_int_out) - 2'b11: result_r = `XLEN'($signed(result_s[i])); - 2'b01: result_r = {32'h00000000, result_s[i]}; - default: result_r = {32'hffffffff, result_s[i]}; + 2'b11: result_w = `XLEN'($signed(result_s[i])); + 2'b01: result_w = {32'h00000000, result_s[i]}; + default: result_w = {32'hffffffff, result_s[i]}; endcase 
end - assign result[i] = result_r; + assign result[i] = result_w; `else assign result[i] = result_s[i]; `endif end - // can accept new request? - assign ready_in = per_core_ready_in[core_select]; - endmodule `endif diff --git a/hw/rtl/fpu/VX_fpu_fma.sv b/hw/rtl/fpu/VX_fpu_fma.sv index 3522d8a1e..e793ff55b 100644 --- a/hw/rtl/fpu/VX_fpu_fma.sv +++ b/hw/rtl/fpu/VX_fpu_fma.sv @@ -49,26 +49,27 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( input wire ready_out, output wire valid_out ); - `UNUSED_VAR (frm) + localparam DATAW = 3 * 32 + `INST_FRM_BITS; + + wire [NUM_LANES-1:0][DATAW-1:0] data_in; - wire [NUM_LANES-1:0][3*32-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][3*32-1:0] pe_data_in; + wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; reg [NUM_LANES-1:0][31:0] a, b, c; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_select always @(*) begin if (is_madd) begin // MADD / MSUB / NMADD / NMSUB - a[i] = is_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i]; + a[i] = {is_neg ^ dataa[i][31], dataa[i][30:0]}; b[i] = datab[i]; - c[i] = (is_neg ^ is_sub) ? {~datac[i][31], datac[i][30:0]} : datac[i]; + c[i] = {is_neg ^ is_sub ^ datac[i][31], datac[i][30:0]}; end else begin if (is_neg) begin // MUL @@ -77,29 +78,30 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( c[i] = '0; end else begin // ADD / SUB - a[i] = 32'h3f800000; // 1.0f - b[i] = dataa[i]; - c[i] = is_sub ? 
{~datab[i][31], datab[i][30:0]} : datab[i]; + a[i] = dataa[i]; + b[i] = 32'h3f800000; // 1.0f + c[i] = {is_sub ^ datab[i][31], datab[i][30:0]}; end end end end - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in assign data_in[i][0 +: 32] = a[i]; assign data_in[i][32 +: 32] = b[i]; assign data_in[i][64 +: 32] = c[i]; + assign data_in[i][96 +: `INST_FRM_BITS] = frm; end VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FMA), - .DATA_IN_WIDTH(3*32), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), - .PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0), // must be registered for DSPs - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .PE_REG (0), + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), @@ -108,15 +110,17 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), .ready_out (ready_out) ); - for (genvar i = 0; i < NUM_LANES; ++i) begin + `UNUSED_VAR (pe_data_in) + + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end @@ -125,7 +129,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas acl_fmadd fmadd ( .clk (clk), .areset (1'b0), @@ -143,7 +147,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `elsif VIVADO - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas wire [2:0] tuser; xil_fma fma ( @@ -168,7 +172,7 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i 
< NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fmas reg [63:0] r; `UNUSED_VAR (r) fflags_t f; @@ -177,10 +181,10 @@ module VX_fpu_fma import VX_fpu_pkg::*; #( dpi_fmadd ( pe_enable, int'(0), - {32'hffffffff, pe_data_in[i][0 +: 32]}, - {32'hffffffff, pe_data_in[i][32 +: 32]}, - {32'hffffffff, pe_data_in[i][64 +: 32]}, - frm, + {32'hffffffff, pe_data_in[i][0 +: 32]}, // a + {32'hffffffff, pe_data_in[i][32 +: 32]}, // b + {32'hffffffff, pe_data_in[i][64 +: 32]}, // c + pe_data_in[0][96 +: `INST_FRM_BITS], // frm r, f ); diff --git a/hw/rtl/fpu/VX_fpu_fpnew.sv b/hw/rtl/fpu/VX_fpu_fpnew.sv index 9ee7f1a2c..596a86513 100644 --- a/hw/rtl/fpu/VX_fpu_fpnew.sv +++ b/hw/rtl/fpu/VX_fpu_fpnew.sv @@ -90,7 +90,7 @@ module VX_fpu_fpnew reg [TAG_WIDTH-1:0] fpu_tag_in, fpu_tag_out; - reg [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands; + logic [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands; wire [NUM_LANES-1:0][`XLEN-1:0] fpu_result; fpnew_pkg::status_t fpu_status; @@ -134,20 +134,13 @@ module VX_fpu_fpnew fpu_op = fpnew_pkg::ADD; fpu_operands[1] = dataa; fpu_operands[2] = datab; - end - `INST_FPU_SUB: begin - fpu_op = fpnew_pkg::ADD; - fpu_operands[1] = dataa; - fpu_operands[2] = datab; - fpu_op_mod = 1; + fpu_op_mod = fmt[1]; // FADD or FSUB end `INST_FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end + `INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = fmt[1]; end + `INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = ~fmt[1]; end `INST_FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end `INST_FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end - `INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end - `INST_FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end - `INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end - `INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end `ifdef FLEN_64 `INST_FPU_F2F: begin fpu_op = fpnew_pkg::F2F; fpu_src_fmt = fmt[0] ? 
fpnew_pkg::FP32 : fpnew_pkg::FP64; end `endif @@ -169,7 +162,7 @@ module VX_fpu_fpnew end `UNUSED_VAR (mask_in) - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_fpnew_coreses wire [(TAG_WIDTH+1)-1:0] fpu_tag; wire fpu_valid_out_uq; wire fpu_ready_in_uq; @@ -183,8 +176,7 @@ module VX_fpu_fpnew .Features (FPU_FEATURES), .Implementation (FPU_IMPLEMENTATION), .TagType (logic[(TAG_WIDTH+1)-1:0]), - .TrueSIMDClass (1), - .EnableSIMDMask (1) + .DivSqrtSel (fpnew_pkg::PULP) ) fpnew_core ( .clk_i (clk), .rst_ni (~reset), @@ -196,11 +188,11 @@ module VX_fpu_fpnew .dst_fmt_i (fpu_dst_fmt), .int_fmt_i (fpu_int_fmt), .vectorial_op_i (1'b0), - .simd_mask_i (mask_in[i]), + .simd_mask_i (1'b1), .tag_i ({fpu_tag_in, fpu_has_fflags}), .in_valid_i (fpu_valid_in), .in_ready_o (fpu_ready_in_uq), - .flush_i (reset), + .flush_i (1'b0), .result_o (fpu_result[i]), .status_o (fpu_status_uq), .tag_o (fpu_tag), @@ -209,7 +201,7 @@ module VX_fpu_fpnew `UNUSED_PIN (busy_o) ); - if (i == 0) begin + if (i == 0) begin : g_output_0 assign {fpu_tag_out, fpu_has_fflags_out} = fpu_tag; assign fpu_valid_out = fpu_valid_out_uq; assign fpu_ready_in = fpu_ready_in_uq; diff --git a/hw/rtl/fpu/VX_fpu_ncp.sv b/hw/rtl/fpu/VX_fpu_ncp.sv index 34b822d89..21162dd6c 100644 --- a/hw/rtl/fpu/VX_fpu_ncp.sv +++ b/hw/rtl/fpu/VX_fpu_ncp.sv @@ -45,31 +45,34 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( input wire ready_out, output wire valid_out ); - `UNUSED_VAR (frm) + localparam DATAW = 2 * 32 + `INST_FRM_BITS + `INST_FPU_BITS; + + wire [NUM_LANES-1:0][DATAW-1:0] data_in; - wire [NUM_LANES-1:0][2*32-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; fflags_t [NUM_LANES-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][2*32-1:0] pe_data_in; + wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < 
NUM_LANES; ++i) begin : g_data_in assign data_in[i][0 +: 32] = dataa[i]; assign data_in[i][32 +: 32] = datab[i]; + assign data_in[i][64 +: `INST_FRM_BITS] = frm; + assign data_in[i][64 + `INST_FRM_BITS +: `INST_FPU_BITS] = op_type; end VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FNCP), - .DATA_IN_WIDTH(2*32), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), @@ -78,28 +81,31 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #( .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), .ready_out (ready_out) ); - for (genvar i = 0; i < NUM_LANES; ++i) begin + `UNUSED_VAR (pe_data_in) + + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fncp_units VX_fncp_unit #( - .LATENCY (`LATENCY_FNCP) + .LATENCY (`LATENCY_FNCP), + .OUT_REG (1) ) fncp_unit ( .clk (clk), .reset (reset), .enable (pe_enable), - .frm (frm), - .op_type (op_type), + .frm (pe_data_in[0][64 +: `INST_FRM_BITS]), + .op_type (pe_data_in[0][64 + `INST_FRM_BITS +: `INST_FPU_BITS]), .dataa (pe_data_in[i][0 +: 32]), .datab (pe_data_in[i][32 +: 32]), .result (pe_data_out[i][0 +: 32]), diff --git a/hw/rtl/fpu/VX_fpu_sqrt.sv b/hw/rtl/fpu/VX_fpu_sqrt.sv index a6e6dda9a..172a42e6f 100644 --- a/hw/rtl/fpu/VX_fpu_sqrt.sv +++ b/hw/rtl/fpu/VX_fpu_sqrt.sv @@ -43,43 +43,51 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( input wire ready_out, output wire valid_out ); + 
localparam DATAW = 32 + `INST_FRM_BITS; - `UNUSED_VAR (frm) + wire [NUM_LANES-1:0][DATAW-1:0] data_in; wire [NUM_LANES-1:0] mask_out; wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out; wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out; wire pe_enable; - wire [NUM_PES-1:0][31:0] pe_data_in; + wire [NUM_PES-1:0][DATAW-1:0] pe_data_in; wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out; + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_data_in + assign data_in[i][0 +: 32] = dataa[i]; + assign data_in[i][32 +: `INST_FRM_BITS] = frm; + end + VX_pe_serializer #( .NUM_LANES (NUM_LANES), .NUM_PES (NUM_PES), .LATENCY (`LATENCY_FSQRT), - .DATA_IN_WIDTH(32), - .DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32), + .DATA_IN_WIDTH (DATAW), + .DATA_OUT_WIDTH (`FP_FLAGS_BITS + 32), .TAG_WIDTH (NUM_LANES + TAG_WIDTH), .PE_REG (0), - .OUT_BUF (((NUM_LANES / NUM_PES) > 2) ? 1 : 0) + .OUT_BUF (2) ) pe_serializer ( .clk (clk), .reset (reset), .valid_in (valid_in), - .data_in (dataa), + .data_in (data_in), .tag_in ({mask_in, tag_in}), .ready_in (ready_in), .pe_enable (pe_enable), - .pe_data_in (pe_data_in), - .pe_data_out(pe_data_out), + .pe_data_out(pe_data_in), + .pe_data_in (pe_data_out), .valid_out (valid_out), .data_out (data_out), .tag_out ({mask_out, tag_out}), .ready_out (ready_out) ); - for (genvar i = 0; i < NUM_LANES; ++i) begin + `UNUSED_VAR (pe_data_in) + + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_result assign result[i] = data_out[i][0 +: 32]; assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS]; end @@ -88,12 +96,12 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `ifdef QUARTUS - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fsqrts acl_fsqrt fsqrt ( .clk (clk), .areset (1'b0), .en (pe_enable), - .a (pe_data_in[i]), + .a (pe_data_in[i][0 +: 32]), .q (pe_data_out[i][0 +: 32]) ); assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x; @@ -105,14 +113,14 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `elsif VIVADO - for 
(genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fsqrts wire tuser; xil_fsqrt fsqrt ( .aclk (clk), .aclken (pe_enable), .s_axis_a_tvalid (1'b1), - .s_axis_a_tdata (pe_data_in[i]), + .s_axis_a_tdata (pe_data_in[i][0 +: 32]), `UNUSED_PIN (m_axis_result_tvalid), .m_axis_result_tdata (pe_data_out[i][0 +: 32]), .m_axis_result_tuser (tuser) @@ -126,7 +134,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( `else - for (genvar i = 0; i < NUM_PES; ++i) begin + for (genvar i = 0; i < NUM_PES; ++i) begin : g_fsqrts reg [63:0] r; `UNUSED_VAR (r) fflags_t f; @@ -135,8 +143,8 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #( dpi_fsqrt ( pe_enable, int'(0), - {32'hffffffff, pe_data_in[i]}, - frm, + {32'hffffffff, pe_data_in[i][0 +: 32]}, // a + pe_data_in[0][32 +: `INST_FRM_BITS], // frm r, f ); diff --git a/hw/rtl/interfaces/VX_decode_sched_if.sv b/hw/rtl/interfaces/VX_decode_sched_if.sv index b82aafb55..1f47c30e9 100644 --- a/hw/rtl/interfaces/VX_decode_sched_if.sv +++ b/hw/rtl/interfaces/VX_decode_sched_if.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -16,18 +16,18 @@ interface VX_decode_sched_if (); wire valid; - wire is_wstall; + wire unlock; wire [`NW_WIDTH-1:0] wid; modport master ( output valid, - output is_wstall, + output unlock, output wid ); modport slave ( input valid, - input is_wstall, + input unlock, input wid ); diff --git a/hw/rtl/libs/VX_avs_adapter.sv b/hw/rtl/libs/VX_avs_adapter.sv index 35d329c7b..58144e7fe 100644 --- a/hw/rtl/libs/VX_avs_adapter.sv +++ b/hw/rtl/libs/VX_avs_adapter.sv @@ -16,11 +16,13 @@ `TRACING_OFF module VX_avs_adapter #( parameter DATA_WIDTH = 1, - parameter ADDR_WIDTH = 1, + parameter ADDR_WIDTH_IN = 1, + parameter ADDR_WIDTH_OUT= 32, parameter BURST_WIDTH = 1, parameter NUM_BANKS = 1, parameter TAG_WIDTH = 1, parameter RD_QUEUE_SIZE = 1, + parameter BANK_INTERLEAVE= 0, parameter REQ_OUT_BUF = 0, parameter RSP_OUT_BUF = 0 ) ( @@ -31,7 +33,7 @@ module VX_avs_adapter #( input wire mem_req_valid, input wire mem_req_rw, input wire [DATA_WIDTH/8-1:0] mem_req_byteen, - input wire [ADDR_WIDTH-1:0] mem_req_addr, + input wire [ADDR_WIDTH_IN-1:0] mem_req_addr, input wire [DATA_WIDTH-1:0] mem_req_data, input wire [TAG_WIDTH-1:0] mem_req_tag, output wire mem_req_ready, @@ -45,7 +47,7 @@ module VX_avs_adapter #( // AVS bus output wire [DATA_WIDTH-1:0] avs_writedata [NUM_BANKS], input wire [DATA_WIDTH-1:0] avs_readdata [NUM_BANKS], - output wire [ADDR_WIDTH-1:0] avs_address [NUM_BANKS], + output wire [ADDR_WIDTH_OUT-1:0] avs_address [NUM_BANKS], input wire avs_waitrequest [NUM_BANKS], output wire avs_write [NUM_BANKS], output wire avs_read [NUM_BANKS], @@ -53,59 +55,66 @@ module VX_avs_adapter #( output wire [BURST_WIDTH-1:0] avs_burstcount [NUM_BANKS], input wire avs_readdatavalid [NUM_BANKS] ); - localparam DATA_SIZE = DATA_WIDTH/8; - localparam RD_QUEUE_ADDR_WIDTH = `CLOG2(RD_QUEUE_SIZE+1); - localparam BANK_ADDRW = `LOG2UP(NUM_BANKS); - localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS); - localparam BANK_OFFSETW = ADDR_WIDTH - LOG2_NUM_BANKS; + localparam DATA_SIZE = DATA_WIDTH/8; + 
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); + localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); + localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + BANK_SEL_BITS; // to input space + localparam BANK_OFFSETW = DST_ADDR_WDITH - BANK_SEL_BITS; + + `STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN)) // Requests handling ////////////////////////////////////////////////////// wire [NUM_BANKS-1:0] req_queue_push, req_queue_pop; wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] req_queue_tag_out; wire [NUM_BANKS-1:0] req_queue_going_full; - wire [NUM_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size; - wire [BANK_ADDRW-1:0] req_bank_sel; - wire [BANK_OFFSETW-1:0] req_bank_off; wire [NUM_BANKS-1:0] bank_req_ready; - if (NUM_BANKS > 1) begin - assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; - end else begin + wire [BANK_OFFSETW-1:0] req_bank_off; + wire [BANK_SEL_WIDTH-1:0] req_bank_sel; + + wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr); + + if (NUM_BANKS > 1) begin : g_bank_sel + if (BANK_INTERLEAVE) begin : g_interleave + assign req_bank_sel = mem_req_addr_out[BANK_SEL_BITS-1:0]; + assign req_bank_off = mem_req_addr_out[BANK_SEL_BITS +: BANK_OFFSETW]; + end else begin : g_no_interleave + assign req_bank_sel = mem_req_addr_out[BANK_OFFSETW +: BANK_SEL_BITS]; + assign req_bank_off = mem_req_addr_out[BANK_OFFSETW-1:0]; + end + end else begin : g_no_bank_sel assign req_bank_sel = '0; + assign req_bank_off = mem_req_addr_out; end - assign req_bank_off = mem_req_addr[ADDR_WIDTH-1:LOG2_NUM_BANKS]; - - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_queue_push assign req_queue_push[i] = mem_req_valid && ~mem_req_rw && bank_req_ready[i] && (req_bank_sel == i); end - `RESET_RELAY_EX (bank_reset, reset, NUM_BANKS, 1); - - for (genvar i = 0; i < NUM_BANKS; ++i) begin - + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_pending_sizes 
VX_pending_size #( .SIZE (RD_QUEUE_SIZE) ) pending_size ( .clk (clk), - .reset (bank_reset[i]), + .reset (reset), .incr (req_queue_push[i]), .decr (req_queue_pop[i]), `UNUSED_PIN (empty), `UNUSED_PIN (alm_empty), .full (req_queue_going_full[i]), `UNUSED_PIN (alm_full), - .size (req_queue_size[i]) + `UNUSED_PIN (size) ); - `UNUSED_VAR (req_queue_size) + end + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rd_req_queues VX_fifo_queue #( .DATAW (TAG_WIDTH), .DEPTH (RD_QUEUE_SIZE) ) rd_req_queue ( .clk (clk), - .reset (bank_reset[i]), + .reset (reset), .push (req_queue_push[i]), .pop (req_queue_pop[i]), .data_in (mem_req_tag), @@ -118,7 +127,7 @@ module VX_avs_adapter #( ); end - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_req_out_bufs wire valid_out; wire rw_out; wire [DATA_SIZE-1:0] byteen_out; @@ -135,7 +144,7 @@ module VX_avs_adapter #( .OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF)) ) req_out_buf ( .clk (clk), - .reset (bank_reset[i]), + .reset (reset), .valid_in (valid_out_w), .ready_in (ready_out_w), .data_in ({mem_req_rw, mem_req_byteen, req_bank_off, mem_req_data}), @@ -146,7 +155,7 @@ module VX_avs_adapter #( assign avs_read[i] = valid_out && ~rw_out; assign avs_write[i] = valid_out && rw_out; - assign avs_address[i] = ADDR_WIDTH'(addr_out); + assign avs_address[i] = ADDR_WIDTH_OUT'(addr_out); assign avs_byteenable[i] = byteen_out; assign avs_writedata[i] = data_out; assign avs_burstcount[i] = BURST_WIDTH'(1); @@ -155,11 +164,7 @@ module VX_avs_adapter #( assign bank_req_ready[i] = ready_out_w && ~req_queue_going_full[i]; end - if (NUM_BANKS > 1) begin - assign mem_req_ready = bank_req_ready[req_bank_sel]; - end else begin - assign mem_req_ready = bank_req_ready; - end + assign mem_req_ready = bank_req_ready[req_bank_sel]; // Responses handling ///////////////////////////////////////////////////// @@ -170,14 +175,13 @@ module VX_avs_adapter #( wire [NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_queue_data_out; wire 
[NUM_BANKS-1:0] rsp_queue_empty; - for (genvar i = 0; i < NUM_BANKS; ++i) begin - + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rd_rsp_queues VX_fifo_queue #( .DATAW (DATA_WIDTH), .DEPTH (RD_QUEUE_SIZE) ) rd_rsp_queue ( .clk (clk), - .reset (bank_reset[i]), + .reset (reset), .push (avs_readdatavalid[i]), .pop (req_queue_pop[i]), .data_in (avs_readdata[i]), @@ -190,8 +194,8 @@ module VX_avs_adapter #( ); end - for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign rsp_arb_valid_in[i] = !rsp_queue_empty[i]; + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_rsp_arbs + assign rsp_arb_valid_in[i] = ~rsp_queue_empty[i]; assign rsp_arb_data_in[i] = {rsp_queue_data_out[i], req_queue_tag_out[i]}; assign req_queue_pop[i] = rsp_arb_valid_in[i] && rsp_arb_ready_in[i]; end @@ -199,7 +203,7 @@ module VX_avs_adapter #( VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), .DATAW (DATA_WIDTH + TAG_WIDTH), - .ARBITER ("F"), + .ARBITER ("R"), .OUT_BUF (RSP_OUT_BUF) ) rsp_arb ( .clk (clk), diff --git a/hw/rtl/libs/VX_axi_adapter.sv b/hw/rtl/libs/VX_axi_adapter.sv index 7fffb9be2..255789fd7 100644 --- a/hw/rtl/libs/VX_axi_adapter.sv +++ b/hw/rtl/libs/VX_axi_adapter.sv @@ -16,10 +16,13 @@ `TRACING_OFF module VX_axi_adapter #( parameter DATA_WIDTH = 512, - parameter ADDR_WIDTH = 32, - parameter TAG_WIDTH = 8, + parameter ADDR_WIDTH_IN = 1, + parameter ADDR_WIDTH_OUT = 32, + parameter TAG_WIDTH_IN = 8, + parameter TAG_WIDTH_OUT = 8, parameter NUM_BANKS = 1, - parameter AVS_ADDR_WIDTH = (ADDR_WIDTH - `CLOG2(DATA_WIDTH/8)), + parameter BANK_INTERLEAVE= 0, + parameter TAG_BUFFER_SIZE= 32, parameter RSP_OUT_BUF = 0 ) ( input wire clk, @@ -29,22 +32,22 @@ module VX_axi_adapter #( input wire mem_req_valid, input wire mem_req_rw, input wire [DATA_WIDTH/8-1:0] mem_req_byteen, - input wire [AVS_ADDR_WIDTH-1:0] mem_req_addr, + input wire [ADDR_WIDTH_IN-1:0] mem_req_addr, input wire [DATA_WIDTH-1:0] mem_req_data, - input wire [TAG_WIDTH-1:0] mem_req_tag, + input wire [TAG_WIDTH_IN-1:0] mem_req_tag, output 
wire mem_req_ready, // Vortex response output wire mem_rsp_valid, output wire [DATA_WIDTH-1:0] mem_rsp_data, - output wire [TAG_WIDTH-1:0] mem_rsp_tag, + output wire [TAG_WIDTH_IN-1:0] mem_rsp_tag, input wire mem_rsp_ready, // AXI write request address channel output wire m_axi_awvalid [NUM_BANKS], input wire m_axi_awready [NUM_BANKS], - output wire [ADDR_WIDTH-1:0] m_axi_awaddr [NUM_BANKS], - output wire [TAG_WIDTH-1:0] m_axi_awid [NUM_BANKS], + output wire [ADDR_WIDTH_OUT-1:0] m_axi_awaddr [NUM_BANKS], + output wire [TAG_WIDTH_OUT-1:0] m_axi_awid [NUM_BANKS], output wire [7:0] m_axi_awlen [NUM_BANKS], output wire [2:0] m_axi_awsize [NUM_BANKS], output wire [1:0] m_axi_awburst [NUM_BANKS], @@ -64,14 +67,14 @@ module VX_axi_adapter #( // AXI write response channel input wire m_axi_bvalid [NUM_BANKS], output wire m_axi_bready [NUM_BANKS], - input wire [TAG_WIDTH-1:0] m_axi_bid [NUM_BANKS], + input wire [TAG_WIDTH_OUT-1:0] m_axi_bid [NUM_BANKS], input wire [1:0] m_axi_bresp [NUM_BANKS], // AXI read address channel output wire m_axi_arvalid [NUM_BANKS], input wire m_axi_arready [NUM_BANKS], - output wire [ADDR_WIDTH-1:0] m_axi_araddr [NUM_BANKS], - output wire [TAG_WIDTH-1:0] m_axi_arid [NUM_BANKS], + output wire [ADDR_WIDTH_OUT-1:0] m_axi_araddr [NUM_BANKS], + output wire [TAG_WIDTH_OUT-1:0] m_axi_arid [NUM_BANKS], output wire [7:0] m_axi_arlen [NUM_BANKS], output wire [2:0] m_axi_arsize [NUM_BANKS], output wire [1:0] m_axi_arburst [NUM_BANKS], @@ -86,68 +89,95 @@ module VX_axi_adapter #( output wire m_axi_rready [NUM_BANKS], input wire [DATA_WIDTH-1:0] m_axi_rdata [NUM_BANKS], input wire m_axi_rlast [NUM_BANKS], - input wire [TAG_WIDTH-1:0] m_axi_rid [NUM_BANKS], + input wire [TAG_WIDTH_OUT-1:0] m_axi_rid [NUM_BANKS], input wire [1:0] m_axi_rresp [NUM_BANKS] ); - localparam AXSIZE = `CLOG2(DATA_WIDTH/8); - localparam BANK_ADDRW = `LOG2UP(NUM_BANKS); - localparam LOG2_NUM_BANKS = `CLOG2(NUM_BANKS); - - wire [BANK_ADDRW-1:0] req_bank_sel; - - if (NUM_BANKS > 1) begin 
- assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0]; - end else begin + localparam DATA_SIZE = `CLOG2(DATA_WIDTH/8); + localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); + localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); + localparam DST_ADDR_WDITH = ADDR_WIDTH_OUT + BANK_SEL_BITS - `CLOG2(DATA_WIDTH/8); // to input space + localparam BANK_OFFSETW = DST_ADDR_WDITH - BANK_SEL_BITS; + + `STATIC_ASSERT ((DST_ADDR_WDITH >= ADDR_WIDTH_IN), ("invalid address width: current=%0d, expected=%0d", DST_ADDR_WDITH, ADDR_WIDTH_IN)) + + wire [BANK_OFFSETW-1:0] req_bank_off; + wire [BANK_SEL_WIDTH-1:0] req_bank_sel; + + wire [DST_ADDR_WDITH-1:0] mem_req_addr_out = DST_ADDR_WDITH'(mem_req_addr); + + if (NUM_BANKS > 1) begin : g_bank_sel + if (BANK_INTERLEAVE) begin : g_interleave + assign req_bank_sel = mem_req_addr_out[BANK_SEL_BITS-1:0]; + assign req_bank_off = mem_req_addr_out[BANK_SEL_BITS +: BANK_OFFSETW]; + end else begin : g_no_interleave + assign req_bank_sel = mem_req_addr_out[BANK_OFFSETW +: BANK_SEL_BITS]; + assign req_bank_off = mem_req_addr_out[BANK_OFFSETW-1:0]; + end + end else begin : g_no_bank_sel assign req_bank_sel = '0; + assign req_bank_off = mem_req_addr_out; end - wire mem_req_fire = mem_req_valid && mem_req_ready; - - reg [NUM_BANKS-1:0] m_axi_aw_ack; - reg [NUM_BANKS-1:0] m_axi_w_ack; - - for (genvar i = 0; i < NUM_BANKS; ++i) begin - wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i]; - wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i]; - always @(posedge clk) begin - if (reset) begin - m_axi_aw_ack[i] <= 0; - m_axi_w_ack[i] <= 0; - end else begin - if (mem_req_fire && (req_bank_sel == i)) begin - m_axi_aw_ack[i] <= 0; - m_axi_w_ack[i] <= 0; - end else begin - if (m_axi_aw_fire) - m_axi_aw_ack[i] <= 1; - if (m_axi_w_fire) - m_axi_w_ack[i] <= 1; - end - end - end + // AXi write request synchronization + reg [NUM_BANKS-1:0] m_axi_aw_ack, m_axi_w_ack, axi_write_ready; + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready + VX_axi_write_ack 
axi_write_ack ( + .clk (clk), + .reset (reset), + .awvalid(m_axi_awvalid[i]), + .awready(m_axi_awready[i]), + .wvalid (m_axi_wvalid[i]), + .wready (m_axi_wready[i]), + .aw_ack (m_axi_aw_ack[i]), + .w_ack (m_axi_w_ack[i]), + .tx_rdy (axi_write_ready[i]), + `UNUSED_PIN (tx_ack) + ); end - wire axi_write_ready [NUM_BANKS]; - - for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i]) - && (m_axi_wready[i] || m_axi_w_ack[i]); + wire tbuf_full; + wire [TAG_WIDTH_OUT-1:0] mem_req_tag_out; + wire [TAG_WIDTH_OUT-1:0] mem_rsp_tag_out; + + // handle tag width mismatch + if (TAG_WIDTH_IN > TAG_WIDTH_OUT) begin : g_tag_buf + localparam TBUF_ADDRW = `CLOG2(TAG_BUFFER_SIZE); + wire [TBUF_ADDRW-1:0] tbuf_waddr, tbuf_raddr; + VX_index_buffer #( + .DATAW (TAG_WIDTH_IN), + .SIZE (TAG_BUFFER_SIZE) + ) tag_buf ( + .clk (clk), + .reset (reset), + .acquire_en (mem_req_valid && !mem_req_rw && mem_req_ready), + .write_addr (tbuf_waddr), + .write_data (mem_req_tag), + .read_data (mem_rsp_tag), + .read_addr (tbuf_raddr), + .release_en (mem_rsp_valid && mem_rsp_ready), + .full (tbuf_full), + `UNUSED_PIN (empty) + ); + assign mem_req_tag_out = TAG_WIDTH_OUT'(tbuf_waddr); + assign tbuf_raddr = mem_rsp_tag_out[TBUF_ADDRW-1:0]; + `UNUSED_VAR (mem_rsp_tag_out) + end else begin : g_no_tag_buf + assign tbuf_full = 0; + assign mem_req_tag_out = TAG_WIDTH_OUT'(mem_req_tag); + assign mem_rsp_tag = mem_rsp_tag_out[TAG_WIDTH_IN-1:0]; + `UNUSED_VAR (mem_rsp_tag_out) end - // Vortex request ack - if (NUM_BANKS > 1) begin - assign mem_req_ready = mem_req_rw ? axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]; - end else begin - assign mem_req_ready = mem_req_rw ? axi_write_ready[0] : m_axi_arready[0]; - end + // request ack + assign mem_req_ready = (mem_req_rw ? 
axi_write_ready[req_bank_sel] : m_axi_arready[req_bank_sel]) && ~tbuf_full; // AXI write request address channel - for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_aw_ack[i]; - assign m_axi_awaddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE; - assign m_axi_awid[i] = mem_req_tag; + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_addr + assign m_axi_awvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_aw_ack[i]; + assign m_axi_awaddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8); + assign m_axi_awid[i] = mem_req_tag_out; assign m_axi_awlen[i] = 8'b00000000; - assign m_axi_awsize[i] = 3'(AXSIZE); + assign m_axi_awsize[i] = 3'(DATA_SIZE); assign m_axi_awburst[i] = 2'b00; assign m_axi_awlock[i] = 2'b00; assign m_axi_awcache[i] = 4'b0000; @@ -157,29 +187,29 @@ module VX_axi_adapter #( end // AXI write request data channel - for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~m_axi_w_ack[i]; + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_data + assign m_axi_wvalid[i] = mem_req_valid && mem_req_rw && (req_bank_sel == i) && ~tbuf_full && ~m_axi_w_ack[i]; assign m_axi_wdata[i] = mem_req_data; assign m_axi_wstrb[i] = mem_req_byteen; assign m_axi_wlast[i] = 1'b1; end // AXI write response channel (ignore) - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_rsp `UNUSED_VAR (m_axi_bvalid[i]) `UNUSED_VAR (m_axi_bid[i]) `UNUSED_VAR (m_axi_bresp[i]) assign m_axi_bready[i] = 1'b1; - `RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time)); + `RUNTIME_ASSERT(~m_axi_bvalid[i] || m_axi_bresp[i] == 0, ("%t: *** AXI response error", $time)) end // AXI read request channel - for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign m_axi_arvalid[i] = 
mem_req_valid && ~mem_req_rw && (req_bank_sel == i); - assign m_axi_araddr[i] = (ADDR_WIDTH'(mem_req_addr) >> LOG2_NUM_BANKS) << AXSIZE; - assign m_axi_arid[i] = mem_req_tag; + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_req + assign m_axi_arvalid[i] = mem_req_valid && ~mem_req_rw && (req_bank_sel == i) && ~tbuf_full; + assign m_axi_araddr[i] = ADDR_WIDTH_OUT'(req_bank_off) << `CLOG2(DATA_WIDTH/8); + assign m_axi_arid[i] = mem_req_tag_out; assign m_axi_arlen[i] = 8'b00000000; - assign m_axi_arsize[i] = 3'(AXSIZE); + assign m_axi_arsize[i] = 3'(DATA_SIZE); assign m_axi_arburst[i] = 2'b00; assign m_axi_arlock[i] = 2'b00; assign m_axi_arcache[i] = 4'b0000; @@ -191,23 +221,22 @@ module VX_axi_adapter #( // AXI read response channel wire [NUM_BANKS-1:0] rsp_arb_valid_in; - wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH-1:0] rsp_arb_data_in; + wire [NUM_BANKS-1:0][DATA_WIDTH+TAG_WIDTH_OUT-1:0] rsp_arb_data_in; wire [NUM_BANKS-1:0] rsp_arb_ready_in; - `UNUSED_VAR (m_axi_rlast) - - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_read_rsp assign rsp_arb_valid_in[i] = m_axi_rvalid[i]; assign rsp_arb_data_in[i] = {m_axi_rdata[i], m_axi_rid[i]}; assign m_axi_rready[i] = rsp_arb_ready_in[i]; - `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time)); - `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time)); + `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rlast[i] == 1, ("%t: *** AXI response error", $time)) + `RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time)) + `UNUSED_VAR (m_axi_rlast[i]) end - + VX_stream_arb #( .NUM_INPUTS (NUM_BANKS), - .DATAW (DATA_WIDTH + TAG_WIDTH), - .ARBITER ("F"), + .DATAW (DATA_WIDTH + TAG_WIDTH_OUT), + .ARBITER ("R"), .OUT_BUF (RSP_OUT_BUF) ) rsp_arb ( .clk (clk), @@ -215,7 +244,7 @@ module VX_axi_adapter #( .valid_in (rsp_arb_valid_in), .data_in (rsp_arb_data_in), 
.ready_in (rsp_arb_ready_in), - .data_out ({mem_rsp_data, mem_rsp_tag}), + .data_out ({mem_rsp_data, mem_rsp_tag_out}), .valid_out (mem_rsp_valid), .ready_out (mem_rsp_ready), `UNUSED_PIN (sel_out) diff --git a/hw/rtl/libs/VX_axi_write_ack.sv b/hw/rtl/libs/VX_axi_write_ack.sv new file mode 100644 index 000000000..257ef18e5 --- /dev/null +++ b/hw/rtl/libs/VX_axi_write_ack.sv @@ -0,0 +1,60 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_platform.vh" + +`TRACING_OFF +module VX_axi_write_ack ( + input wire clk, + input wire reset, + input wire awvalid, + input wire awready, + input wire wvalid, + input wire wready, + output wire aw_ack, + output wire w_ack, + output wire tx_ack, + output wire tx_rdy +); + reg awfired; + reg wfired; + + wire awfire = awvalid && awready; + wire wfire = wvalid && wready; + + always @(posedge clk) begin + if (reset) begin + awfired <= 0; + wfired <= 0; + end else begin + if (awfire) begin + awfired <= 1; + end + if (wfire) begin + wfired <= 1; + end + if (tx_ack) begin + awfired <= 0; + wfired <= 0; + end + end + end + + assign aw_ack = awfired; + assign w_ack = wfired; + + assign tx_ack = (awfire || awfired) && (wfire || wfired); + assign tx_rdy = (awready || awfired) && (wready || wfired); + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_bits_insert.sv b/hw/rtl/libs/VX_bits_insert.sv index f0f00a2b5..dee8141bb 100644 --- a/hw/rtl/libs/VX_bits_insert.sv +++ 
b/hw/rtl/libs/VX_bits_insert.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,19 +19,19 @@ module VX_bits_insert #( parameter S = 1, parameter POS = 0 ) ( - input wire [N-1:0] data_in, - input wire [`UP(S)-1:0] ins_in, + input wire [N-1:0] data_in, + input wire [`UP(S)-1:0] ins_in, output wire [N+S-1:0] data_out -); - if (S == 0) begin +); + if (S == 0) begin : g_passthru `UNUSED_VAR (ins_in) assign data_out = data_in; - end else begin - if (POS == 0) begin + end else begin : g_insert + if (POS == 0) begin : g_pos_0 assign data_out = {data_in, ins_in}; - end else if (POS == N) begin + end else if (POS == N) begin : g_pos_N assign data_out = {ins_in, data_in}; - end else begin + end else begin : g_pos assign data_out = {data_in[N-1:POS], ins_in, data_in[POS-1:0]}; end end diff --git a/hw/rtl/libs/VX_bits_remove.sv b/hw/rtl/libs/VX_bits_remove.sv index bc2f60a70..159bd4993 100644 --- a/hw/rtl/libs/VX_bits_remove.sv +++ b/hw/rtl/libs/VX_bits_remove.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,17 +19,19 @@ module VX_bits_remove #( parameter S = 1, parameter POS = 0 ) ( - input wire [N-1:0] data_in, + input wire [N-1:0] data_in, output wire [N-S-1:0] data_out ); `STATIC_ASSERT (((0 == S) || ((POS + S) <= N)), ("invalid parameter")) - - if (POS == 0 || S == 0) begin + + if (S == 0) begin : g_passthru + assign data_out = data_in; + end else if (POS == 0) begin : g_pos_0 assign data_out = data_in[N-1:S]; - end else if ((POS + S) < N) begin - assign data_out = {data_in[N-1:(POS+S)], data_in[POS-1:0]}; - end else begin + end else if ((POS + S) == N) begin : g_pos_N assign data_out = data_in[POS-1:0]; + end else begin : g_pos + assign data_out = {data_in[N-1:(POS+S)], data_in[POS-1:0]}; end `UNUSED_VAR (data_in) diff --git a/hw/rtl/libs/VX_bypass_buffer.sv b/hw/rtl/libs/VX_bypass_buffer.sv index 4eefce440..7378a4fdd 100644 --- a/hw/rtl/libs/VX_bypass_buffer.sv +++ b/hw/rtl/libs/VX_bypass_buffer.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -25,30 +25,33 @@ module VX_bypass_buffer #( parameter DATAW = 1, parameter PASSTHRU = 0 -) ( +) ( input wire clk, input wire reset, input wire valid_in, - output wire ready_in, + output wire ready_in, input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out, input wire ready_out, output wire valid_out -); - if (PASSTHRU != 0) begin +); + if (PASSTHRU != 0) begin : g_passthru + `UNUSED_VAR (clk) `UNUSED_VAR (reset) assign ready_in = ready_out; - assign valid_out = valid_in; + assign valid_out = valid_in; assign data_out = data_in; - end else begin + + end else begin : g_buffer + reg [DATAW-1:0] buffer; reg has_data; always @(posedge clk) begin if (reset) begin has_data <= 0; - end else begin + end else begin if (ready_out) begin has_data <= 0; end else if (~has_data) begin @@ -63,6 +66,7 @@ module VX_bypass_buffer #( assign ready_in = ready_out || ~has_data; assign data_out = has_data ? buffer : data_in; assign valid_out = valid_in || has_data; + end endmodule diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index c4a42da14..a4dead008 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -26,42 +26,58 @@ module VX_cyclic_arbiter #( output wire grant_valid, input wire grant_ready ); - if (NUM_REQS == 1) begin + if (NUM_REQS == 1) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) + `UNUSED_VAR (grant_ready) assign grant_index = '0; assign grant_onehot = requests; assign grant_valid = requests[0]; - end else begin + end else begin : g_arbiter localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; + wire [LOG_NUM_REQS-1:0] grant_index_um; + wire [NUM_REQS-1:0] grant_onehot_w, grant_onehot_um; reg [LOG_NUM_REQS-1:0] grant_index_r; always @(posedge clk) begin if (reset) begin grant_index_r <= '0; - end else begin - if (!IS_POW2 && grant_index_r == LOG_NUM_REQS'(NUM_REQS-1)) begin + end else if (grant_valid && grant_ready) begin + if (!IS_POW2 && grant_index == LOG_NUM_REQS'(NUM_REQS-1)) 
begin grant_index_r <= '0; - end else if (~grant_valid || grant_ready) begin - grant_index_r <= grant_index_r + LOG_NUM_REQS'(1); + end else begin + grant_index_r <= grant_index + LOG_NUM_REQS'(1); end end end - reg [NUM_REQS-1:0] grant_onehot_r; - always @(*) begin - grant_onehot_r = '0; - grant_onehot_r[grant_index_r] = 1'b1; - end + VX_priority_encoder #( + .N (NUM_REQS) + ) priority_encoder ( + .data_in (requests), + .onehot_out (grant_onehot_um), + .index_out (grant_index_um), + .valid_out (grant_valid) + ); + + VX_decoder #( + .N (LOG_NUM_REQS), + .D (NUM_REQS) + ) grant_decoder ( + .data_in (grant_index), + .valid_in (1'b1), + .data_out (grant_onehot_w) + ); + + wire is_hit = requests[grant_index_r]; - assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; - assign grant_valid = requests[grant_index_r]; + assign grant_index = is_hit ? grant_index_r : grant_index_um; + assign grant_onehot = is_hit ? grant_onehot_w : grant_onehot_um; end diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_decoder.sv new file mode 100644 index 000000000..7c0c760e5 --- /dev/null +++ b/hw/rtl/libs/VX_decoder.sv @@ -0,0 +1,42 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_platform.vh" + +// Binary to one-hot decoder: drives valid_in onto the M-bit slot of data_out selected by data_in +// Adapted from BaseJump STL: http://bjump.org/data_out.html + +`TRACING_OFF +module VX_decoder #( + parameter N = 1, + parameter M = 1, + parameter MODEL = 0, + parameter D = 1 << N +) ( + input wire [N-1:0] data_in, + input wire [M-1:0] valid_in, + output wire [D-1:0][M-1:0] data_out +); + logic [D-1:0][M-1:0] shift; + if (MODEL == 1) begin : g_model1 + always @(*) begin + shift = '0; + shift[data_in] = {M{1'b1}}; + end + end else begin : g_model0 + assign shift = ((D*M)'({M{1'b1}})) << (data_in * M); + end + assign data_out = {D{valid_in}} & shift; + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_divider.sv b/hw/rtl/libs/VX_divider.sv index 551940da1..b8424843d 100644 --- a/hw/rtl/libs/VX_divider.sv +++ b/hw/rtl/libs/VX_divider.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,7 +24,7 @@ module VX_divider #( parameter LATENCY = 0 ) ( input wire clk, - input wire enable, + input wire enable, input wire [N_WIDTH-1:0] numer, input wire [D_WIDTH-1:0] denom, output wire [Q_WIDTH-1:0] quotient, @@ -37,7 +37,7 @@ module VX_divider #( wire [D_WIDTH-1:0] remainder_unqual; lpm_divide divide ( - .clock (clk), + .clock (clk), .clken (enable), .numer (numer), .denom (denom), @@ -47,7 +47,7 @@ module VX_divider #( defparam divide.lpm_type = "LPM_DIVIDE", - divide.lpm_widthn = N_WIDTH, + divide.lpm_widthn = N_WIDTH, divide.lpm_widthd = D_WIDTH, divide.lpm_nrepresentation = N_SIGNED ? 
"SIGNED" : "UNSIGNED", divide.lpm_drepresentation = D_SIGNED ? "SIGNED" : "UNSIGNED", @@ -62,36 +62,36 @@ module VX_divider #( reg [N_WIDTH-1:0] quotient_unqual; reg [D_WIDTH-1:0] remainder_unqual; - always @(*) begin + always @(*) begin begin if (N_SIGNED && D_SIGNED) begin quotient_unqual = $signed(numer) / $signed(denom); remainder_unqual = $signed(numer) % $signed(denom); - end + end else if (N_SIGNED && !D_SIGNED) begin quotient_unqual = $signed(numer) / denom; remainder_unqual = $signed(numer) % denom; - end + end else if (!N_SIGNED && D_SIGNED) begin quotient_unqual = numer / $signed(denom); remainder_unqual = numer % $signed(denom); - end + end else begin quotient_unqual = numer / denom; - remainder_unqual = numer % denom; + remainder_unqual = numer % denom; end end end - if (LATENCY == 0) begin + if (LATENCY == 0) begin : g_comb assign quotient = quotient_unqual [Q_WIDTH-1:0]; assign remainder = remainder_unqual [R_WIDTH-1:0]; - end else begin + end else begin : g_pipe reg [N_WIDTH-1:0] quotient_pipe [LATENCY-1:0]; reg [D_WIDTH-1:0] remainder_pipe [LATENCY-1:0]; - for (genvar i = 0; i < LATENCY; ++i) begin - always @(posedge clk) begin + for (genvar i = 0; i < LATENCY; ++i) begin : g_reg + always @(posedge clk) begin if (enable) begin quotient_pipe[i] <= (0 == i) ? quotient_unqual : quotient_pipe[i-1]; remainder_pipe[i] <= (0 == i) ? 
remainder_unqual : remainder_pipe[i-1]; @@ -101,7 +101,7 @@ module VX_divider #( assign quotient = quotient_pipe[LATENCY-1][Q_WIDTH-1:0]; assign remainder = remainder_pipe[LATENCY-1][R_WIDTH-1:0]; - end + end `endif diff --git a/hw/rtl/libs/VX_dp_ram.sv b/hw/rtl/libs/VX_dp_ram.sv index 6683eaecc..21ab03ad5 100644 --- a/hw/rtl/libs/VX_dp_ram.sv +++ b/hw/rtl/libs/VX_dp_ram.sv @@ -17,13 +17,13 @@ module VX_dp_ram #( parameter DATAW = 1, parameter SIZE = 1, - parameter ADDR_MIN = 0, parameter WRENW = 1, parameter OUT_REG = 0, - parameter NO_RWCHECK = 0, parameter LUTRAM = 0, + parameter NO_RWCHECK = 0, parameter RW_ASSERT = 0, parameter RESET_RAM = 0, + parameter RESET_OUT = 0, parameter READ_ENABLE = 0, parameter INIT_ENABLE = 0, parameter INIT_FILE = "", @@ -44,96 +44,187 @@ module VX_dp_ram #( `STATIC_ASSERT((WRENW * WSELW == DATAW), ("invalid parameter")) `define RAM_INITIALIZATION \ - if (INIT_ENABLE != 0) begin \ - if (INIT_FILE != "") begin \ + if (INIT_ENABLE != 0) begin : g_init \ + if (INIT_FILE != "") begin : g_file \ initial $readmemh(INIT_FILE, ram); \ - end else begin \ - initial \ + end else begin : g_value \ + initial begin \ for (integer i = 0; i < SIZE; ++i) \ ram[i] = INIT_VALUE; \ + end \ end \ end `UNUSED_PARAM (RW_ASSERT) `UNUSED_VAR (read) - if (WRENW > 1) begin - `RUNTIME_ASSERT(~write || (| wren), ("invalid write enable mask")); - end - - wire [DATAW-1:0] rdata_w; + `RUNTIME_ASSERT((((WRENW == 1) ) || ~write) || (| wren), ("%t: invalid write enable mask", $time)) -`ifdef SYNTHESIS - if (WRENW > 1) begin - `ifdef QUARTUS - if (LUTRAM != 0) begin - `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + if (OUT_REG && !READ_ENABLE) begin : g_out_reg + `UNUSED_PARAM (NO_RWCHECK) + reg [DATAW-1:0] rdata_r; + wire cs = read || write; + if (WRENW != 1) begin : 
g_writeen + `ifdef QUARTUS + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (cs) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + end + end + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; + end + end + end + end else begin : g_no_lutram + reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (cs) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + end + end + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; + end end end end - assign rdata_w = ram[raddr]; - end else begin - if (NO_RWCHECK != 0) begin - `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; + `else + // default synthesis + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + if (cs) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end + end + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; end end end - assign rdata_w = ram[raddr]; - end else begin - reg [WRENW-1:0][WSELW-1:0] ram [ADDR_MIN:SIZE-1]; + end else begin : g_no_lutram + reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + if (cs) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i * WSELW +: WSELW] <= 
wdata[i * WSELW +: WSELW]; + end + end + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; end end end - assign rdata_w = ram[raddr]; end - end - `else - // default synthesis - if (LUTRAM != 0) begin - `USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - for (integer i = 0; i < WRENW; ++i) begin - if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + `endif + end else begin : g_no_writeen + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (cs) begin + if (write) + ram[waddr] <= wdata; + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; + end + end + end + + end else begin : g_no_lutram + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (cs) begin + if (write) + ram[waddr] <= wdata; + if (RESET_OUT && reset) begin + rdata_r <= '0; + end else begin + rdata_r <= ram[raddr]; + end end end end - assign rdata_w = ram[raddr]; - end else begin - if (NO_RWCHECK != 0) begin - `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; + end + assign rdata = rdata_r; + end else begin : g_no_out_reg + // OUT_REG==0 || READ_ENABLE=1 + wire [DATAW-1:0] rdata_w; + `ifdef SYNTHESIS + if (WRENW > 1) begin : g_writeen + `ifdef QUARTUS + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin for (integer i = 0; i < WRENW; ++i) begin if (wren[i]) - ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; end end end assign rdata_w = ram[raddr]; - end else begin - reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; + end else begin : g_no_lutram + if (NO_RWCHECK != 0) begin : g_no_rwcheck + `NO_RW_RAM_CHECK reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; 
+ `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + end + end + end + assign rdata_w = ram[raddr]; + end else begin : g_rwcheck + reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i] <= wdata[i * WSELW +: WSELW]; + end + end + end + assign rdata_w = ram[raddr]; + end + end + `else + // default synthesis + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; `RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -144,23 +235,38 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end - end - `endif - end else begin - // (WRENW == 1) - if (LUTRAM != 0) begin - `USE_FAST_BRAM reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; + end else begin : g_no_lutram + if (NO_RWCHECK != 0) begin : g_no_rwcheck + `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end + end + end + assign rdata_w = ram[raddr]; + end else begin : g_rwcheck + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + for (integer i = 0; i < WRENW; ++i) begin + if (wren[i]) + ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; + end + end + end + assign rdata_w = ram[raddr]; end end - assign rdata_w = ram[raddr]; - end else begin - if (NO_RWCHECK != 0) begin - `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; + `endif + end else begin : g_no_writeen + // (WRENW == 1) + if (LUTRAM != 0) begin : g_lutram + `USE_FAST_BRAM reg [DATAW-1:0] ram [0:SIZE-1]; 
`RAM_INITIALIZATION always @(posedge clk) begin if (write) begin @@ -168,78 +274,90 @@ module VX_dp_ram #( end end assign rdata_w = ram[raddr]; - end else begin - reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - always @(posedge clk) begin - if (write) begin - ram[waddr] <= wdata; + end else begin : g_no_lutram + if (NO_RWCHECK != 0) begin : g_no_rwcheck + `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end end + assign rdata_w = ram[raddr]; + end else begin : g_rwcheck + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION + always @(posedge clk) begin + if (write) begin + ram[waddr] <= wdata; + end + end + assign rdata_w = ram[raddr]; end - assign rdata_w = ram[raddr]; end end - end -`else - // simulation - reg [DATAW-1:0] ram [ADDR_MIN:SIZE-1]; - `RAM_INITIALIZATION - - wire [DATAW-1:0] ram_n; - for (genvar i = 0; i < WRENW; ++i) begin - assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; - end + `else + // simulation + reg [DATAW-1:0] ram [0:SIZE-1]; + `RAM_INITIALIZATION - reg [DATAW-1:0] prev_data; - reg [ADDRW-1:0] prev_waddr; - reg prev_write; + wire [DATAW-1:0] ram_n; + for (genvar i = 0; i < WRENW; ++i) begin : g_ram_n + assign ram_n[i * WSELW +: WSELW] = ((WRENW == 1) | wren[i]) ? 
wdata[i * WSELW +: WSELW] : ram[waddr][i * WSELW +: WSELW]; + end - always @(posedge clk) begin - if (RESET_RAM && reset) begin - for (integer i = 0; i < SIZE; ++i) begin - ram[i] <= DATAW'(INIT_VALUE); - end - end else begin - if (write) begin - ram[waddr] <= ram_n; + always @(posedge clk) begin + if (RESET_RAM && reset) begin + for (integer i = 0; i < SIZE; ++i) begin + ram[i] <= DATAW'(INIT_VALUE); + end + end else begin + if (write) begin + ram[waddr] <= ram_n; + end end end - if (reset) begin - prev_write <= 0; - prev_data <= '0; - prev_waddr <= '0; - end else begin - prev_write <= write; - prev_data <= ram[waddr]; - prev_waddr <= waddr; - end - end - if (LUTRAM || !NO_RWCHECK) begin - `UNUSED_VAR (prev_write) - `UNUSED_VAR (prev_data) - `UNUSED_VAR (prev_waddr) - assign rdata_w = ram[raddr]; - end else begin - assign rdata_w = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; - if (RW_ASSERT) begin - `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("read after write hazard")); + if (!LUTRAM && NO_RWCHECK) begin : g_rdata_no_bypass + reg [DATAW-1:0] prev_data; + reg [ADDRW-1:0] prev_waddr; + reg prev_write; + + always @(posedge clk) begin + if (reset) begin + prev_write <= 0; + prev_data <= '0; + prev_waddr <= '0; + end else begin + prev_write <= write; + prev_data <= ram[waddr]; + prev_waddr <= waddr; + end + end + + assign rdata_w = (prev_write && (prev_waddr == raddr)) ? 
prev_data : ram[raddr]; + if (RW_ASSERT) begin : g_rw_assert + `RUNTIME_ASSERT(~read || (rdata_w == ram[raddr]), ("%t: read after write hazard", $time)) + end + end else begin : g_rdata_with_bypass + assign rdata_w = ram[raddr]; end - end -`endif + `endif - if (OUT_REG != 0) begin - reg [DATAW-1:0] rdata_r; - always @(posedge clk) begin - if (READ_ENABLE && reset) begin - rdata_r <= '0; - end else if (!READ_ENABLE || read) begin - rdata_r <= rdata_w; + if (OUT_REG != 0) begin : g_rdata_req + reg [DATAW-1:0] rdata_r; + always @(posedge clk) begin + if (READ_ENABLE && reset) begin + rdata_r <= '0; + end else if (!READ_ENABLE || read) begin + rdata_r <= rdata_w; + end end + assign rdata = rdata_r; + end else begin : g_rdata_comb + assign rdata = rdata_w; end - assign rdata = rdata_r; - end else begin - assign rdata = rdata_w; + end endmodule diff --git a/hw/rtl/libs/VX_edge_trigger.sv b/hw/rtl/libs/VX_edge_trigger.sv new file mode 100644 index 000000000..9e876985c --- /dev/null +++ b/hw/rtl/libs/VX_edge_trigger.sv @@ -0,0 +1,43 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_platform.vh" + +`TRACING_OFF +module VX_edge_trigger #( + parameter POS = 0, + parameter INIT = 0 +) ( + input wire clk, + input wire reset, + input wire data_in, + output wire data_out +); + reg prev; + + always @(posedge clk) begin + if (reset) begin + prev <= INIT; + end else begin + prev <= data_in; + end + end + + if (POS != 0) begin : g_pos + assign data_out = data_in & ~prev; + end else begin : g_neg + assign data_out = ~data_in & prev; + end + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_elastic_buffer.sv b/hw/rtl/libs/VX_elastic_buffer.sv index 9213572d3..c90aa0616 100644 --- a/hw/rtl/libs/VX_elastic_buffer.sv +++ b/hw/rtl/libs/VX_elastic_buffer.sv @@ -18,8 +18,7 @@ module VX_elastic_buffer #( parameter DATAW = 1, parameter SIZE = 1, parameter OUT_REG = 0, - parameter LUTRAM = 0, - parameter MAX_FANOUT = 0 + parameter LUTRAM = 0 ) ( input wire clk, input wire reset, @@ -32,7 +31,7 @@ module VX_elastic_buffer #( input wire ready_out, output wire valid_out ); - if (SIZE == 0) begin + if (SIZE == 0) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -41,47 +40,11 @@ module VX_elastic_buffer #( assign data_out = data_in; assign ready_in = ready_out; - end else if (MAX_FANOUT != 0 && (DATAW > (MAX_FANOUT + MAX_FANOUT/2))) begin - - localparam NUM_SLICES = `CDIV(DATAW, MAX_FANOUT); - localparam N_DATAW = DATAW / NUM_SLICES; - - for (genvar i = 0; i < NUM_SLICES; ++i) begin - - localparam S_DATAW = (i == NUM_SLICES-1) ? 
(DATAW - i * N_DATAW) : N_DATAW; - - wire valid_out_t, ready_in_t; - `UNUSED_VAR (valid_out_t) - `UNUSED_VAR (ready_in_t) - - `RESET_RELAY (slice_reset, reset); - - VX_elastic_buffer #( - .DATAW (S_DATAW), - .SIZE (SIZE), - .OUT_REG (OUT_REG), - .LUTRAM (LUTRAM) - ) buffer_slice ( - .clk (clk), - .reset (slice_reset), - .valid_in (valid_in), - .data_in (data_in[i * N_DATAW +: S_DATAW]), - .ready_in (ready_in_t), - .valid_out (valid_out_t), - .data_out (data_out[i * N_DATAW +: S_DATAW]), - .ready_out (ready_out) - ); - - if (i == 0) begin - assign ready_in = ready_in_t; - assign valid_out = valid_out_t; - end - end - - end else if (SIZE == 1) begin + end else if (SIZE == 1) begin : g_eb1 VX_pipe_buffer #( - .DATAW (DATAW) + .DATAW (DATAW), + .DEPTH (`MAX(OUT_REG, 1)) ) pipe_buffer ( .clk (clk), .reset (reset), @@ -93,32 +56,51 @@ module VX_elastic_buffer #( .ready_out (ready_out) ); - end else if (SIZE == 2 && LUTRAM == 0) begin + end else if (SIZE == 2 && LUTRAM == 0) begin : g_eb2 + + wire valid_out_t; + wire [DATAW-1:0] data_out_t; + wire ready_out_t; - VX_skid_buffer #( + VX_stream_buffer #( .DATAW (DATAW), - .HALF_BW (OUT_REG == 2), - .OUT_REG (OUT_REG) - ) skid_buffer ( + .OUT_REG (OUT_REG == 1) + ) stream_buffer ( .clk (clk), .reset (reset), .valid_in (valid_in), .data_in (data_in), .ready_in (ready_in), + .valid_out (valid_out_t), + .data_out (data_out_t), + .ready_out (ready_out_t) + ); + + VX_pipe_buffer #( + .DATAW (DATAW), + .DEPTH ((OUT_REG > 1) ? 
(OUT_REG-1) : 0) + ) out_buf ( + .clk (clk), + .reset (reset), + .valid_in (valid_out_t), + .data_in (data_out_t), + .ready_in (ready_out_t), .valid_out (valid_out), .data_out (data_out), .ready_out (ready_out) ); - end else begin + end else begin : g_ebN wire empty, full; wire [DATAW-1:0] data_out_t; wire ready_out_t; + wire valid_out_t = ~empty; + wire push = valid_in && ready_in; - wire pop = ~empty && ready_out_t; + wire pop = valid_out_t && ready_out_t; VX_fifo_queue #( .DATAW (DATAW), @@ -143,11 +125,11 @@ module VX_elastic_buffer #( VX_pipe_buffer #( .DATAW (DATAW), - .DEPTH ((OUT_REG > 0) ? (OUT_REG-1) : 0) + .DEPTH ((OUT_REG > 1) ? (OUT_REG-1) : 0) ) out_buf ( .clk (clk), .reset (reset), - .valid_in (~empty), + .valid_in (valid_out_t), .data_in (data_out_t), .ready_in (ready_out_t), .valid_out (valid_out), diff --git a/hw/rtl/libs/VX_onehot_encoder.sv b/hw/rtl/libs/VX_encoder.sv similarity index 69% rename from hw/rtl/libs/VX_onehot_encoder.sv rename to hw/rtl/libs/VX_encoder.sv index 8f7ada257..86ccad792 100644 --- a/hw/rtl/libs/VX_onehot_encoder.sv +++ b/hw/rtl/libs/VX_encoder.sv @@ -17,7 +17,7 @@ // Adapted from BaseJump STL: http://bjump.org/data_out.html `TRACING_OFF -module VX_onehot_encoder #( +module VX_encoder #( parameter N = 1, parameter REVERSE = 0, parameter MODEL = 1, @@ -27,44 +27,40 @@ module VX_onehot_encoder #( output wire [LN-1:0] data_out, output wire valid_out ); - if (N == 1) begin + if (N == 1) begin : g_n1 assign data_out = 0; assign valid_out = data_in; - end else if (N == 2) begin + end else if (N == 2) begin : g_n2 assign data_out = data_in[!REVERSE]; assign valid_out = (| data_in); - end else if (MODEL == 1) begin + end else if (MODEL == 1) begin : g_model1 localparam M = 1 << LN; `IGNORE_UNOPTFLAT_BEGIN - wire [LN-1:0][M-1:0] addr; - wire [LN:0][M-1:0] v; + wire [M-1:0] addr [LN]; + wire [M-1:0] v [LN+1]; `IGNORE_UNOPTFLAT_END // base case, also handle padding for non-power of two inputs assign v[0] = REVERSE ? 
(M'(data_in) << (M - N)) : M'(data_in); - for (genvar lvl = 1; lvl < (LN+1); ++lvl) begin + for (genvar lvl = 1; lvl < (LN+1); ++lvl) begin : g_scan_l localparam SN = 1 << (LN - lvl); localparam SI = M / SN; - localparam SW = lvl; - - for (genvar s = 0; s < SN; ++s) begin + for (genvar s = 0; s < SN; ++s) begin : g_scan_s `IGNORE_UNOPTFLAT_BEGIN wire [1:0] vs = {v[lvl-1][s*SI+(SI>>1)], v[lvl-1][s*SI]}; `IGNORE_UNOPTFLAT_END - assign v[lvl][s*SI] = (| vs); - - if (lvl == 1) begin - assign addr[lvl-1][s*SI +: SW] = vs[!REVERSE]; - end else begin - assign addr[lvl-1][s*SI +: SW] = { + if (lvl == 1) begin : g_lvl_1 + assign addr[lvl-1][s*SI +: lvl] = vs[!REVERSE]; + end else begin : g_lvl_n + assign addr[lvl-1][s*SI +: lvl] = { vs[!REVERSE], - addr[lvl-2][s*SI +: SW-1] | addr[lvl-2][s*SI+(SI>>1) +: SW-1] + addr[lvl-2][s*SI +: lvl-1] | addr[lvl-2][s*SI+(SI>>1) +: lvl-1] }; end end @@ -73,11 +69,11 @@ module VX_onehot_encoder #( assign data_out = addr[LN-1][LN-1:0]; assign valid_out = v[LN][0]; - end else if (MODEL == 2 && REVERSE == 0) begin + end else if (MODEL == 2 && REVERSE == 0) begin : g_model2 - for (genvar j = 0; j < LN; ++j) begin + for (genvar j = 0; j < LN; ++j) begin : g_data_out wire [N-1:0] mask; - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_mask assign mask[i] = i[j]; end assign data_out[j] = | (mask & data_in); @@ -85,31 +81,31 @@ module VX_onehot_encoder #( assign valid_out = (| data_in); - end else begin + end else begin : g_model0 - reg [LN-1:0] index_r; + reg [LN-1:0] index_w; - if (REVERSE != 0) begin + if (REVERSE != 0) begin : g_msb always @(*) begin - index_r = 'x; + index_w = 'x; for (integer i = N-1; i >= 0; --i) begin if (data_in[i]) begin - index_r = LN'(N-1-i); + index_w = LN'(N-1-i); end end end - end else begin + end else begin : g_lsb always @(*) begin - index_r = 'x; + index_w = 'x; for (integer i = 0; i < N; ++i) begin if (data_in[i]) begin - index_r = LN'(i); + index_w = LN'(i); end end end end - 
assign data_out = index_r; + assign data_out = index_w; assign valid_out = (| data_in); end diff --git a/hw/rtl/libs/VX_fair_arbiter.sv b/hw/rtl/libs/VX_fair_arbiter.sv deleted file mode 100644 index 82bcfc5c6..000000000 --- a/hw/rtl/libs/VX_fair_arbiter.sv +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -`include "VX_platform.vh" - -`TRACING_OFF -module VX_fair_arbiter #( - parameter NUM_REQS = 1, - parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) -) ( - input wire clk, - input wire reset, - input wire [NUM_REQS-1:0] requests, - output wire [LOG_NUM_REQS-1:0] grant_index, - output wire [NUM_REQS-1:0] grant_onehot, - output wire grant_valid, - input wire grant_ready -); - if (NUM_REQS == 1) begin - - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - `UNUSED_VAR (grant_ready) - - assign grant_index = '0; - assign grant_onehot = requests; - assign grant_valid = requests[0]; - - end else begin - - reg [NUM_REQS-1:0] requests_r; - - wire [NUM_REQS-1:0] requests_sel = requests_r & requests; - wire [NUM_REQS-1:0] requests_qual = (| requests_sel) ? 
requests_sel : requests; - - always @(posedge clk) begin - if (reset) begin - requests_r <= '0; - end else if (grant_ready) begin - requests_r <= requests_qual & ~grant_onehot; - end - end - - VX_priority_arbiter #( - .NUM_REQS (NUM_REQS) - ) priority_arbiter ( - .requests (requests_qual), - .grant_index (grant_index), - .grant_onehot (grant_onehot), - .grant_valid (grant_valid) - ); - - end - -endmodule -`TRACING_ON diff --git a/hw/rtl/libs/VX_fifo_queue.sv b/hw/rtl/libs/VX_fifo_queue.sv index ea00d67c7..c5a4bf32e 100644 --- a/hw/rtl/libs/VX_fifo_queue.sv +++ b/hw/rtl/libs/VX_fifo_queue.sv @@ -36,225 +36,134 @@ module VX_fifo_queue #( output wire [SIZEW-1:0] size ); - localparam ADDRW = `CLOG2(DEPTH); - `STATIC_ASSERT(ALM_FULL > 0, ("alm_full must be greater than 0!")) `STATIC_ASSERT(ALM_FULL < DEPTH, ("alm_full must be smaller than size!")) `STATIC_ASSERT(ALM_EMPTY > 0, ("alm_empty must be greater than 0!")) `STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!")) - `STATIC_ASSERT(`IS_POW2(DEPTH), ("size must be a power of 2!")) - - if (DEPTH == 1) begin + `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) + + VX_pending_size #( + .SIZE (DEPTH), + .ALM_EMPTY (ALM_EMPTY), + .ALM_FULL (ALM_FULL) + ) pending_size ( + .clk (clk), + .reset (reset), + .incr (push), + .decr (pop), + .empty (empty), + .full (full), + .alm_empty(alm_empty), + .alm_full(alm_full), + .size (size) + ); + + if (DEPTH == 1) begin : g_depth_1 reg [DATAW-1:0] head_r; - reg size_r; - - always @(posedge clk) begin - if (reset) begin - head_r <= '0; - size_r <= '0; - end else begin - `ASSERT(~push || ~full, ("runtime error: writing to a full queue")); - `ASSERT(~pop || ~empty, ("runtime error: reading an empty queue")); - if (push) begin - if (~pop) begin - size_r <= 1; - end - end else if (pop) begin - size_r <= '0; - end - if (push) begin - head_r <= data_in; - end - end - end - - assign data_out = head_r; - assign empty = (size_r == 0); - assign alm_empty = 1'b1; 
- assign full = (size_r != 0); - assign alm_full = 1'b1; - assign size = size_r; - - end else begin - - reg empty_r, alm_empty_r; - reg full_r, alm_full_r; - reg [ADDRW-1:0] used_r; - wire [ADDRW-1:0] used_n; always @(posedge clk) begin - if (reset) begin - empty_r <= 1; - alm_empty_r <= 1; - full_r <= 0; - alm_full_r <= 0; - used_r <= '0; - end else begin - `ASSERT(~(push && ~pop) || ~full, ("runtime error: incrementing full queue")); - `ASSERT(~(pop && ~push) || ~empty, ("runtime error: decrementing empty queue")); - if (push) begin - if (~pop) begin - empty_r <= 0; - if (used_r == ADDRW'(ALM_EMPTY)) - alm_empty_r <= 0; - if (used_r == ADDRW'(DEPTH-1)) - full_r <= 1; - if (used_r == ADDRW'(ALM_FULL-1)) - alm_full_r <= 1; - end - end else if (pop) begin - full_r <= 0; - if (used_r == ADDRW'(ALM_FULL)) - alm_full_r <= 0; - if (used_r == ADDRW'(1)) - empty_r <= 1; - if (used_r == ADDRW'(ALM_EMPTY+1)) - alm_empty_r <= 1; - end - used_r <= used_n; + if (push) begin + head_r <= data_in; end end - if (DEPTH == 2 && LUTRAM == 0) begin + assign data_out = head_r; - assign used_n = used_r ^ (push ^ pop); + end else begin : g_depth_n - if (0 == OUT_REG) begin + localparam ADDRW = `CLOG2(DEPTH); - reg [1:0][DATAW-1:0] shift_reg; + if (OUT_REG != 0) begin : g_out_reg - always @(posedge clk) begin - if (push) begin - shift_reg[1] <= shift_reg[0]; - shift_reg[0] <= data_in; - end - end - - assign data_out = shift_reg[!used_r[0]]; - - end else begin + wire [DATAW-1:0] dout; + reg [DATAW-1:0] dout_r; + reg [ADDRW-1:0] wr_ptr_r; + reg [ADDRW-1:0] rd_ptr_r; + reg [ADDRW-1:0] rd_ptr_n_r; - reg [DATAW-1:0] data_out_r; - reg [DATAW-1:0] buffer; - - always @(posedge clk) begin - if (push) begin - buffer <= data_in; - end - if (push && (empty_r || (used_r && pop))) begin - data_out_r <= data_in; - end else if (pop) begin - data_out_r <= buffer; + always @(posedge clk) begin + if (reset) begin + wr_ptr_r <= '0; + rd_ptr_r <= '0; + rd_ptr_n_r <= 1; + end else begin + wr_ptr_r <= wr_ptr_r 
+ ADDRW'(push); + if (pop) begin + rd_ptr_r <= rd_ptr_n_r; + if (DEPTH > 2) begin + rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); + end else begin // (DEPTH == 2); + rd_ptr_n_r <= ~rd_ptr_n_r; + end end end - - assign data_out = data_out_r; - end - end else begin - - assign used_n = $signed(used_r) + ADDRW'($signed(2'(push) - 2'(pop))); - - if (0 == OUT_REG) begin - - reg [ADDRW-1:0] rd_ptr_r; - reg [ADDRW-1:0] wr_ptr_r; - - always @(posedge clk) begin - if (reset) begin - rd_ptr_r <= '0; - wr_ptr_r <= '0; - end else begin - wr_ptr_r <= wr_ptr_r + ADDRW'(push); - rd_ptr_r <= rd_ptr_r + ADDRW'(pop); - end + VX_dp_ram #( + .DATAW (DATAW), + .SIZE (DEPTH), + .LUTRAM (LUTRAM) + ) dp_ram ( + .clk (clk), + .reset (reset), + .read (1'b1), + .write (push), + .wren (1'b1), + .waddr (wr_ptr_r), + .wdata (data_in), + .raddr (rd_ptr_n_r), + .rdata (dout) + ); + + wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1)); + + always @(posedge clk) begin + if (push && (empty || (going_empty && pop))) begin + dout_r <= data_in; + end else if (pop) begin + dout_r <= dout; end + end - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (DEPTH), - .LUTRAM (LUTRAM) - ) dp_ram ( - .clk (clk), - .reset (reset), - .read (1'b1), - .write (push), - .wren (1'b1), - .waddr (wr_ptr_r), - .wdata (data_in), - .raddr (rd_ptr_r), - .rdata (data_out) - ); - - end else begin + assign data_out = dout_r; - wire [DATAW-1:0] dout; - reg [DATAW-1:0] dout_r; - reg [ADDRW-1:0] wr_ptr_r; - reg [ADDRW-1:0] rd_ptr_r; - reg [ADDRW-1:0] rd_ptr_n_r; + end else begin : g_no_out_reg - always @(posedge clk) begin - if (reset) begin - wr_ptr_r <= '0; - rd_ptr_r <= '0; - rd_ptr_n_r <= 1; - end else begin - wr_ptr_r <= wr_ptr_r + ADDRW'(push); - if (pop) begin - rd_ptr_r <= rd_ptr_n_r; - if (DEPTH > 2) begin - rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); - end else begin // (DEPTH == 2); - rd_ptr_n_r <= ~rd_ptr_n_r; - end - end - end - end + reg [ADDRW-1:0] rd_ptr_r; + reg [ADDRW-1:0] wr_ptr_r; - wire going_empty; - if 
(ALM_EMPTY == 1) begin - assign going_empty = alm_empty_r; + always @(posedge clk) begin + if (reset) begin + rd_ptr_r <= '0; + wr_ptr_r <= '0; end else begin - assign going_empty = (used_r == ADDRW'(1)); + wr_ptr_r <= wr_ptr_r + ADDRW'(push); + rd_ptr_r <= rd_ptr_r + ADDRW'(pop); end + end - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (DEPTH), - .LUTRAM (LUTRAM) - ) dp_ram ( - .clk (clk), - .reset (reset), - .read (1'b1), - .write (push), - .wren (1'b1), - .waddr (wr_ptr_r), - .wdata (data_in), - .raddr (rd_ptr_n_r), - .rdata (dout) - ); + VX_dp_ram #( + .DATAW (DATAW), + .SIZE (DEPTH), + .LUTRAM (LUTRAM) + ) dp_ram ( + .clk (clk), + .reset (reset), + .read (1'b1), + .write (push), + .wren (1'b1), + .waddr (wr_ptr_r), + .wdata (data_in), + .raddr (rd_ptr_r), + .rdata (data_out) + ); - always @(posedge clk) begin - if (push && (empty_r || (going_empty && pop))) begin - dout_r <= data_in; - end else if (pop) begin - dout_r <= dout; - end - end - - assign data_out = dout_r; - end end - - assign empty = empty_r; - assign alm_empty = alm_empty_r; - assign full = full_r; - assign alm_full = alm_full_r; - assign size = {full_r, used_r}; end + `RUNTIME_ASSERT(~(push && ~pop) || ~full, ("%t: runtime error: incrementing full queue", $time)) + `RUNTIME_ASSERT(~(pop && ~push) || ~empty, ("%t: runtime error: decrementing empty queue", $time)) + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_find_first.sv b/hw/rtl/libs/VX_find_first.sv index 18f345855..2a1714e18 100644 --- a/hw/rtl/libs/VX_find_first.sv +++ b/hw/rtl/libs/VX_find_first.sv @@ -28,27 +28,29 @@ module VX_find_first #( localparam TL = (1 << LOGN) - 1; localparam TN = (1 << (LOGN+1)) - 1; -`IGNORE_WARNINGS_BEGIN - wire [TN-1:0] s_n; - wire [TN-1:0][DATAW-1:0] d_n; -`IGNORE_WARNINGS_END +`IGNORE_UNOPTFLAT_BEGIN + wire s_n [TN]; + wire [DATAW-1:0] d_n [TN]; +`IGNORE_UNOPTFLAT_END - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_reverse assign s_n[TL+i] = REVERSE ? 
valid_in[N-1-i] : valid_in[i]; assign d_n[TL+i] = REVERSE ? data_in[N-1-i] : data_in[i]; end - if (TL < (TN-N)) begin - for (genvar i = TL+N; i < TN; ++i) begin + if (TL < (TN-N)) begin : g_fill + for (genvar i = TL+N; i < TN; ++i) begin : g_i assign s_n[i] = 0; assign d_n[i] = '0; end end - for (genvar j = 0; j < LOGN; ++j) begin - for (genvar i = 0; i < (2**j); ++i) begin - assign s_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] | s_n[2**(j+1)-1+i*2+1]; - assign d_n[2**j-1+i] = s_n[2**(j+1)-1+i*2] ? d_n[2**(j+1)-1+i*2] : d_n[2**(j+1)-1+i*2+1]; + for (genvar j = 0; j < LOGN; ++j) begin : g_scan + localparam I = 1 << j; + for (genvar i = 0; i < I; ++i) begin : g_i + localparam K = I+i-1; + assign s_n[K] = s_n[2*K+1] | s_n[2*K+2]; + assign d_n[K] = s_n[2*K+1] ? d_n[2*K+1] : d_n[2*K+2]; end end diff --git a/hw/rtl/libs/VX_generic_arbiter.sv b/hw/rtl/libs/VX_generic_arbiter.sv index a1f7be4a0..5e090ebdd 100644 --- a/hw/rtl/libs/VX_generic_arbiter.sv +++ b/hw/rtl/libs/VX_generic_arbiter.sv @@ -27,7 +27,7 @@ module VX_generic_arbiter #( output wire grant_valid, input wire grant_ready ); - if (TYPE == "P") begin + if (TYPE == "P") begin : g_priority `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -42,7 +42,7 @@ module VX_generic_arbiter #( .grant_onehot (grant_onehot) ); - end else if (TYPE == "R") begin + end else if (TYPE == "R") begin : g_round_robin VX_rr_arbiter #( .NUM_REQS (NUM_REQS) @@ -56,21 +56,7 @@ module VX_generic_arbiter #( .grant_ready (grant_ready) ); - end else if (TYPE == "F") begin - - VX_fair_arbiter #( - .NUM_REQS (NUM_REQS) - ) fair_arbiter ( - .clk (clk), - .reset (reset), - .requests (requests), - .grant_valid (grant_valid), - .grant_index (grant_index), - .grant_onehot (grant_onehot), - .grant_ready (grant_ready) - ); - - end else if (TYPE == "M") begin + end else if (TYPE == "M") begin : g_matrix VX_matrix_arbiter #( .NUM_REQS (NUM_REQS) @@ -84,7 +70,7 @@ module VX_generic_arbiter #( .grant_ready (grant_ready) ); - end else if (TYPE == "C") begin + end else if 
(TYPE == "C") begin : g_cyclic VX_cyclic_arbiter #( .NUM_REQS (NUM_REQS) @@ -98,11 +84,13 @@ module VX_generic_arbiter #( .grant_ready (grant_ready) ); - end else begin + end else begin : g_invalid `ERROR(("invalid parameter")); end + `RUNTIME_ASSERT (((~(| requests) != 1) || (grant_valid && (requests[grant_index] != 0) && (grant_onehot == (NUM_REQS'(1) << grant_index)))), ("%t: invalid arbiter grant!", $time)) + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_index_queue.sv b/hw/rtl/libs/VX_index_queue.sv index 23ec6ed83..e73db0ff9 100644 --- a/hw/rtl/libs/VX_index_queue.sv +++ b/hw/rtl/libs/VX_index_queue.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -20,9 +20,9 @@ module VX_index_queue #( ) ( input wire clk, input wire reset, - input wire [DATAW-1:0] write_data, + input wire [DATAW-1:0] write_data, output wire [`LOG2UP(SIZE)-1:0] write_addr, - input wire push, + input wire push, input wire pop, output wire full, output wire empty, @@ -30,33 +30,33 @@ module VX_index_queue #( output wire [DATAW-1:0] read_data ); reg [DATAW-1:0] entries [SIZE-1:0]; - reg [SIZE-1:0] valid; + reg [SIZE-1:0] valid; reg [`LOG2UP(SIZE):0] rd_ptr, wr_ptr; wire [`LOG2UP(SIZE)-1:0] rd_a, wr_a; wire enqueue, dequeue; assign rd_a = rd_ptr[`LOG2UP(SIZE)-1:0]; - assign wr_a = wr_ptr[`LOG2UP(SIZE)-1:0]; + assign wr_a = wr_ptr[`LOG2UP(SIZE)-1:0]; assign empty = (wr_ptr == rd_ptr); assign full = (wr_a == rd_a) && (wr_ptr[`LOG2UP(SIZE)] != rd_ptr[`LOG2UP(SIZE)]); - assign enqueue = push; + assign enqueue = push; assign dequeue = !empty && !valid[rd_a]; // auto-remove when head is invalid - `RUNTIME_ASSERT(!push || !full, ("%t: *** invalid inputs", $time)); - + `RUNTIME_ASSERT(!push || !full, ("%t: *** invalid inputs", $time)) + always @(posedge clk) begin if (reset) begin rd_ptr <= '0; wr_ptr <= '0; - valid <= '0; + valid <= '0; end else begin if (enqueue) begin valid[wr_a] <= 1; wr_ptr <= wr_ptr + 1; - end + end if (dequeue) begin rd_ptr <= rd_ptr + 1; end @@ -67,7 +67,7 @@ module VX_index_queue #( if (enqueue) begin entries[wr_a] <= write_data; - end + end end assign write_addr = wr_a; diff --git a/hw/rtl/libs/VX_lzc.sv b/hw/rtl/libs/VX_lzc.sv index 2589bf5a7..af2cb650d 100644 --- a/hw/rtl/libs/VX_lzc.sv +++ b/hw/rtl/libs/VX_lzc.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,18 +23,18 @@ module VX_lzc #( output wire [LOGN-1:0] data_out, output wire valid_out ); - if (N == 1) begin + if (N == 1) begin : g_passthru `UNUSED_PARAM (REVERSE) assign data_out = '0; assign valid_out = data_in; - end else begin + end else begin : g_lzc wire [N-1:0][LOGN-1:0] indices; - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_indices assign indices[i] = REVERSE ? LOGN'(i) : LOGN'(N-1-i); end @@ -42,7 +42,7 @@ module VX_lzc #( .N (N), .DATAW (LOGN), .REVERSE (!REVERSE) - ) find_first ( + ) find_first ( .data_in (indices), .valid_in (data_in), .data_out (data_out), @@ -50,6 +50,6 @@ module VX_lzc #( ); end - + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_matrix_arbiter.sv b/hw/rtl/libs/VX_matrix_arbiter.sv index 23f9ea2a0..2840ef43e 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.sv +++ b/hw/rtl/libs/VX_matrix_arbiter.sv @@ -26,7 +26,7 @@ module VX_matrix_arbiter #( output wire grant_valid, input wire grant_ready ); - if (NUM_REQS == 1) begin + if (NUM_REQS == 1) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -36,59 +36,49 @@ module VX_matrix_arbiter #( assign grant_onehot = requests; assign grant_valid = requests[0]; - end else begin + end else begin : g_arbiter - reg [NUM_REQS-1:1] state [NUM_REQS-1:0]; + reg [NUM_REQS-1:1] state [NUM_REQS-1:0]; wire [NUM_REQS-1:0] pri [NUM_REQS-1:0]; - wire [NUM_REQS-1:0] grant_unqual; + wire [NUM_REQS-1:0] grant; - for (genvar i = 0; i < NUM_REQS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin - if (j > i) begin - assign pri[j][i] = requests[i] && state[i][j]; - end - else if (j < i) begin - assign pri[j][i] = requests[i] && !state[j][i]; - end - else 
begin - assign pri[j][i] = 0; + for (genvar r = 0; r < NUM_REQS; ++r) begin : g_pri_r + for (genvar c = 0; c < NUM_REQS; ++c) begin : g_pri_c + if (r > c) begin : g_row + assign pri[r][c] = requests[c] && state[c][r]; + end else if (r < c) begin : g_col + assign pri[r][c] = requests[c] && !state[r][c]; + end else begin : g_equal + assign pri[r][c] = 0; end end - assign grant_unqual[i] = requests[i] && !(| pri[i]); end - for (genvar i = 0; i < NUM_REQS; ++i) begin - for (genvar j = i + 1; j < NUM_REQS; ++j) begin + for (genvar r = 0; r < NUM_REQS; ++r) begin : g_grant + assign grant[r] = requests[r] && ~(| pri[r]); + end + + for (genvar r = 0; r < NUM_REQS; ++r) begin : g_state_r + for (genvar c = r + 1; c < NUM_REQS; ++c) begin : g_state_c always @(posedge clk) begin if (reset) begin - state[i][j] <= '0; - end else begin - state[i][j] <= (state[i][j] || grant_unqual[j]) && !grant_unqual[i]; + state[r][c] <= '0; + end else if (grant_ready) begin + state[r][c] <= (state[r][c] || grant[c]) && ~grant[r]; end end end end - reg [NUM_REQS-1:0] grant_unqual_prev; - always @(posedge clk) begin - if (reset) begin - grant_unqual_prev <= '0; - end else if (grant_ready) begin - grant_unqual_prev <= grant_unqual; - end - end - assign grant_onehot = grant_ready ? 
grant_unqual : grant_unqual_prev; + assign grant_onehot = grant; - VX_onehot_encoder #( + VX_encoder #( .N (NUM_REQS) ) encoder ( - .data_in (grant_unqual), - .data_out (grant_index), - `UNUSED_PIN (valid_out) + .data_in (grant_onehot), + .data_out (grant_index), + .valid_out (grant_valid) ); - - assign grant_valid = (| requests); - end endmodule diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 263df0159..4ece7cf69 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -53,14 +53,16 @@ module VX_mem_adapter #( input wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_out, output wire mem_rsp_ready_out ); - `STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!")) - localparam DST_DATA_SIZE = (DST_DATA_WIDTH / 8); localparam DST_LDATAW = `CLOG2(DST_DATA_WIDTH); localparam SRC_LDATAW = `CLOG2(SRC_DATA_WIDTH); localparam D = `ABS(DST_LDATAW - SRC_LDATAW); localparam P = 2**D; + localparam EXPECTED_TAG_WIDTH = SRC_TAG_WIDTH + ((DST_LDATAW > SRC_LDATAW) ? 
D : 0); + + `STATIC_ASSERT(DST_TAG_WIDTH >= EXPECTED_TAG_WIDTH, ("invalid DST_TAG_WIDTH parameter, current=%0d, expected=%0d", DST_TAG_WIDTH, EXPECTED_TAG_WIDTH)) + wire mem_req_valid_out_w; wire [DST_ADDR_WIDTH-1:0] mem_req_addr_out_w; wire mem_req_rw_out_w; @@ -74,9 +76,10 @@ module VX_mem_adapter #( wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in_w; wire mem_rsp_ready_in_w; + `UNUSED_VAR (mem_req_tag_in) `UNUSED_VAR (mem_rsp_tag_out) - if (DST_LDATAW > SRC_LDATAW) begin + if (DST_LDATAW > SRC_LDATAW) begin : g_wider_dst_data `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -88,28 +91,44 @@ module VX_mem_adapter #( wire [P-1:0][SRC_DATA_WIDTH-1:0] mem_rsp_data_out_w = mem_rsp_data_out; - if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH - D)) begin + if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH - D)) begin : g_mem_req_addr_out_w_src `UNUSED_VAR (mem_req_addr_in_qual) assign mem_req_addr_out_w = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0]; - end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH - D)) begin + end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH - D)) begin : g_mem_req_addr_out_w_dst assign mem_req_addr_out_w = DST_ADDR_WIDTH'(mem_req_addr_in_qual); - end else begin + end else begin : g_mem_req_addr_out_w assign mem_req_addr_out_w = mem_req_addr_in_qual; end + VX_decoder #( + .N (D), + .M (SRC_DATA_WIDTH/8) + ) req_be_dec ( + .data_in (req_idx), + .valid_in (mem_req_byteen_in), + .data_out (mem_req_byteen_out_w) + ); + + VX_decoder #( + .N (D), + .M (SRC_DATA_WIDTH) + ) req_data_dec ( + .data_in (req_idx), + .valid_in (mem_req_data_in), + .data_out (mem_req_data_out_w) + ); + assign mem_req_valid_out_w = mem_req_valid_in; assign mem_req_rw_out_w = mem_req_rw_in; - assign mem_req_byteen_out_w = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3)); - assign mem_req_data_out_w = DST_DATA_WIDTH'(mem_req_data_in) << ((DST_LDATAW'(req_idx)) << SRC_LDATAW); assign mem_req_tag_out_w = DST_TAG_WIDTH'({mem_req_tag_in, req_idx}); assign mem_req_ready_in = mem_req_ready_out_w; assign 
mem_rsp_valid_in_w = mem_rsp_valid_out; assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx]; - assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[SRC_TAG_WIDTH+D-1:D]); + assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[DST_TAG_WIDTH-1:D]); assign mem_rsp_ready_out = mem_rsp_ready_in_w; - end else if (DST_LDATAW < SRC_LDATAW) begin + end else if (DST_LDATAW < SRC_LDATAW) begin : g_wider_src_data reg [D-1:0] req_ctr, rsp_ctr; @@ -153,16 +172,16 @@ module VX_mem_adapter #( end assign mem_rsp_tag_in_x = (rsp_ctr != 0) ? mem_rsp_tag_in_r : mem_rsp_tag_out; `RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_x == mem_rsp_tag_out), - ("%t: *** out-of-order memory reponse! cur=%d, expected=%d", $time, mem_rsp_tag_in_x, mem_rsp_tag_out)) + ("%t: *** out-of-order memory reponse! cur=0x%0h, expected=0x%0h", $time, mem_rsp_tag_in_x, mem_rsp_tag_out)) wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr}; - if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin + if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin : g_mem_req_addr_out_w_src `UNUSED_VAR (mem_req_addr_in_qual) assign mem_req_addr_out_w = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0]; - end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH + D)) begin + end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH + D)) begin : g_mem_req_addr_out_w_dst assign mem_req_addr_out_w = DST_ADDR_WIDTH'(mem_req_addr_in_qual); - end else begin + end else begin : g_mem_req_addr_out_w assign mem_req_addr_out_w = mem_req_addr_in_qual; end @@ -178,17 +197,17 @@ module VX_mem_adapter #( assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out); assign mem_rsp_ready_out = mem_rsp_ready_in_w; - end else begin + end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) - if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin + if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin : g_mem_req_addr_out_w_src `UNUSED_VAR (mem_req_addr_in) assign mem_req_addr_out_w = mem_req_addr_in[DST_ADDR_WIDTH-1:0]; - end else if (DST_ADDR_WIDTH > 
SRC_ADDR_WIDTH) begin + end else if (DST_ADDR_WIDTH > SRC_ADDR_WIDTH) begin : g_mem_req_addr_out_w_dst assign mem_req_addr_out_w = DST_ADDR_WIDTH'(mem_req_addr_in); - end else begin + end else begin : g_mem_req_addr_out_w assign mem_req_addr_out_w = mem_req_addr_in; end diff --git a/hw/rtl/libs/VX_mem_coalescer.sv b/hw/rtl/libs/VX_mem_coalescer.sv index d1ffde09a..c27f04da4 100644 --- a/hw/rtl/libs/VX_mem_coalescer.sv +++ b/hw/rtl/libs/VX_mem_coalescer.sv @@ -18,7 +18,7 @@ module VX_mem_coalescer #( parameter `STRING INSTANCE_ID = "", parameter NUM_REQS = 1, parameter ADDR_WIDTH = 32, - parameter ATYPE_WIDTH = 1, + parameter FLAGS_WIDTH = 1, parameter DATA_IN_SIZE = 4, parameter DATA_OUT_SIZE = 64, parameter TAG_WIDTH = 8, @@ -43,7 +43,7 @@ module VX_mem_coalescer #( input wire [NUM_REQS-1:0] in_req_mask, input wire [NUM_REQS-1:0][DATA_IN_SIZE-1:0] in_req_byteen, input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr, - input wire [NUM_REQS-1:0][ATYPE_WIDTH-1:0] in_req_atype, + input wire [NUM_REQS-1:0][FLAGS_WIDTH-1:0] in_req_flags, input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data, input wire [TAG_WIDTH-1:0] in_req_tag, output wire in_req_ready, @@ -61,7 +61,7 @@ module VX_mem_coalescer #( output wire [OUT_REQS-1:0] out_req_mask, output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen, output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr, - output wire [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype, + output wire [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags, output wire [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data, output wire [OUT_TAG_WIDTH-1:0] out_req_tag, input wire out_req_ready, @@ -76,16 +76,15 @@ module VX_mem_coalescer #( `UNUSED_SPARAM (INSTANCE_ID) `STATIC_ASSERT (`IS_DIVISBLE(NUM_REQS * DATA_IN_WIDTH, DATA_OUT_WIDTH), ("invalid parameter")) `STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter")) - `RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("invalid request mask")); - `RUNTIME_ASSERT 
((~out_rsp_valid || out_rsp_mask != 0), ("invalid request mask")); + `RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("%t: invalid request mask", $time)) + `RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("%t: invalid request mask", $time)) localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH; - localparam NUM_REQS_W = `LOG2UP(NUM_REQS); // tag + mask + offest localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * DATA_RATIO_W); - localparam STATE_SETUP = 0; - localparam STATE_SEND = 1; + localparam STATE_WAIT = 0; + localparam STATE_SEND = 1; logic state_r, state_n; @@ -93,7 +92,7 @@ module VX_mem_coalescer #( logic out_req_rw_r, out_req_rw_n; logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n; logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n; - logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n; + logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags_r, out_req_flags_n; logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n; logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n; logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n; @@ -111,95 +110,94 @@ module VX_mem_coalescer #( logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n; logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n; - logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n; + logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] seed_flags_r, seed_flags_n; logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n; - logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n; + logic [NUM_REQS-1:0] req_rem_mask_r, req_rem_mask_n; - wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx; - - wire [NUM_REQS-1:0][OUT_ADDR_WIDTH-1:0] in_addr_base; wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] in_addr_offset; - for (genvar i = 0; i < NUM_REQS; i++) begin - assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:DATA_RATIO_W]; + for (genvar i = 0; i < NUM_REQS; i++) begin : g_in_addr_offset 
assign in_addr_offset[i] = in_req_addr[i][DATA_RATIO_W-1:0]; end - for (genvar i = 0; i < OUT_REQS; ++i) begin - wire [DATA_RATIO-1:0] batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & ~processed_mask_r[i * DATA_RATIO +: DATA_RATIO]; + for (genvar i = 0; i < OUT_REQS; ++i) begin : g_seed_gen + wire [DATA_RATIO-1:0] batch_mask; wire [DATA_RATIO_W-1:0] batch_idx; + + assign batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & req_rem_mask_r[i * DATA_RATIO +: DATA_RATIO]; + VX_priority_encoder #( .N (DATA_RATIO) ) priority_encoder ( - .data_in (batch_mask), - .index (batch_idx), - `UNUSED_PIN (onehot), - .valid_out (batch_valid_n[i]) + .data_in (batch_mask), + .index_out (batch_idx), + `UNUSED_PIN (onehot_out), + .valid_out (batch_valid_n[i]) ); - if (OUT_REQS > 1) begin - assign seed_idx[i] = {(NUM_REQS_W-DATA_RATIO_W)'(i), batch_idx}; - end else begin - assign seed_idx[i] = batch_idx; + + wire [DATA_RATIO-1:0][OUT_ADDR_WIDTH-1:0] addr_base; + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_addr_base + assign addr_base[j] = in_req_addr[DATA_RATIO * i + j][ADDR_WIDTH-1:DATA_RATIO_W]; end - end - for (genvar i = 0; i < OUT_REQS; ++i) begin - assign seed_addr_n[i] = in_addr_base[seed_idx[i]]; - assign seed_atype_n[i] = in_req_atype[seed_idx[i]]; - end + wire [DATA_RATIO-1:0][FLAGS_WIDTH-1:0] req_flags; + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_req_flags + assign req_flags[j] = in_req_flags[DATA_RATIO * i + j]; + end + + assign seed_addr_n[i] = addr_base[batch_idx]; + assign seed_flags_n[i] = req_flags[batch_idx]; - for (genvar i = 0; i < OUT_REQS; ++i) begin - for (genvar j = 0; j < DATA_RATIO; ++j) begin - assign addr_matches_n[i * DATA_RATIO + j] = (in_addr_base[i * DATA_RATIO + j] == seed_addr_n[i]); + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_addr_matches_n + assign addr_matches_n[i * DATA_RATIO + j] = (addr_base[j] == seed_addr_n[i]); end end wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r; - reg 
[OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged; - reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] req_data_merged; + wire [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged; + wire [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] req_data_merged; - always @(*) begin - req_byteen_merged = '0; - req_data_merged = 'x; - for (integer i = 0; i < OUT_REQS; ++i) begin + for (genvar i = 0; i < OUT_REQS; ++i) begin : g_data_merged + reg [DATA_RATIO-1:0][DATA_IN_SIZE-1:0] byteen_merged; + reg [DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] data_merged; + always @(*) begin + byteen_merged = '0; + data_merged = 'x; for (integer j = 0; j < DATA_RATIO; ++j) begin - if (current_pmask[i * DATA_RATIO + j]) begin - for (integer k = 0; k < DATA_IN_SIZE; ++k) begin - if (in_req_byteen[DATA_RATIO * i + j][k]) begin - req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; - req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; - end + for (integer k = 0; k < DATA_IN_SIZE; ++k) begin + // perform byte-level merge since each thread may have different bytes enabled + if (current_pmask[i * DATA_RATIO + j] && in_req_byteen[DATA_RATIO * i + j][k]) begin + byteen_merged[in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1; + data_merged[in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8]; end end end end + assign req_byteen_merged[i] = byteen_merged; + assign req_data_merged[i] = data_merged; end - wire [OUT_REQS * DATA_RATIO - 1:0] pending_mask; - for (genvar i = 0; i < OUT_REQS * DATA_RATIO; ++i) begin - assign pending_mask[i] = in_req_mask[i] && ~addr_matches_r[i] && ~processed_mask_r[i]; - end - wire batch_completed = ~(| pending_mask); + wire is_last_batch = ~(| (in_req_mask & ~addr_matches_r & req_rem_mask_r)); + + wire out_req_fire = out_req_valid && out_req_ready; always @(*) begin state_n = state_r; - out_req_valid_n = out_req_valid_r; 
out_req_mask_n = out_req_mask_r; out_req_rw_n = out_req_rw_r; out_req_addr_n = out_req_addr_r; - out_req_atype_n = out_req_atype_r; + out_req_flags_n = out_req_flags_r; out_req_byteen_n = out_req_byteen_r; out_req_data_n = out_req_data_r; out_req_tag_n = out_req_tag_r; - - processed_mask_n = processed_mask_r; + req_rem_mask_n = req_rem_mask_r; in_req_ready_n = 0; case (state_r) - STATE_SETUP: begin + STATE_WAIT: begin // wait for pending outgoing request to submit - if (out_req_valid && out_req_ready) begin + if (out_req_fire) begin out_req_valid_n = 0; end if (in_req_valid && ~out_req_valid_n && ~ibuf_full) begin @@ -207,37 +205,31 @@ module VX_mem_coalescer #( end end default/*STATE_SEND*/: begin + state_n = STATE_WAIT; out_req_valid_n = 1; out_req_mask_n = batch_valid_r; out_req_rw_n = in_req_rw; out_req_addr_n = seed_addr_r; - out_req_atype_n = seed_atype_r; + out_req_flags_n = seed_flags_r; out_req_byteen_n= req_byteen_merged; out_req_data_n = req_data_merged; out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; - - in_req_ready_n = batch_completed; - - if (batch_completed) begin - processed_mask_n = '0; - end else begin - processed_mask_n = processed_mask_r | current_pmask; - end - - state_n = STATE_SETUP; + req_rem_mask_n = is_last_batch ? 
'1 : (req_rem_mask_r & ~current_pmask); + in_req_ready_n = is_last_batch; end endcase end VX_pipe_register #( - .DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + ATYPE_WIDTH + OUT_ADDR_WIDTH + ATYPE_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH), - .RESETW (1 + NUM_REQS + 1) + .DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + FLAGS_WIDTH + OUT_ADDR_WIDTH + FLAGS_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH), + .RESETW (1 + NUM_REQS + 1), + .INIT_VALUE ({1'b0, {NUM_REQS{1'b1}}, 1'b0}) ) pipe_reg ( .clk (clk), .reset (reset), .enable (1'b1), - .data_in ({state_n, processed_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_atype_n, out_req_addr_n, out_req_atype_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}), - .data_out ({state_r, processed_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_atype_r, out_req_addr_r, out_req_atype_r, out_req_byteen_r, out_req_data_r, out_req_tag_r}) + .data_in ({state_n, req_rem_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_flags_n, out_req_addr_n, out_req_flags_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}), + .data_out ({state_r, req_rem_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_flags_r, out_req_addr_r, out_req_flags_r, out_req_byteen_r, out_req_data_r, out_req_tag_r}) ); wire out_rsp_fire = out_rsp_valid && out_rsp_ready; @@ -278,7 +270,7 @@ module VX_mem_coalescer #( assign out_req_mask = out_req_mask_r; assign out_req_byteen = out_req_byteen_r; assign out_req_addr = out_req_addr_r; - assign out_req_atype = out_req_atype_r; + assign out_req_flags = out_req_flags_r; assign out_req_data = out_req_data_r; assign out_req_tag = out_req_tag_r; @@ -306,12 +298,16 @@ module VX_mem_coalescer #( assign {ibuf_dout_tag, 
ibuf_dout_pmask, ibuf_dout_offset} = ibuf_dout; wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data_n; - wire [NUM_REQS-1:0] in_rsp_mask_n; + for (genvar i = 0; i < OUT_REQS; ++i) begin : g_in_rsp_data_n + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_j + assign in_rsp_data_n[i * DATA_RATIO + j] = out_rsp_data[i][ibuf_dout_offset[i * DATA_RATIO + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH]; + end + end - for (genvar i = 0; i < OUT_REQS; ++i) begin - for (genvar j = 0; j < DATA_RATIO; ++j) begin + wire [NUM_REQS-1:0] in_rsp_mask_n; + for (genvar i = 0; i < OUT_REQS; ++i) begin : g_in_rsp_mask_n + for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_j assign in_rsp_mask_n[i * DATA_RATIO + j] = out_rsp_mask[i] && ibuf_dout_pmask[i * DATA_RATIO + j]; - assign in_rsp_data_n[i * DATA_RATIO + j] = out_rsp_data[i][ibuf_dout_offset[i * DATA_RATIO + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH]; end end @@ -325,11 +321,15 @@ module VX_mem_coalescer #( wire [`UP(UUID_WIDTH)-1:0] out_req_uuid; wire [`UP(UUID_WIDTH)-1:0] out_rsp_uuid; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_out_req_uuid assign out_req_uuid = out_req_tag[OUT_TAG_WIDTH-1 -: UUID_WIDTH]; - assign out_rsp_uuid = out_rsp_tag[OUT_TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_out_req_uuid_0 assign out_req_uuid = '0; + end + + if (UUID_WIDTH != 0) begin : g_out_rsp_uuid + assign out_rsp_uuid = out_rsp_tag[OUT_TAG_WIDTH-1 -: UUID_WIDTH]; + end else begin : g_out_rsp_uuid_0 assign out_rsp_uuid = '0; end @@ -343,38 +343,33 @@ module VX_mem_coalescer #( end end - wire out_req_fire = out_req_valid && out_req_ready; - always @(posedge clk) begin if (out_req_fire) begin if (out_req_rw) begin - `TRACE(1, ("%d: %s-out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); - `TRACE(1, (", atype=")); - `TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS); - `TRACE(1, (", byteen=")); - `TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS); - `TRACE(1, 
(", data=")); - `TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS); + `TRACE(1, ("%t: %s out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS) + `TRACE(1, (", flags=")) + `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS) + `TRACE(1, (", byteen=")) + `TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS) + `TRACE(1, (", data=")) + `TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS) end else begin - `TRACE(1, ("%d: %s-out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS); - `TRACE(1, (", atype=")); - `TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS); - end - `TRACE(1, (", offset=")); - `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS); - `TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid)); - if ($countones(out_req_pmask) > 1) begin - `TRACE(1, ("%t: *** %s: coalesced=%d (#%0d)\n", $time, INSTANCE_ID, $countones(out_req_pmask), out_req_uuid)); + `TRACE(1, ("%d: %s out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS) + `TRACE(1, (", flags=")) + `TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS) end + `TRACE(1, (", offset=")) + `TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS) + `TRACE(1, (", pmask=%b, coalesced=%0d, tag=0x%0h (#%0d)\n", out_req_pmask, $countones(out_req_pmask), out_req_tag, out_req_uuid)) end if (out_rsp_fire) begin - `TRACE(1, ("%d: %s-out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)); - `TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS); - `TRACE(1, (", offset=")); - `TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS); - `TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid)); + `TRACE(1, ("%t: %s out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask)) + `TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS) + `TRACE(1, (", offset=")) + `TRACE_ARRAY1D(1, "%0d", 
ibuf_dout_offset, NUM_REQS) + `TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid)) end end `endif diff --git a/hw/rtl/libs/VX_mem_scheduler.sv b/hw/rtl/libs/VX_mem_scheduler.sv index aa3ef9b2f..4ba8bf147 100644 --- a/hw/rtl/libs/VX_mem_scheduler.sv +++ b/hw/rtl/libs/VX_mem_scheduler.sv @@ -21,7 +21,7 @@ module VX_mem_scheduler #( parameter WORD_SIZE = 4, parameter LINE_SIZE = WORD_SIZE, parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE), - parameter ATYPE_WIDTH = 1, + parameter FLAGS_WIDTH = 1, parameter TAG_WIDTH = 8, parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID parameter CORE_QUEUE_SIZE= 8, @@ -50,12 +50,12 @@ module VX_mem_scheduler #( input wire [CORE_REQS-1:0] core_req_mask, input wire [CORE_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] core_req_addr, - input wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] core_req_atype, + input wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] core_req_flags, input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data, input wire [TAG_WIDTH-1:0] core_req_tag, output wire core_req_ready, output wire core_req_empty, - output wire core_req_sent, + output wire core_req_wr_notify, // Core response output wire core_rsp_valid, @@ -72,7 +72,7 @@ module VX_mem_scheduler #( output wire [MEM_CHANNELS-1:0] mem_req_mask, output wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen, output wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr, - output wire [MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype, + output wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags, output wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data, output wire [MEM_TAG_WIDTH-1:0] mem_req_tag, input wire mem_req_ready, @@ -96,8 +96,7 @@ module VX_mem_scheduler #( `STATIC_ASSERT (`IS_DIVISBLE(CORE_REQS * WORD_SIZE, LINE_SIZE), ("invalid parameter")) `STATIC_ASSERT ((TAG_WIDTH >= UUID_WIDTH), ("invalid parameter")) - `STATIC_ASSERT ((0 == RSP_PARTIAL) || 
(1 == RSP_PARTIAL), ("invalid parameter")) - `RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("invalid request mask")); + `RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("%t: invalid request mask", $time)) wire ibuf_push; wire ibuf_pop; @@ -113,7 +112,7 @@ module VX_mem_scheduler #( wire reqq_rw; wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen; wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr; - wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype; + wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags; wire [CORE_REQS-1:0][WORD_WIDTH-1:0] reqq_data; wire [REQQ_TAG_WIDTH-1:0] reqq_tag; wire reqq_ready; @@ -123,7 +122,7 @@ module VX_mem_scheduler #( wire reqq_rw_s; wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s; wire [MERGED_REQS-1:0][MEM_ADDR_WIDTH-1:0] reqq_addr_s; - wire [MERGED_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype_s; + wire [MERGED_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags_s; wire [MERGED_REQS-1:0][LINE_WIDTH-1:0] reqq_data_s; wire [MERGED_TAG_WIDTH-1:0] reqq_tag_s; wire reqq_ready_s; @@ -133,7 +132,7 @@ module VX_mem_scheduler #( wire mem_req_rw_s; wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_s; wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_s; - wire [MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_s; + wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_s; wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_s; wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s; wire mem_req_ready_s; @@ -161,14 +160,14 @@ module VX_mem_scheduler #( wire reqq_ready_in; wire [REQQ_TAG_WIDTH-1:0] reqq_tag_u; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_reqq_tag_u_uuid assign reqq_tag_u = {core_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr}; - end else begin + end else begin : g_reqq_tag_u assign reqq_tag_u = ibuf_waddr; end VX_elastic_buffer #( - .DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + ATYPE_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH), + .DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + FLAGS_WIDTH + WORD_WIDTH) + 
REQQ_TAG_WIDTH), .SIZE (CORE_QUEUE_SIZE), .OUT_REG (1) ) req_queue ( @@ -176,8 +175,8 @@ module VX_mem_scheduler #( .reset (reset), .valid_in (reqq_valid_in), .ready_in (reqq_ready_in), - .data_in ({core_req_rw, core_req_mask, core_req_byteen, core_req_addr, core_req_atype, core_req_data, reqq_tag_u}), - .data_out ({reqq_rw, reqq_mask, reqq_byteen, reqq_addr, reqq_atype, reqq_data, reqq_tag}), + .data_in ({core_req_rw, core_req_mask, core_req_byteen, core_req_addr, core_req_flags, core_req_data, reqq_tag_u}), + .data_out ({reqq_rw, reqq_mask, reqq_byteen, reqq_addr, reqq_flags, reqq_data, reqq_tag}), .valid_out(reqq_valid), .ready_out(reqq_ready) ); @@ -188,8 +187,8 @@ module VX_mem_scheduler #( // no pending requests assign core_req_empty = !reqq_valid && ibuf_empty; - // notify request submisison - assign core_req_sent = reqq_valid && reqq_ready; + // notify write request submisison + assign core_req_wr_notify = reqq_valid && reqq_ready && reqq_rw; // Index buffer /////////////////////////////////////////////////////////// @@ -221,9 +220,7 @@ module VX_mem_scheduler #( // Handle memory coalescing /////////////////////////////////////////////// - if (COALESCE_ENABLE) begin - - `RESET_RELAY (coalescer_reset, reset); + if (COALESCE_ENABLE) begin : g_coalescer VX_mem_coalescer #( .INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)), @@ -231,13 +228,13 @@ module VX_mem_scheduler #( .DATA_IN_SIZE (WORD_SIZE), .DATA_OUT_SIZE (LINE_SIZE), .ADDR_WIDTH (ADDR_WIDTH), - .ATYPE_WIDTH (ATYPE_WIDTH), + .FLAGS_WIDTH (FLAGS_WIDTH), .TAG_WIDTH (REQQ_TAG_WIDTH), .UUID_WIDTH (UUID_WIDTH), .QUEUE_SIZE (MEM_QUEUE_SIZE) ) coalescer ( - .clk (clk), - .reset (coalescer_reset), + .clk (clk), + .reset (reset), // Input request .in_req_valid (reqq_valid), @@ -245,7 +242,7 @@ module VX_mem_scheduler #( .in_req_rw (reqq_rw), .in_req_byteen (reqq_byteen), .in_req_addr (reqq_addr), - .in_req_atype (reqq_atype), + .in_req_flags (reqq_flags), .in_req_data (reqq_data), .in_req_tag (reqq_tag), 
.in_req_ready (reqq_ready), @@ -263,7 +260,7 @@ module VX_mem_scheduler #( .out_req_rw (reqq_rw_s), .out_req_byteen (reqq_byteen_s), .out_req_addr (reqq_addr_s), - .out_req_atype (reqq_atype_s), + .out_req_flags (reqq_flags_s), .out_req_data (reqq_data_s), .out_req_tag (reqq_tag_s), .out_req_ready (reqq_ready_s), @@ -276,14 +273,13 @@ module VX_mem_scheduler #( .out_rsp_ready (mem_rsp_ready) ); - end else begin - + end else begin : g_no_coalescer assign reqq_valid_s = reqq_valid; assign reqq_mask_s = reqq_mask; assign reqq_rw_s = reqq_rw; assign reqq_byteen_s= reqq_byteen; assign reqq_addr_s = reqq_addr; - assign reqq_atype_s = reqq_atype; + assign reqq_flags_s = reqq_flags; assign reqq_data_s = reqq_data; assign reqq_tag_s = reqq_tag; assign reqq_ready = reqq_ready_s; @@ -301,25 +297,25 @@ module VX_mem_scheduler #( wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b; - wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_b; + wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_b; wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b; wire [BATCH_SEL_WIDTH-1:0] req_batch_idx; - for (genvar i = 0; i < MEM_BATCHES; ++i) begin - for (genvar j = 0; j < MEM_CHANNELS; ++j) begin + for (genvar i = 0; i < MEM_BATCHES; ++i) begin : g_mem_req_data_b + for (genvar j = 0; j < MEM_CHANNELS; ++j) begin : g_j localparam r = i * MEM_CHANNELS + j; - if (r < MERGED_REQS) begin + if (r < MERGED_REQS) begin : g_valid assign mem_req_mask_b[i][j] = reqq_mask_s[r]; assign mem_req_byteen_b[i][j] = reqq_byteen_s[r]; assign mem_req_addr_b[i][j] = reqq_addr_s[r]; - assign mem_req_atype_b[i][j] = reqq_atype_s[r]; + assign mem_req_flags_b[i][j] = reqq_flags_s[r]; assign mem_req_data_b[i][j] = reqq_data_s[r]; - end else begin + end else begin : g_padding assign mem_req_mask_b[i][j] = 
0; assign mem_req_byteen_b[i][j] = '0; assign mem_req_addr_b[i][j] = '0; - assign mem_req_atype_b[i][j] = '0; + assign mem_req_flags_b[i][j] = '0; assign mem_req_data_b[i][j] = '0; end end @@ -329,10 +325,10 @@ module VX_mem_scheduler #( assign mem_req_rw_s = reqq_rw_s; assign mem_req_byteen_s = mem_req_byteen_b[req_batch_idx]; assign mem_req_addr_s = mem_req_addr_b[req_batch_idx]; - assign mem_req_atype_s = mem_req_atype_b[req_batch_idx]; + assign mem_req_flags_s = mem_req_flags_b[req_batch_idx]; assign mem_req_data_s = mem_req_data_b[req_batch_idx]; - if (MEM_BATCHES != 1) begin + if (MEM_BATCHES != 1) begin : g_batch reg [MEM_BATCH_BITS-1:0] req_batch_idx_r; wire is_degenerate_batch = ~(| mem_req_mask_s); @@ -357,7 +353,7 @@ module VX_mem_scheduler #( wire [MEM_BATCHES-1:0][MEM_BATCH_BITS-1:0] req_batch_idxs; wire [MEM_BATCH_BITS-1:0] req_batch_idx_last; - for (genvar i = 0; i < MEM_BATCHES; ++i) begin + for (genvar i = 0; i < MEM_BATCHES; ++i) begin : g_req_batch assign req_batch_valids[i] = (| mem_req_mask_b[i]); assign req_batch_idxs[i] = MEM_BATCH_BITS'(i); end @@ -378,7 +374,7 @@ module VX_mem_scheduler #( assign req_sent_all = mem_req_ready_b && (req_batch_idx_r == req_batch_idx_last); assign mem_req_tag_s = {reqq_tag_s, req_batch_idx}; - end else begin + end else begin : g_no_batch assign mem_req_valid_s = reqq_valid_s; assign req_batch_idx = '0; @@ -390,7 +386,7 @@ module VX_mem_scheduler #( assign reqq_ready_s = req_sent_all; VX_elastic_buffer #( - .DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + ATYPE_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH), + .DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + FLAGS_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH), .SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF)) ) mem_req_buf ( @@ -398,8 +394,8 @@ module VX_mem_scheduler #( .reset (reset), .valid_in (mem_req_valid_s), .ready_in (mem_req_ready_s), - .data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, 
mem_req_addr_s, mem_req_atype_s, mem_req_data_s, mem_req_tag_s}), - .data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_atype, mem_req_data, mem_req_tag}), + .data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_flags_s, mem_req_data_s, mem_req_tag_s}), + .data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags, mem_req_data, mem_req_tag}), .valid_out (mem_req_valid), .ready_out (mem_req_ready) ); @@ -410,13 +406,13 @@ module VX_mem_scheduler #( wire [CORE_REQS-1:0] rsp_rem_mask_n, curr_mask; wire [BATCH_SEL_WIDTH-1:0] rsp_batch_idx; - if (CORE_BATCHES > 1) begin + if (CORE_BATCHES > 1) begin : g_rsp_batch_idx assign rsp_batch_idx = mem_rsp_tag_s[CORE_BATCH_BITS-1:0]; - end else begin + end else begin : g_rsp_batch_idx_0 assign rsp_batch_idx = '0; end - for (genvar r = 0; r < CORE_REQS; ++r) begin + for (genvar r = 0; r < CORE_REQS; ++r) begin : g_curr_mask localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j]; @@ -437,7 +433,7 @@ module VX_mem_scheduler #( end end - if (RSP_PARTIAL == 1) begin + if (RSP_PARTIAL != 0 || CORE_REQS == 1) begin : g_rsp_partial reg [CORE_QUEUE_SIZE-1:0] rsp_sop_r; @@ -454,25 +450,30 @@ module VX_mem_scheduler #( assign crsp_mask = curr_mask; assign crsp_sop = rsp_sop_r[ibuf_raddr]; - for (genvar r = 0; r < CORE_REQS; ++r) begin + for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data localparam j = r % CORE_CHANNELS; assign crsp_data[r] = mem_rsp_data_s[j]; end assign mem_rsp_ready_s = crsp_ready; - end else begin + end else begin : g_rsp_full - reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; - reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n; + wire [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n; reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0]; - always @(*) begin - rsp_store_n = 
rsp_store[ibuf_raddr]; - for (integer i = 0; i < CORE_CHANNELS; ++i) begin - if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin - rsp_store_n[(rsp_batch_idx * CORE_CHANNELS + i) * WORD_WIDTH +: WORD_WIDTH] = mem_rsp_data_s[i]; + for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store + for (genvar j = 0; j < CORE_BATCHES; ++j) begin : g_j + reg [WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0]; + wire rsp_wren = mem_rsp_fire_s + && (BATCH_SEL_WIDTH'(j) == rsp_batch_idx) + && ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]); + always @(posedge clk) begin + if (rsp_wren) begin + rsp_store[ibuf_raddr] <= mem_rsp_data_s[i]; + end end + assign rsp_store_n[i][j] = rsp_wren ? mem_rsp_data_s[i] : rsp_store[ibuf_raddr]; end end @@ -480,28 +481,25 @@ module VX_mem_scheduler #( if (ibuf_push) begin rsp_orig_mask[ibuf_waddr] <= core_req_mask; end - if (mem_rsp_valid_s) begin - rsp_store[ibuf_raddr] <= rsp_store_n; - end end assign crsp_valid = mem_rsp_valid_s && rsp_complete; assign crsp_mask = rsp_orig_mask[ibuf_raddr]; assign crsp_sop = 1'b1; - for (genvar r = 0; r < CORE_REQS; ++r) begin + for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data localparam i = r / CORE_CHANNELS; localparam j = r % CORE_CHANNELS; - assign crsp_data[r] = rsp_store_n[(i * CORE_CHANNELS + j) * WORD_WIDTH +: WORD_WIDTH]; + assign crsp_data[r] = rsp_store_n[j][i]; end assign mem_rsp_ready_s = crsp_ready || ~rsp_complete; end - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_crsp_tag assign crsp_tag = {mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout}; - end else begin + end else begin : g_crsp_tag_0 assign crsp_tag = ibuf_dout; end @@ -527,9 +525,9 @@ module VX_mem_scheduler #( `ifdef SIMULATION wire [`UP(UUID_WIDTH)-1:0] req_dbg_uuid; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_req_dbg_uuid assign req_dbg_uuid = core_req_tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_req_dbg_uuid_0 assign req_dbg_uuid = '0; end @@ -569,11 +567,11 
@@ module VX_mem_scheduler #( wire [`UP(UUID_WIDTH)-1:0] mem_rsp_dbg_uuid; wire [`UP(UUID_WIDTH)-1:0] rsp_dbg_uuid; - if (UUID_WIDTH != 0) begin + if (UUID_WIDTH != 0) begin : g_dbg_uuid assign mem_req_dbg_uuid = mem_req_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; assign mem_rsp_dbg_uuid = mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH]; assign rsp_dbg_uuid = core_rsp_tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_dbg_uuid_0 assign mem_req_dbg_uuid = '0; assign mem_rsp_dbg_uuid = '0; assign rsp_dbg_uuid = '0; @@ -586,41 +584,41 @@ module VX_mem_scheduler #( always @(posedge clk) begin if (core_req_fire) begin if (core_req_rw) begin - `TRACE(1, ("%d: %s-core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); - `TRACE(1, (", byteen=")); - `TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS); - `TRACE(1, (", data=")); - `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS); + `TRACE(1, ("%t: %s core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS) + `TRACE(1, (", byteen=")) + `TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS) + `TRACE(1, (", data=")) + `TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS) end else begin - `TRACE(1, ("%d: %s-core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)); - `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS); + `TRACE(1, ("%t: %s core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask)) + `TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS) end - `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)); + `TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid)) end if (core_rsp_valid && core_rsp_ready) begin - `TRACE(1, ("%d: %s-core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)); - `TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS); - `TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, 
rsp_dbg_uuid)); + `TRACE(1, ("%t: %s core-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop)) + `TRACE_ARRAY1D(1, "0x%0h", core_rsp_data, CORE_REQS) + `TRACE(1, (", tag=0x%0h (#%0d)\n", core_rsp_tag, rsp_dbg_uuid)) end if (| mem_req_fire_s) begin if (| mem_req_rw_s) begin - `TRACE(1, ("%d: %s-mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); - `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS); - `TRACE(1, (", byteen=")); - `TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS); - `TRACE(1, (", data=")); - `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS); + `TRACE(1, ("%t: %s mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) + `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS) + `TRACE(1, (", byteen=")) + `TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS) + `TRACE(1, (", data=")) + `TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS) end else begin - `TRACE(1, ("%d: %s-mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)); - `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS); + `TRACE(1, ("%t: %s mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s)) + `TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS) end - `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)); + `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr_s, req_batch_idx, mem_req_dbg_uuid)) end if (mem_rsp_fire_s) begin - `TRACE(1, ("%d: %s-mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)); - `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS); - `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid)); + `TRACE(1, ("%t: %s mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s)) + `TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS) + `TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, 
mem_rsp_dbg_uuid)) end end `endif diff --git a/hw/rtl/libs/VX_multiplier.sv b/hw/rtl/libs/VX_multiplier.sv index 2f046779f..11bf13a9f 100644 --- a/hw/rtl/libs/VX_multiplier.sv +++ b/hw/rtl/libs/VX_multiplier.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,7 +21,7 @@ module VX_multiplier #( parameter SIGNED = 0, parameter LATENCY = 0 ) ( - input wire clk, + input wire clk, input wire enable, input wire [A_WIDTH-1:0] dataa, input wire [B_WIDTH-1:0] datab, @@ -29,15 +29,15 @@ module VX_multiplier #( ); wire [R_WIDTH-1:0] prod_w; - if (SIGNED != 0) begin + if (SIGNED != 0) begin : g_prod_s assign prod_w = R_WIDTH'($signed(dataa) * $signed(datab)); - end else begin + end else begin : g_prod_u assign prod_w = R_WIDTH'(dataa * datab); end - - if (LATENCY == 0) begin + + if (LATENCY == 0) begin : g_passthru assign result = prod_w; - end else begin + end else begin : g_latency reg [LATENCY-1:0][R_WIDTH-1:0] prod_r; always @(posedge clk) begin if (enable) begin @@ -46,8 +46,8 @@ module VX_multiplier #( prod_r[i] <= prod_r[i-1]; end end - end - assign result = prod_r[LATENCY-1]; + end + assign result = prod_r[LATENCY-1]; end endmodule diff --git a/hw/rtl/libs/VX_mux.sv b/hw/rtl/libs/VX_mux.sv index f0bc78cae..19a06600f 100644 --- a/hw/rtl/libs/VX_mux.sv +++ b/hw/rtl/libs/VX_mux.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,13 +19,13 @@ module VX_mux #( parameter N = 1, parameter LN = `LOG2UP(N) ) ( - input wire [N-1:0][DATAW-1:0] data_in, - input wire [LN-1:0] sel_in, + input wire [N-1:0][DATAW-1:0] data_in, + input wire [LN-1:0] sel_in, output wire [DATAW-1:0] data_out -); - if (N > 1) begin +); + if (N > 1) begin : g_mux assign data_out = data_in[sel_in]; - end else begin + end else begin : g_passthru `UNUSED_VAR (sel_in) assign data_out = data_in; end diff --git a/hw/rtl/libs/VX_onehot_mux.sv b/hw/rtl/libs/VX_onehot_mux.sv index cc0fffaa6..8b97692f5 100644 --- a/hw/rtl/libs/VX_onehot_mux.sv +++ b/hw/rtl/libs/VX_onehot_mux.sv @@ -24,116 +24,126 @@ module VX_onehot_mux #( input wire [N-1:0] sel_in, output wire [DATAW-1:0] data_out ); - if (N == 1) begin + if (N == 1) begin : g_passthru `UNUSED_VAR (sel_in) assign data_out = data_in; - end else if (LUT_OPT && N == 2) begin + end else if (LUT_OPT && N == 2) begin : g_lut2 `UNUSED_VAR (sel_in) assign data_out = sel_in[0] ? 
data_in[0] : data_in[1]; - end else if (LUT_OPT && N == 3) begin - reg [DATAW-1:0] data_out_r; + end else if (LUT_OPT && N == 3) begin : g_lut3 + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 3'b001: data_out_r = data_in[0]; - 3'b010: data_out_r = data_in[1]; - 3'b100: data_out_r = data_in[2]; - default: data_out_r = 'x; + 3'b001: data_out_w = data_in[0]; + 3'b010: data_out_w = data_in[1]; + 3'b100: data_out_w = data_in[2]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; - end else if (LUT_OPT && N == 4) begin - reg [DATAW-1:0] data_out_r; + assign data_out = data_out_w; + end else if (LUT_OPT && N == 4) begin : g_lut4 + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 4'b0001: data_out_r = data_in[0]; - 4'b0010: data_out_r = data_in[1]; - 4'b0100: data_out_r = data_in[2]; - 4'b1000: data_out_r = data_in[3]; - default: data_out_r = 'x; + 4'b0001: data_out_w = data_in[0]; + 4'b0010: data_out_w = data_in[1]; + 4'b0100: data_out_w = data_in[2]; + 4'b1000: data_out_w = data_in[3]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; - end else if (LUT_OPT && N == 5) begin - reg [DATAW-1:0] data_out_r; + assign data_out = data_out_w; + end else if (LUT_OPT && N == 5) begin : g_lut5 + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 5'b00001: data_out_r = data_in[0]; - 5'b00010: data_out_r = data_in[1]; - 5'b00100: data_out_r = data_in[2]; - 5'b01000: data_out_r = data_in[3]; - 5'b10000: data_out_r = data_in[4]; - default: data_out_r = 'x; + 5'b00001: data_out_w = data_in[0]; + 5'b00010: data_out_w = data_in[1]; + 5'b00100: data_out_w = data_in[2]; + 5'b01000: data_out_w = data_in[3]; + 5'b10000: data_out_w = data_in[4]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; - end else if (LUT_OPT && N == 6) begin - reg [DATAW-1:0] data_out_r; + assign data_out = data_out_w; + end else if (LUT_OPT && N == 6) begin : g_lut6 + reg [DATAW-1:0] data_out_w; always @(*) begin 
case (sel_in) - 6'b000001: data_out_r = data_in[0]; - 6'b000010: data_out_r = data_in[1]; - 6'b000100: data_out_r = data_in[2]; - 6'b001000: data_out_r = data_in[3]; - 6'b010000: data_out_r = data_in[4]; - 6'b100000: data_out_r = data_in[5]; - default: data_out_r = 'x; + 6'b000001: data_out_w = data_in[0]; + 6'b000010: data_out_w = data_in[1]; + 6'b000100: data_out_w = data_in[2]; + 6'b001000: data_out_w = data_in[3]; + 6'b010000: data_out_w = data_in[4]; + 6'b100000: data_out_w = data_in[5]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; - end else if (LUT_OPT && N == 7) begin - reg [DATAW-1:0] data_out_r; + assign data_out = data_out_w; + end else if (LUT_OPT && N == 7) begin : g_lut7 + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 7'b0000001: data_out_r = data_in[0]; - 7'b0000010: data_out_r = data_in[1]; - 7'b0000100: data_out_r = data_in[2]; - 7'b0001000: data_out_r = data_in[3]; - 7'b0010000: data_out_r = data_in[4]; - 7'b0100000: data_out_r = data_in[5]; - 7'b1000000: data_out_r = data_in[6]; - default: data_out_r = 'x; + 7'b0000001: data_out_w = data_in[0]; + 7'b0000010: data_out_w = data_in[1]; + 7'b0000100: data_out_w = data_in[2]; + 7'b0001000: data_out_w = data_in[3]; + 7'b0010000: data_out_w = data_in[4]; + 7'b0100000: data_out_w = data_in[5]; + 7'b1000000: data_out_w = data_in[6]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; - end else if (LUT_OPT && N == 8) begin - reg [DATAW-1:0] data_out_r; + assign data_out = data_out_w; + end else if (LUT_OPT && N == 8) begin : g_lut8 + reg [DATAW-1:0] data_out_w; always @(*) begin case (sel_in) - 8'b00000001: data_out_r = data_in[0]; - 8'b00000010: data_out_r = data_in[1]; - 8'b00000100: data_out_r = data_in[2]; - 8'b00001000: data_out_r = data_in[3]; - 8'b00010000: data_out_r = data_in[4]; - 8'b00100000: data_out_r = data_in[5]; - 8'b01000000: data_out_r = data_in[6]; - 8'b10000000: data_out_r = data_in[7]; - default: data_out_r = 'x; + 
8'b00000001: data_out_w = data_in[0]; + 8'b00000010: data_out_w = data_in[1]; + 8'b00000100: data_out_w = data_in[2]; + 8'b00001000: data_out_w = data_in[3]; + 8'b00010000: data_out_w = data_in[4]; + 8'b00100000: data_out_w = data_in[5]; + 8'b01000000: data_out_w = data_in[6]; + 8'b10000000: data_out_w = data_in[7]; + default: data_out_w = 'x; endcase end - assign data_out = data_out_r; - end else if (MODEL == 1) begin + assign data_out = data_out_w; + end else if (MODEL == 1) begin : g_model1 wire [N-1:0][DATAW-1:0] mask; - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_mask assign mask[i] = {DATAW{sel_in[i]}} & data_in[i]; end - for (genvar i = 0; i < DATAW; ++i) begin + for (genvar i = 0; i < DATAW; ++i) begin : g_data_out wire [N-1:0] gather; - for (genvar j = 0; j < N; ++j) begin + for (genvar j = 0; j < N; ++j) begin : g_gather assign gather[j] = mask[j][i]; end assign data_out[i] = (| gather); end - end else if (MODEL == 2) begin - reg [DATAW-1:0] data_out_r; + end else if (MODEL == 2) begin : g_model2 + VX_find_first #( + .N (N), + .DATAW (DATAW) + ) find_first ( + .valid_in (sel_in), + .data_in (data_in), + .data_out (data_out), + `UNUSED_PIN (valid_out) + ); + end else if (MODEL == 3) begin : g_model3 + reg [DATAW-1:0] data_out_w; always @(*) begin - data_out_r = 'x; + data_out_w = 'x; for (integer i = 0; i < N; ++i) begin if (sel_in[i]) begin - data_out_r = data_in[i]; + data_out_w = data_in[i]; end end end - assign data_out = data_out_r; + assign data_out = data_out_w; end endmodule diff --git a/hw/syn/xilinx/test/kernel/start.S b/hw/rtl/libs/VX_onehot_shift.sv similarity index 57% rename from hw/syn/xilinx/test/kernel/start.S rename to hw/rtl/libs/VX_onehot_shift.sv index e9295d643..3222e3067 100644 --- a/hw/syn/xilinx/test/kernel/start.S +++ b/hw/rtl/libs/VX_onehot_shift.sv @@ -1,23 +1,32 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file 
except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -.section .init, "ax" -.global _start -.type _start, @function -_start: - # call main routine - call main +`include "VX_platform.vh" - # end execution - .insn r 0x0b, 0, 0, x0, x0, x0 -.size _start, .-_start \ No newline at end of file +`TRACING_OFF +module VX_onehot_shift #( + parameter N = 1, + parameter M = 1 +) ( + input wire [N-1:0] data_in0, + input wire [M-1:0] data_in1, + output wire [N*M-1:0] data_out +); + for (genvar i = 0; i < M; ++i) begin : g_i + for (genvar j = 0; j < N; ++j) begin : g_j + assign data_out[i*N + j] = data_in1[i] & data_in0[j]; + end + end + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_pe_serializer.sv b/hw/rtl/libs/VX_pe_serializer.sv index eac1eddcb..4a66a6399 100644 --- a/hw/rtl/libs/VX_pe_serializer.sv +++ b/hw/rtl/libs/VX_pe_serializer.sv @@ -35,8 +35,8 @@ module VX_pe_serializer #( // PE output wire pe_enable, - output wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in, - input wire [NUM_PES-1:0][DATA_OUT_WIDTH-1:0] pe_data_out, + output wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_out, + input wire [NUM_PES-1:0][DATA_OUT_WIDTH-1:0] pe_data_in, // output output wire valid_out, @@ -49,101 +49,92 @@ module VX_pe_serializer #( wire [TAG_WIDTH-1:0] tag_out_u; wire ready_out_u; - wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s; - wire valid_out_s; - wire [TAG_WIDTH-1:0] tag_out_s; + wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_out_w; + wire pe_valid_in; + wire [TAG_WIDTH-1:0] pe_tag_in; wire enable; VX_shift_register #( .DATAW (1 + TAG_WIDTH), - .DEPTH (LATENCY + PE_REG), + 
.DEPTH (PE_REG + LATENCY), .RESETW (1) ) shift_reg ( .clk (clk), .reset (reset), .enable (enable), - .data_in ({valid_in, tag_in}), - .data_out ({valid_out_s, tag_out_s}) + .data_in ({valid_in, tag_in}), + .data_out ({pe_valid_in, pe_tag_in}) ); VX_pipe_register #( - .DATAW (NUM_PES * DATA_IN_WIDTH), - .DEPTH (PE_REG) - ) pe_reg ( + .DATAW (NUM_PES * DATA_IN_WIDTH), + .DEPTH (PE_REG) + ) pe_data_reg ( .clk (clk), .reset (reset), .enable (enable), - .data_in (pe_data_in_s), - .data_out (pe_data_in) + .data_in (pe_data_out_w), + .data_out (pe_data_out) ); - if (NUM_LANES != NUM_PES) begin + assign pe_enable = enable; + + if (NUM_LANES != NUM_PES) begin : g_serialize localparam BATCH_SIZE = NUM_LANES / NUM_PES; localparam BATCH_SIZEW = `LOG2UP(BATCH_SIZE); - reg [BATCH_SIZEW-1:0] batch_in_idx; - reg [BATCH_SIZEW-1:0] batch_out_idx; + reg [BATCH_SIZEW-1:0] batch_in_idx, batch_out_idx; + reg batch_in_done, batch_out_done; - for (genvar i = 0; i < NUM_PES; ++i) begin - assign pe_data_in_s[i] = data_in[batch_in_idx * NUM_PES + i]; + for (genvar i = 0; i < NUM_PES; ++i) begin : g_pe_data_out_w + assign pe_data_out_w[i] = data_in[batch_in_idx * NUM_PES + i]; end always @(posedge clk) begin if (reset) begin - batch_in_idx <= '0; - batch_out_idx <= '0; + batch_in_idx <= '0; + batch_out_idx <= '0; + batch_in_done <= 0; + batch_out_done <= 0; end else if (enable) begin - if (valid_in) begin - batch_in_idx <= batch_in_idx + BATCH_SIZEW'(1); - end - if (valid_out_s) begin - batch_out_idx <= batch_out_idx + BATCH_SIZEW'(1); - end + batch_in_idx <= batch_in_idx + BATCH_SIZEW'(valid_in); + batch_out_idx <= batch_out_idx + BATCH_SIZEW'(pe_valid_in); + batch_in_done <= valid_in && (batch_in_idx == BATCH_SIZEW'(BATCH_SIZE-2)); + batch_out_done <= pe_valid_in && (batch_out_idx == BATCH_SIZEW'(BATCH_SIZE-2)); end end - wire batch_in_done = (batch_in_idx == BATCH_SIZEW'(BATCH_SIZE-1)); - wire batch_out_done = (batch_out_idx == BATCH_SIZEW'(BATCH_SIZE-1)); - - reg valid_out_r; - reg 
[BATCH_SIZE-1:0][NUM_PES-1:0][DATA_OUT_WIDTH-1:0] data_out_r; - reg [TAG_WIDTH-1:0] tag_out_r; + reg [BATCH_SIZE-1:0][(NUM_PES * DATA_OUT_WIDTH)-1:0] data_out_r, data_out_n; - wire valid_out_b = valid_out_s && batch_out_done; - wire ready_out_b = ready_out_u || ~valid_out_u; + always @(*) begin + data_out_n = data_out_r; + if (pe_valid_in) begin + data_out_n[batch_out_idx] = pe_data_in; + end + end always @(posedge clk) begin - if (reset) begin - valid_out_r <= 1'b0; - end else if (ready_out_b) begin - valid_out_r <= valid_out_b; - end - if (ready_out_b) begin - data_out_r[batch_out_idx] <= pe_data_out; - tag_out_r <= tag_out_s; - end + data_out_r <= data_out_n; end - assign enable = ready_out_b || ~valid_out_b; + assign enable = ready_out_u || ~valid_out_u; assign ready_in = enable && batch_in_done; - assign pe_enable = enable; - assign valid_out_u = valid_out_r; - assign data_out_u = data_out_r; - assign tag_out_u = tag_out_r; + assign valid_out_u = batch_out_done; + assign data_out_u = data_out_n; + assign tag_out_u = pe_tag_in; - end else begin + end else begin : g_passthru - assign pe_data_in_s = data_in; + assign pe_data_out_w = data_in; - assign enable = ready_out_u || ~valid_out_u; + assign enable = ready_out_u || ~pe_valid_in; assign ready_in = enable; - assign pe_enable = enable; - assign valid_out_u = valid_out_s; - assign data_out_u = pe_data_out; - assign tag_out_u = tag_out_s; + assign valid_out_u = pe_valid_in; + assign data_out_u = pe_data_in; + assign tag_out_u = pe_tag_in; end diff --git a/hw/rtl/libs/VX_pending_size.sv b/hw/rtl/libs/VX_pending_size.sv index 031e57695..1e72cef19 100644 --- a/hw/rtl/libs/VX_pending_size.sv +++ b/hw/rtl/libs/VX_pending_size.sv @@ -13,7 +13,7 @@ `include "VX_platform.vh" -//`TRACING_OFF +`TRACING_OFF module VX_pending_size #( parameter SIZE = 1, parameter INCRW = 1, @@ -34,97 +34,157 @@ module VX_pending_size #( ); `STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter: %d vs %d", INCRW, SIZEW)) `STATIC_ASSERT(DECRW <= 
SIZEW, ("invalid parameter: %d vs %d", DECRW, SIZEW)) - localparam ADDRW = `LOG2UP(SIZE); - reg empty_r, alm_empty_r; - reg full_r, alm_full_r; + if (SIZE == 1) begin : g_size_eq1 - if (INCRW != 1 || DECRW != 1) begin - - reg [SIZEW-1:0] size_r; - - wire [SIZEW-1:0] size_n = size_r + SIZEW'(incr) - SIZEW'(decr); + reg size_r; always @(posedge clk) begin if (reset) begin - empty_r <= 1; - alm_empty_r <= 1; - alm_full_r <= 0; - full_r <= 0; - size_r <= '0; + size_r <= '0; end else begin - `ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow")); - `ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow")); - size_r <= size_n; - empty_r <= (size_n == SIZEW'(0)); - alm_empty_r <= (size_n == SIZEW'(ALM_EMPTY)); - full_r <= (size_n == SIZEW'(SIZE)); - alm_full_r <= (size_n == SIZEW'(ALM_FULL)); + if (incr) begin + if (~decr) begin + size_r <= 1; + end + end else if (decr) begin + size_r <= '0; + end end end - assign size = size_r; + assign empty = (size_r == 0); + assign full = (size_r != 0); + assign alm_empty = 1'b1; + assign alm_full = 1'b1; + assign size = size_r; - end else begin + end else begin : g_size_gt1 - reg [ADDRW-1:0] used_r; - wire [ADDRW-1:0] used_n; + reg empty_r, alm_empty_r; + reg full_r, alm_full_r; - always @(posedge clk) begin - if (reset) begin - empty_r <= 1; - alm_empty_r <= 1; - full_r <= 0; - alm_full_r <= 0; - used_r <= '0; - end else begin - `ASSERT(~(incr && ~decr) || ~full, ("runtime error: counter overflow")); - `ASSERT(~(decr && ~incr) || ~empty, ("runtime error: counter underflow")); - if (incr) begin - if (~decr) begin - empty_r <= 0; - if (used_r == ADDRW'(ALM_EMPTY)) - alm_empty_r <= 0; - if (used_r == ADDRW'(SIZE-1)) - full_r <= 1; - if (used_r == ADDRW'(ALM_FULL-1)) - alm_full_r <= 1; + if (INCRW != 1 || DECRW != 1) begin : g_wide_step + + localparam SUBW = `MIN(SIZEW, `MAX(INCRW, DECRW)+1); + + logic [SIZEW-1:0] size_n, size_r; + + assign size_n = 
$signed(size_r) + SIZEW'($signed(SUBW'(incr) - SUBW'(decr))); + + always @(posedge clk) begin + if (reset) begin + empty_r <= 1; + full_r <= 0; + alm_empty_r <= 1; + alm_full_r <= 0; + size_r <= '0; + end else begin + `ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow")); + `ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow")); + empty_r <= (size_n == SIZEW'(0)); + full_r <= (size_n == SIZEW'(SIZE)); + alm_empty_r <= (size_n <= SIZEW'(ALM_EMPTY)); + alm_full_r <= (size_n >= SIZEW'(ALM_FULL)); + size_r <= size_n; + end + end + + assign size = size_r; + + end else begin : g_single_step + + localparam ADDRW = `LOG2UP(SIZE); + + reg [ADDRW-1:0] used_r; + + wire is_alm_empty = (used_r == ADDRW'(ALM_EMPTY)); + wire is_alm_empty_n = (used_r == ADDRW'(ALM_EMPTY+1)); + wire is_alm_full = (used_r == ADDRW'(ALM_FULL)); + wire is_alm_full_n = (used_r == ADDRW'(ALM_FULL-1)); + + always @(posedge clk) begin + if (reset) begin + alm_empty_r <= 1; + alm_full_r <= 0; + end else begin + if (incr) begin + if (~decr) begin + if (is_alm_empty) + alm_empty_r <= 0; + if (is_alm_full_n) + alm_full_r <= 1; + end + end else if (decr) begin + if (is_alm_full) + alm_full_r <= 0; + if (is_alm_empty_n) + alm_empty_r <= 1; end - end else if (decr) begin - if (used_r == ADDRW'(1)) - empty_r <= 1; - if (used_r == ADDRW'(ALM_EMPTY+1)) - alm_empty_r <= 1; - full_r <= 0; - if (used_r == ADDRW'(ALM_FULL)) - alm_full_r <= 0; end - used_r <= used_n; end - end - if (SIZE == 2) begin - assign used_n = used_r ^ (incr ^ decr); - end else begin - assign used_n = $signed(used_r) + ADDRW'($signed(2'(incr) - 2'(decr))); - end + if (SIZE > 2) begin : g_size_gt2 - if (SIZE > 1) begin - if (SIZEW > ADDRW) begin - assign size = {full_r, used_r}; - end else begin - assign size = used_r; + wire is_empty_n = (used_r == ADDRW'(1)); + wire is_full_n = (used_r == ADDRW'(SIZE-1)); + + wire [1:0] push_minus_pop = {~incr & decr, incr 
^ decr}; + + always @(posedge clk) begin + if (reset) begin + empty_r <= 1; + full_r <= 0; + used_r <= '0; + end else begin + if (incr) begin + if (~decr) begin + empty_r <= 0; + if (is_full_n) + full_r <= 1; + end + end else if (decr) begin + full_r <= 0; + if (is_empty_n) + empty_r <= 1; + end + used_r <= $signed(used_r) + ADDRW'($signed(push_minus_pop)); + end + end + + end else begin : g_size_eq2 + + always @(posedge clk) begin + if (reset) begin + empty_r <= 1; + full_r <= 0; + used_r <= '0; + end else begin + empty_r <= (empty_r & ~incr) | (~full_r & decr & ~incr); + full_r <= (~empty_r & incr & ~decr) | (full_r & ~(decr ^ incr)); + used_r <= used_r ^ (incr ^ decr); + end + end + end + + if (SIZE > 1) begin : g_sizeN + if (SIZEW > ADDRW) begin : g_not_log2 + assign size = {full_r, used_r}; + end else begin : g_log2 + assign size = used_r; + end + end else begin : g_size1 + assign size = full_r; end - end else begin - assign size = full_r; + end - end + assign empty = empty_r; + assign full = full_r; + assign alm_empty = alm_empty_r; + assign alm_full = alm_full_r; - assign empty = empty_r; - assign alm_empty = alm_empty_r; - assign alm_full = alm_full_r; - assign full = full_r; + end endmodule -//`TRACING_ON +`TRACING_ON diff --git a/hw/rtl/libs/VX_pipe_buffer.sv b/hw/rtl/libs/VX_pipe_buffer.sv index 167235c17..5ba23bc08 100644 --- a/hw/rtl/libs/VX_pipe_buffer.sv +++ b/hw/rtl/libs/VX_pipe_buffer.sv @@ -24,8 +24,9 @@ `TRACING_OFF module VX_pipe_buffer #( - parameter DATAW = 1, - parameter DEPTH = 1 + parameter DATAW = 1, + parameter RESETW = 0, + parameter DEPTH = 1 ) ( input wire clk, input wire reset, @@ -36,16 +37,16 @@ module VX_pipe_buffer #( input wire ready_out, output wire valid_out ); - if (DEPTH == 0) begin + if (DEPTH == 0) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) assign ready_in = ready_out; assign valid_out = valid_in; assign data_out = data_in; - end else begin + end else begin : g_register wire [DEPTH:0] valid; 
`IGNORE_UNOPTFLAT_BEGIN - wire [DEPTH:0] ready; + wire ready [DEPTH+1]; `IGNORE_UNOPTFLAT_END wire [DEPTH:0][DATAW-1:0] data; @@ -53,11 +54,11 @@ module VX_pipe_buffer #( assign data[0] = data_in; assign ready_in = ready[0]; - for (genvar i = 0; i < DEPTH; ++i) begin + for (genvar i = 0; i < DEPTH; ++i) begin : g_pipe_regs assign ready[i] = (ready[i+1] || ~valid[i+1]); VX_pipe_register #( .DATAW (1 + DATAW), - .RESETW (1) + .RESETW (1 + RESETW) ) pipe_register ( .clk (clk), .reset (reset), @@ -70,7 +71,6 @@ module VX_pipe_buffer #( assign valid_out = valid[DEPTH]; assign data_out = data[DEPTH]; assign ready[DEPTH] = ready_out; - end endmodule diff --git a/hw/rtl/libs/VX_pipe_register.sv b/hw/rtl/libs/VX_pipe_register.sv index 707438abd..ef19cb58b 100644 --- a/hw/rtl/libs/VX_pipe_register.sv +++ b/hw/rtl/libs/VX_pipe_register.sv @@ -17,8 +17,8 @@ module VX_pipe_register #( parameter DATAW = 1, parameter RESETW = 0, - parameter DEPTH = 1, - parameter MAX_FANOUT = 0 + parameter [`UP(RESETW)-1:0] INIT_VALUE = {`UP(RESETW){1'b0}}, + parameter DEPTH = 1 ) ( input wire clk, input wire reset, @@ -26,81 +26,61 @@ module VX_pipe_register #( input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out ); - if (DEPTH == 0) begin + if (DEPTH == 0) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) `UNUSED_VAR (enable) assign data_out = data_in; - end else if (DEPTH == 1) begin - if (MAX_FANOUT != 0 && (DATAW > (MAX_FANOUT + MAX_FANOUT/2))) begin - localparam NUM_SLICES = `CDIV(DATAW, MAX_FANOUT); - localparam N_DATAW = DATAW / NUM_SLICES; - for (genvar i = 0; i < NUM_SLICES; ++i) begin - localparam SLICE_START = i * N_DATAW; - localparam SLICE_END = SLICE_START + S_DATAW - 1; - localparam S_DATAW = (i == NUM_SLICES-1) ? (DATAW - SLICE_START) : N_DATAW; - localparam S_RESETW = (SLICE_END >= (DATAW - RESETW)) ? - ((SLICE_START >= (DATAW - RESETW)) ? 
S_DATAW : (SLICE_END - (DATAW - RESETW) + 1)) : 0; - VX_pipe_register #( - .DATAW (S_DATAW), - .RESETW (S_RESETW) - ) pipe_register_slice ( - .clk (clk), - .reset (reset), - .enable (enable), - .data_in (data_in[i * N_DATAW +: S_DATAW]), - .data_out (data_out[i * N_DATAW +: S_DATAW]) - ); - end - end else begin - if (RESETW == 0) begin - `UNUSED_VAR (reset) - reg [DATAW-1:0] value; + end else if (DEPTH == 1) begin : g_depth1 + if (RESETW == 0) begin : g_no_reset + `UNUSED_VAR (reset) + reg [DATAW-1:0] value; - always @(posedge clk) begin - if (enable) begin - value <= data_in; - end + always @(posedge clk) begin + if (enable) begin + value <= data_in; end - assign data_out = value; - end else if (RESETW == DATAW) begin - reg [DATAW-1:0] value; + end + assign data_out = value; + end else if (RESETW < DATAW) begin : g_partial_reset + reg [DATAW-RESETW-1:0] value_d; + reg [RESETW-1:0] value_r; - always @(posedge clk) begin - if (reset) begin - value <= RESETW'(0); - end else if (enable) begin - value <= data_in; - end + always @(posedge clk) begin + if (reset) begin + value_r <= INIT_VALUE; + end else if (enable) begin + value_r <= data_in[DATAW-1:DATAW-RESETW]; end - assign data_out = value; - end else begin - reg [DATAW-RESETW-1:0] value_d; - reg [RESETW-1:0] value_r; + end - always @(posedge clk) begin - if (reset) begin - value_r <= RESETW'(0); - end else if (enable) begin - value_r <= data_in[DATAW-1:DATAW-RESETW]; - end + always @(posedge clk) begin + if (enable) begin + value_d <= data_in[DATAW-RESETW-1:0]; end + end + assign data_out = {value_r, value_d}; + end else begin : g_full_reset + reg [DATAW-1:0] value; - always @(posedge clk) begin - if (enable) begin - value_d <= data_in[DATAW-RESETW-1:0]; - end + always @(posedge clk) begin + if (reset) begin + value <= INIT_VALUE; + end else if (enable) begin + value <= data_in; end - assign data_out = {value_r, value_d}; end + assign data_out = value; end - end else begin + end else begin : g_recursive wire 
[DEPTH:0][DATAW-1:0] data_delayed; assign data_delayed[0] = data_in; - for (genvar i = 1; i <= DEPTH; ++i) begin + + for (genvar i = 1; i <= DEPTH; ++i) begin : g_pipe_reg VX_pipe_register #( .DATAW (DATAW), - .RESETW (RESETW) + .RESETW (RESETW), + .INIT_VALUE (INIT_VALUE) ) pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_popcount.sv b/hw/rtl/libs/VX_popcount.sv index eaec78789..fa8c49099 100644 --- a/hw/rtl/libs/VX_popcount.sv +++ b/hw/rtl/libs/VX_popcount.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,21 +21,21 @@ module VX_popcount63( reg [2:0] sum; always @(*) begin case (data_in) - 6'd0: sum=3'd0; 6'd1: sum=3'd1; 6'd2: sum=3'd1; 6'd3: sum=3'd2; + 6'd0: sum=3'd0; 6'd1: sum=3'd1; 6'd2: sum=3'd1; 6'd3: sum=3'd2; 6'd4: sum=3'd1; 6'd5: sum=3'd2; 6'd6: sum=3'd2; 6'd7: sum=3'd3; - 6'd8: sum=3'd1; 6'd9: sum=3'd2; 6'd10: sum=3'd2; 6'd11: sum=3'd3; + 6'd8: sum=3'd1; 6'd9: sum=3'd2; 6'd10: sum=3'd2; 6'd11: sum=3'd3; 6'd12: sum=3'd2; 6'd13: sum=3'd3; 6'd14: sum=3'd3; 6'd15: sum=3'd4; - 6'd16: sum=3'd1; 6'd17: sum=3'd2; 6'd18: sum=3'd2; 6'd19: sum=3'd3; + 6'd16: sum=3'd1; 6'd17: sum=3'd2; 6'd18: sum=3'd2; 6'd19: sum=3'd3; 6'd20: sum=3'd2; 6'd21: sum=3'd3; 6'd22: sum=3'd3; 6'd23: sum=3'd4; - 6'd24: sum=3'd2; 6'd25: sum=3'd3; 6'd26: sum=3'd3; 6'd27: sum=3'd4; + 6'd24: sum=3'd2; 6'd25: sum=3'd3; 6'd26: sum=3'd3; 6'd27: sum=3'd4; 6'd28: sum=3'd3; 6'd29: sum=3'd4; 6'd30: sum=3'd4; 6'd31: sum=3'd5; - 6'd32: sum=3'd1; 6'd33: sum=3'd2; 6'd34: sum=3'd2; 6'd35: sum=3'd3; + 6'd32: sum=3'd1; 6'd33: sum=3'd2; 
6'd34: sum=3'd2; 6'd35: sum=3'd3; 6'd36: sum=3'd2; 6'd37: sum=3'd3; 6'd38: sum=3'd3; 6'd39: sum=3'd4; - 6'd40: sum=3'd2; 6'd41: sum=3'd3; 6'd42: sum=3'd3; 6'd43: sum=3'd4; + 6'd40: sum=3'd2; 6'd41: sum=3'd3; 6'd42: sum=3'd3; 6'd43: sum=3'd4; 6'd44: sum=3'd3; 6'd45: sum=3'd4; 6'd46: sum=3'd4; 6'd47: sum=3'd5; - 6'd48: sum=3'd2; 6'd49: sum=3'd3; 6'd50: sum=3'd3; 6'd51: sum=3'd4; + 6'd48: sum=3'd2; 6'd49: sum=3'd3; 6'd50: sum=3'd3; 6'd51: sum=3'd4; 6'd52: sum=3'd3; 6'd53: sum=3'd4; 6'd54: sum=3'd4; 6'd55: sum=3'd5; - 6'd56: sum=3'd3; 6'd57: sum=3'd4; 6'd58: sum=3'd4; 6'd59: sum=3'd5; + 6'd56: sum=3'd3; 6'd57: sum=3'd4; 6'd58: sum=3'd4; 6'd59: sum=3'd5; 6'd60: sum=3'd4; 6'd61: sum=3'd5; 6'd62: sum=3'd5; 6'd63: sum=3'd6; endcase end @@ -49,7 +49,7 @@ module VX_popcount32( reg [1:0] sum; always @(*) begin case (data_in) - 3'd0: sum=2'd0; 3'd1: sum=2'd1; 3'd2: sum=2'd1; 3'd3: sum=2'd2; + 3'd0: sum=2'd0; 3'd1: sum=2'd1; 3'd2: sum=2'd1; 3'd3: sum=2'd2; 3'd4: sum=2'd1; 3'd5: sum=2'd2; 3'd6: sum=2'd2; 3'd7: sum=2'd3; endcase end @@ -88,23 +88,23 @@ endmodule module VX_popcount #( parameter MODEL = 1, parameter N = 1, - parameter M = `CLOG2(N+1) + parameter M = `CLOG2(N+1) ) ( input wire [N-1:0] data_in, output wire [M-1:0] data_out ); - `UNUSED_PARAM (MODEL) + `UNUSED_PARAM (MODEL) `ifndef SYNTHESIS assign data_out = $countones(data_in); `elsif QUARTUS assign data_out = $countones(data_in); `else - if (N == 1) begin + if (N == 1) begin : g_passthru assign data_out = data_in; - end else if (N <= 3) begin + end else if (N <= 3) begin : g_popcount3 reg [2:0] t_in; wire [1:0] t_out; @@ -113,10 +113,10 @@ module VX_popcount #( t_in[N-1:0] = data_in; end VX_popcount32 pc32(t_in, t_out); - assign data_out = t_out[M-1:0]; - - end else if (N <= 6) begin - + assign data_out = t_out[M-1:0]; + + end else if (N <= 6) begin : g_popcount6 + reg [5:0] t_in; wire [2:0] t_out; always @(*) begin @@ -125,9 +125,9 @@ module VX_popcount #( end VX_popcount63 pc63(t_in, t_out); assign data_out = 
t_out[M-1:0]; - - end else if (N <= 9) begin - + + end else if (N <= 9) begin : g_popcount9 + reg [8:0] t_in; wire [4:0] t1_out; wire [3:0] t2_out; @@ -140,8 +140,8 @@ module VX_popcount #( VX_sum33 sum33(t1_out[2:0], {1'b0, t1_out[4:3]}, t2_out); assign data_out = t2_out[M-1:0]; - end else if (N <= 12) begin - + end else if (N <= 12) begin : g_popcount12 + reg [11:0] t_in; wire [5:0] t1_out; wire [3:0] t2_out; @@ -154,8 +154,8 @@ module VX_popcount #( VX_sum33 sum33(t1_out[2:0], t1_out[5:3], t2_out); assign data_out = t2_out[M-1:0]; - end else if (N <= 18) begin - + end else if (N <= 18) begin : g_popcount18 + reg [17:0] t_in; wire [8:0] t1_out; wire [5:0] t2_out; @@ -171,23 +171,23 @@ module VX_popcount #( VX_popcount32 pc32c({t1_out[2], t1_out[5], t1_out[8]}, t2_out[5:4]); assign data_out = {2'b0,t2_out[1:0]} + {1'b0,t2_out[3:2],1'b0} + {t2_out[5:4],2'b0}; - end else if (MODEL == 1) begin + end else if (MODEL == 1) begin : g_model1 localparam PN = 1 << `CLOG2(N); localparam LOGPN = `CLOG2(PN); `IGNORE_UNOPTFLAT_BEGIN - wire [M-1:0] tmp [LOGPN-1:0][PN-1:0]; + wire [M-1:0] tmp [LOGPN-1:0][PN-1:0]; `IGNORE_UNOPTFLAT_END for (genvar j = 0; j < LOGPN; ++j) begin localparam D = j + 1; localparam Q = (D < LOGPN) ? 
(D + 1) : M; - for (genvar i = 0; i < (1 << (LOGPN-j-1)); ++i) begin + for (genvar i = 0; i < (1 << (LOGPN-j-1)); ++i) begin localparam l = i * 2; localparam r = i * 2 + 1; - wire [Q-1:0] res; - if (j == 0) begin + wire [Q-1:0] res; + if (j == 0) begin if (r < N) begin assign res = data_in[l] + data_in[r]; end else if (l < N) begin @@ -203,20 +203,20 @@ module VX_popcount #( end assign data_out = tmp[LOGPN-1][0]; - - end else begin - reg [M-1:0] cnt_r; + end else begin : g_model2 + + reg [M-1:0] cnt_w; always @(*) begin - cnt_r = '0; + cnt_w = '0; for (integer i = 0; i < N; ++i) begin - cnt_r = cnt_r + M'(data_in[i]); + cnt_w = cnt_w + M'(data_in[i]); end end - assign data_out = cnt_r; - + assign data_out = cnt_w; + end `endif diff --git a/hw/rtl/libs/VX_priority_arbiter.sv b/hw/rtl/libs/VX_priority_arbiter.sv index cd4844d25..de5a3b3b1 100644 --- a/hw/rtl/libs/VX_priority_arbiter.sv +++ b/hw/rtl/libs/VX_priority_arbiter.sv @@ -23,21 +23,21 @@ module VX_priority_arbiter #( output wire [NUM_REQS-1:0] grant_onehot, output wire grant_valid ); - if (NUM_REQS == 1) begin + if (NUM_REQS == 1) begin : g_passthru assign grant_index = '0; assign grant_onehot = requests; assign grant_valid = requests[0]; - end else begin + end else begin : g_encoder VX_priority_encoder #( .N (NUM_REQS) ) priority_encoder ( - .data_in (requests), - .index (grant_index), - .onehot (grant_onehot), - .valid_out (grant_valid) + .data_in (requests), + .index_out (grant_index), + .onehot_out (grant_onehot), + .valid_out (grant_valid) ); end diff --git a/hw/rtl/libs/VX_priority_encoder.sv b/hw/rtl/libs/VX_priority_encoder.sv index 5a08e3412..444c40683 100644 --- a/hw/rtl/libs/VX_priority_encoder.sv +++ b/hw/rtl/libs/VX_priority_encoder.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,116 +14,117 @@ `include "VX_platform.vh" `TRACING_OFF -module VX_priority_encoder #( - parameter N = 1, +module VX_priority_encoder #( + parameter N = 1, parameter REVERSE = 0, parameter MODEL = 1, parameter LN = `LOG2UP(N) ) ( - input wire [N-1:0] data_in, - output wire [N-1:0] onehot, - output wire [LN-1:0] index, + input wire [N-1:0] data_in, + output wire [N-1:0] onehot_out, + output wire [LN-1:0] index_out, output wire valid_out ); - wire [N-1:0] reversed; + wire [N-1:0] reversed; - if (REVERSE != 0) begin - for (genvar i = 0; i < N; ++i) begin + if (REVERSE != 0) begin : g_reverse + for (genvar i = 0; i < N; ++i) begin : g_i assign reversed[N-i-1] = data_in[i]; - end - end else begin + end + end else begin : g_no_reverse assign reversed = data_in; end - if (N == 1) begin + if (N == 1) begin : g_n1 - assign onehot = reversed; - assign index = '0; - assign valid_out = reversed; + assign onehot_out = reversed; + assign index_out = '0; + assign valid_out = reversed; - end else if (N == 2) begin + end else if (N == 2) begin : g_n2 - assign onehot = {~reversed[0], reversed[0]}; - assign index = ~reversed[0]; - assign valid_out = (| reversed); + assign onehot_out = {reversed[1] && ~reversed[0], reversed[0]}; + assign index_out = ~reversed[0]; + assign valid_out = (| reversed); - end else if (MODEL == 1) begin + end else if (MODEL == 1) begin : g_model1 - wire [N-1:0] scan_lo; + `IGNORE_UNOPTFLAT_BEGIN + wire [N-1:0] higher_pri_regs; + `IGNORE_UNOPTFLAT_END - VX_scan #( - .N (N), - .OP (2) - ) scan ( - .data_in (reversed), - .data_out (scan_lo) - ); + assign higher_pri_regs[0] = 1'b0; + for (genvar i = 1; i < N; ++i) begin : g_higher_pri_regs + assign 
higher_pri_regs[i] = higher_pri_regs[i-1] | reversed[i-1]; + end + assign onehot_out[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; VX_lzc #( .N (N), .REVERSE (1) ) lzc ( - .data_in (reversed), - .data_out (index), - `UNUSED_PIN (valid_out) + .data_in (reversed), + .data_out (index_out), + .valid_out (valid_out) ); - assign onehot = scan_lo & {(~scan_lo[N-2:0]), 1'b1}; - assign valid_out = scan_lo[N-1]; + end else if (MODEL == 2) begin : g_model2 - end else if (MODEL == 2) begin + wire [N-1:0] scan_lo; - `IGNORE_WARNINGS_BEGIN - wire [N-1:0] higher_pri_regs; - `IGNORE_WARNINGS_END - assign higher_pri_regs[N-1:1] = higher_pri_regs[N-2:0] | reversed[N-2:0]; - assign higher_pri_regs[0] = 1'b0; - assign onehot[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0]; + VX_scan #( + .N (N), + .OP ("|") + ) scan ( + .data_in (reversed), + .data_out (scan_lo) + ); VX_lzc #( .N (N), .REVERSE (1) ) lzc ( - .data_in (reversed), - .data_out (index), - .valid_out (valid_out) + .data_in (reversed), + .data_out (index_out), + .valid_out(valid_out) ); - end else if (MODEL == 3) begin + assign onehot_out = scan_lo & {(~scan_lo[N-2:0]), 1'b1}; + + end else if (MODEL == 3) begin : g_model3 - assign onehot = reversed & -reversed; + assign onehot_out = reversed & -reversed; VX_lzc #( .N (N), .REVERSE (1) ) lzc ( .data_in (reversed), - .data_out (index), + .data_out (index_out), .valid_out (valid_out) ); - end else begin + end else begin : g_model0 - reg [LN-1:0] index_r; - reg [N-1:0] onehot_r; + reg [LN-1:0] index_w; + reg [N-1:0] onehot_w; always @(*) begin - index_r = 'x; - onehot_r = 'x; + index_w = 'x; + onehot_w = 'x; for (integer i = N-1; i >= 0; --i) begin if (reversed[i]) begin - index_r = LN'(i); - onehot_r = '0; - onehot_r[i] = 1'b1; + index_w = LN'(i); + onehot_w = N'(1) << i; end end - end + end - assign index = index_r; - assign onehot = onehot_r; - assign valid_out = (| reversed); + assign index_out = index_w; + assign onehot_out = onehot_w; + assign valid_out = (| 
reversed); - end + end endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_reduce.sv b/hw/rtl/libs/VX_reduce.sv index ac0117567..15c0f0228 100644 --- a/hw/rtl/libs/VX_reduce.sv +++ b/hw/rtl/libs/VX_reduce.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,7 +14,7 @@ `include "VX_platform.vh" `TRACING_OFF -module VX_reduce #( +module VX_reduce #( parameter DATAW_IN = 1, parameter DATAW_OUT = DATAW_IN, parameter N = 1, @@ -23,9 +23,9 @@ module VX_reduce #( input wire [N-1:0][DATAW_IN-1:0] data_in, output wire [DATAW_OUT-1:0] data_out ); - if (N == 1) begin + if (N == 1) begin : g_passthru assign data_out = DATAW_OUT'(data_in[0]); - end else begin + end else begin : g_reduce localparam int N_A = N / 2; localparam int N_B = N - N_A; @@ -33,40 +33,46 @@ module VX_reduce #( wire [N_B-1:0][DATAW_IN-1:0] in_B; wire [DATAW_OUT-1:0] out_A, out_B; - for (genvar i = 0; i < N_A; i++) begin + for (genvar i = 0; i < N_A; i++) begin : g_in_A assign in_A[i] = data_in[i]; end - for (genvar i = 0; i < N_B; i++) begin + for (genvar i = 0; i < N_B; i++) begin : g_in_B assign in_B[i] = data_in[N_A + i]; end VX_reduce #( - .DATAW_IN (DATAW_IN), + .DATAW_IN (DATAW_IN), .DATAW_OUT (DATAW_OUT), .N (N_A), .OP (OP) ) reduce_A ( - .data_in (in_A), + .data_in (in_A), .data_out (out_A) ); VX_reduce #( - .DATAW_IN (DATAW_IN), + .DATAW_IN (DATAW_IN), .DATAW_OUT (DATAW_OUT), .N (N_B), .OP (OP) ) reduce_B ( - .data_in (in_B), + .data_in (in_B), .data_out (out_B) ); - if (OP == "+") assign data_out = out_A + out_B; - else if (OP == "^") assign data_out 
= out_A ^ out_B; - else if (OP == "&") assign data_out = out_A & out_B; - else if (OP == "|") assign data_out = out_A | out_B; - else `ERROR(("invalid parameter")); + if (OP == "+") begin : g_plus + assign data_out = out_A + out_B; + end else if (OP == "^") begin : g_xor + assign data_out = out_A ^ out_B; + end else if (OP == "&") begin : g_and + assign data_out = out_A & out_B; + end else if (OP == "|") begin : g_or + assign data_out = out_A | out_B; + end else begin : g_error + `ERROR(("invalid parameter")); + end end - + endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_reset_relay.sv b/hw/rtl/libs/VX_reset_relay.sv index d7e735c25..0e2a7f4ca 100644 --- a/hw/rtl/libs/VX_reset_relay.sv +++ b/hw/rtl/libs/VX_reset_relay.sv @@ -22,19 +22,19 @@ module VX_reset_relay #( input wire reset, output wire [N-1:0] reset_o ); - if (MAX_FANOUT >= 0 && N > (MAX_FANOUT + MAX_FANOUT/2)) begin + if (MAX_FANOUT >= 0 && N > (MAX_FANOUT + MAX_FANOUT/2)) begin : g_relay localparam F = `UP(MAX_FANOUT); localparam R = N / F; `PRESERVE_NET reg [R-1:0] reset_r; - for (genvar i = 0; i < R; ++i) begin + for (genvar i = 0; i < R; ++i) begin : g_reset_r always @(posedge clk) begin reset_r[i] <= reset; end end - for (genvar i = 0; i < N; ++i) begin + for (genvar i = 0; i < N; ++i) begin : g_reset_o assign reset_o[i] = reset_r[i / F]; end - end else begin + end else begin : g_passthru `UNUSED_VAR (clk) assign reset_o = {N{reset}}; end diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 52a981184..efe9838d6 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -28,7 +28,7 @@ module VX_rr_arbiter #( output wire grant_valid, input wire grant_ready ); - if (NUM_REQS == 1) begin + if (NUM_REQS == 1) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -38,17 +38,19 @@ module VX_rr_arbiter #( assign grant_onehot = requests; assign grant_valid = requests[0]; - end else if (LUT_OPT && NUM_REQS == 2) begin + end else if (LUT_OPT && NUM_REQS == 
2) begin : g_lut2 - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 3'b0_01, - 3'b1_?1: begin grant_onehot_r = 2'b01; grant_index_r = LOG_NUM_REQS'(0); end - default: begin grant_onehot_r = 2'b10; grant_index_r = LOG_NUM_REQS'(1); end + 3'b1_?1: begin grant_onehot_w = 2'b01; grant_index_w = LOG_NUM_REQS'(0); end + 3'b0_1?, + 3'b1_10: begin grant_onehot_w = 2'b10; grant_index_w = LOG_NUM_REQS'(1); end + default: begin grant_onehot_w = 2'b00; grant_index_w = 'x; end endcase end @@ -56,29 +58,32 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_index = grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 3) begin + end else if (LUT_OPT && NUM_REQS == 3) begin : g_lut3 - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 5'b00_001, 5'b01_0?1, - 5'b10_??1: begin grant_onehot_r = 3'b001; grant_index_r = LOG_NUM_REQS'(0); end + 5'b10_??1: begin grant_onehot_w = 3'b001; grant_index_w = LOG_NUM_REQS'(0); end 5'b00_?1?, 5'b01_010, - 5'b10_?10: begin grant_onehot_r = 3'b010; grant_index_r = LOG_NUM_REQS'(1); end - default: begin grant_onehot_r = 3'b100; grant_index_r = LOG_NUM_REQS'(2); end + 5'b10_?10: begin grant_onehot_w = 3'b010; grant_index_w = LOG_NUM_REQS'(1); end + 5'b00_10?, + 5'b01_1??, + 5'b10_100: begin grant_onehot_w = 3'b100; grant_index_w = LOG_NUM_REQS'(2); end + default: begin 
grant_onehot_w = 3'b000; grant_index_w = 'x; end endcase end @@ -86,35 +91,39 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_index = grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 4) begin + end else if (LUT_OPT && NUM_REQS == 4) begin : g_lut4 - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) 6'b00_0001, 6'b01_00?1, 6'b10_0??1, - 6'b11_???1: begin grant_onehot_r = 4'b0001; grant_index_r = LOG_NUM_REQS'(0); end + 6'b11_???1: begin grant_onehot_w = 4'b0001; grant_index_w = LOG_NUM_REQS'(0); end 6'b00_??1?, 6'b01_0010, 6'b10_0?10, - 6'b11_??10: begin grant_onehot_r = 4'b0010; grant_index_r = LOG_NUM_REQS'(1); end + 6'b11_??10: begin grant_onehot_w = 4'b0010; grant_index_w = LOG_NUM_REQS'(1); end 6'b00_?10?, 6'b01_?1??, 6'b10_0100, - 6'b11_?100: begin grant_onehot_r = 4'b0100; grant_index_r = LOG_NUM_REQS'(2); end - default: begin grant_onehot_r = 4'b1000; grant_index_r = LOG_NUM_REQS'(3); end + 6'b11_?100: begin grant_onehot_w = 4'b0100; grant_index_w = LOG_NUM_REQS'(2); end + 6'b00_100?, + 6'b01_10??, + 6'b10_1???, + 6'b11_1000: begin grant_onehot_w = 4'b1000; grant_index_w = LOG_NUM_REQS'(3); end + default: begin grant_onehot_w = 4'b0000; grant_index_w = 'x; end endcase end @@ -122,19 +131,19 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_index = grant_index_w; + assign grant_onehot = 
grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 5) begin + end else if (LUT_OPT && NUM_REQS == 5) begin : g_lut5 - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) @@ -142,23 +151,28 @@ module VX_rr_arbiter #( 8'b001_000?1, 8'b010_00??1, 8'b011_0???1, - 8'b100_????1: begin grant_onehot_r = 5'b00001; grant_index_r = LOG_NUM_REQS'(0); end + 8'b100_????1: begin grant_onehot_w = 5'b00001; grant_index_w = LOG_NUM_REQS'(0); end 8'b000_???1?, 8'b001_00010, 8'b010_00?10, 8'b011_0??10, - 8'b100_???10: begin grant_onehot_r = 5'b00010; grant_index_r = LOG_NUM_REQS'(1); end + 8'b100_???10: begin grant_onehot_w = 5'b00010; grant_index_w = LOG_NUM_REQS'(1); end 8'b000_??10?, 8'b001_??1??, 8'b010_00100, 8'b011_0?100, - 8'b100_??100: begin grant_onehot_r = 5'b00100; grant_index_r = LOG_NUM_REQS'(2); end + 8'b100_??100: begin grant_onehot_w = 5'b00100; grant_index_w = LOG_NUM_REQS'(2); end 8'b000_?100?, 8'b001_?10??, 8'b010_?1???, 8'b011_01000, - 8'b100_?1000: begin grant_onehot_r = 5'b01000; grant_index_r = LOG_NUM_REQS'(3); end - default: begin grant_onehot_r = 5'b10000; grant_index_r = LOG_NUM_REQS'(4); end + 8'b100_?1000: begin grant_onehot_w = 5'b01000; grant_index_w = LOG_NUM_REQS'(3); end + 8'b000_1000?, + 8'b001_100??, + 8'b010_10???, + 8'b011_1????, + 8'b100_10000: begin grant_onehot_w = 5'b10000; grant_index_w = LOG_NUM_REQS'(4); end + default: begin grant_onehot_w = 5'b00000; grant_index_w = 'x; end endcase end @@ -166,19 +180,19 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_index = grant_index_w; + assign grant_onehot = 
grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 6) begin + end else if (LUT_OPT && NUM_REQS == 6) begin : g_lut6 - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) @@ -187,32 +201,38 @@ module VX_rr_arbiter #( 9'b010_000??1, 9'b011_00???1, 9'b100_0????1, - 9'b101_?????1: begin grant_onehot_r = 6'b000001; grant_index_r = LOG_NUM_REQS'(0); end + 9'b101_?????1: begin grant_onehot_w = 6'b000001; grant_index_w = LOG_NUM_REQS'(0); end 9'b000_????1?, 9'b001_000010, 9'b010_000?10, 9'b011_00??10, 9'b100_0???10, - 9'b101_????10: begin grant_onehot_r = 6'b000010; grant_index_r = LOG_NUM_REQS'(1); end + 9'b101_????10: begin grant_onehot_w = 6'b000010; grant_index_w = LOG_NUM_REQS'(1); end 9'b000_???10?, 9'b001_???1??, 9'b010_000100, 9'b011_00?100, 9'b100_0??100, - 9'b101_???100: begin grant_onehot_r = 6'b000100; grant_index_r = LOG_NUM_REQS'(2); end + 9'b101_???100: begin grant_onehot_w = 6'b000100; grant_index_w = LOG_NUM_REQS'(2); end 9'b000_??100?, 9'b001_??10??, 9'b010_??1???, 9'b011_001000, 9'b100_0?1000, - 9'b101_??1000: begin grant_onehot_r = 6'b001000; grant_index_r = LOG_NUM_REQS'(3); end + 9'b101_??1000: begin grant_onehot_w = 6'b001000; grant_index_w = LOG_NUM_REQS'(3); end 9'b000_?1000?, 9'b001_?100??, 9'b010_?10???, 9'b011_?1????, 9'b100_010000, - 9'b101_?10000: begin grant_onehot_r = 6'b010000; grant_index_r = LOG_NUM_REQS'(4); end - default: begin grant_onehot_r = 6'b100000; grant_index_r = LOG_NUM_REQS'(5); end + 9'b101_?10000: begin grant_onehot_w = 6'b010000; grant_index_w = LOG_NUM_REQS'(4); end + 9'b000_10000?, + 9'b001_1000??, + 9'b010_100???, + 9'b011_10????, + 9'b100_1?????, + 9'b101_100000: begin grant_onehot_w = 6'b100000; grant_index_w = LOG_NUM_REQS'(5); end + default: begin grant_onehot_w = 6'b000000; 
grant_index_w = 'x; end endcase end @@ -220,65 +240,72 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_index = grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 7) begin + end else if (LUT_OPT && NUM_REQS == 7) begin : g_lut7 - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) - 10'b000_000001, - 10'b001_0000?1, - 10'b010_000??1, - 10'b011_00???1, - 10'b100_00???1, - 10'b101_0????1, - 10'b110_?????1: begin grant_onehot_r = 7'b0000001; grant_index_r = LOG_NUM_REQS'(0); end + 10'b000_0000001, + 10'b001_00000?1, + 10'b010_0000??1, + 10'b011_000???1, + 10'b100_000???1, + 10'b101_00????1, + 10'b110_??????1: begin grant_onehot_w = 7'b0000001; grant_index_w = LOG_NUM_REQS'(0); end 10'b000_?????1?, 10'b001_0000010, 10'b010_0000?10, 10'b011_000??10, 10'b100_00???10, 10'b101_0????10, - 10'b110_?????10: begin grant_onehot_r = 7'b0000010; grant_index_r = LOG_NUM_REQS'(1); end + 10'b110_?????10: begin grant_onehot_w = 7'b0000010; grant_index_w = LOG_NUM_REQS'(1); end 10'b000_????10?, 10'b001_????1??, 10'b010_0000100, 10'b011_000?100, 10'b100_00??100, 10'b101_0???100, - 10'b110_????100: begin grant_onehot_r = 7'b0000100; grant_index_r = LOG_NUM_REQS'(2); end + 10'b110_????100: begin grant_onehot_w = 7'b0000100; grant_index_w = LOG_NUM_REQS'(2); end 10'b000_???100?, 10'b001_???10??, 10'b010_???1???, 10'b011_0001000, 10'b100_00?1000, 10'b101_0??1000, - 10'b110_???1000: begin grant_onehot_r = 7'b0001000; grant_index_r = LOG_NUM_REQS'(3); end + 10'b110_???1000: begin grant_onehot_w = 7'b0001000; grant_index_w 
= LOG_NUM_REQS'(3); end 10'b000_??1000?, 10'b001_??100??, 10'b010_??10???, 10'b011_??1????, 10'b100_0010000, 10'b101_0?10000, - 10'b110_??10000: begin grant_onehot_r = 7'b0010000; grant_index_r = LOG_NUM_REQS'(4); end + 10'b110_??10000: begin grant_onehot_w = 7'b0010000; grant_index_w = LOG_NUM_REQS'(4); end 10'b000_?10000?, 10'b001_?1000??, 10'b010_?100???, 10'b011_?10????, 10'b100_?1?????, 10'b101_0100000, - 10'b110_?100000: begin grant_onehot_r = 7'b0100000; grant_index_r = LOG_NUM_REQS'(5); end - default: begin grant_onehot_r = 7'b1000000; grant_index_r = LOG_NUM_REQS'(6); end + 10'b110_?100000: begin grant_onehot_w = 7'b0100000; grant_index_w = LOG_NUM_REQS'(5); end + 10'b000_100000?, + 10'b001_10000??, + 10'b010_1000???, + 10'b011_100????, + 10'b100_10?????, + 10'b101_1??????, + 10'b110_1000000: begin grant_onehot_w = 7'b1000000; grant_index_w = LOG_NUM_REQS'(6); end + default: begin grant_onehot_w = 7'b0000000; grant_index_w = 'x; end endcase end @@ -286,19 +313,19 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_index = grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (LUT_OPT && NUM_REQS == 8) begin + end else if (LUT_OPT && NUM_REQS == 8) begin : g_lut8 - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; - reg [LOG_NUM_REQS-1:0] state; + reg [LOG_NUM_REQS-1:0] grant_index_w; + reg [NUM_REQS-1:0] grant_onehot_w; + reg [LOG_NUM_REQS-1:0] state; always @(*) begin casez ({state, requests}) @@ -309,7 +336,7 @@ module VX_rr_arbiter #( 11'b100_000????1, 11'b101_00?????1, 11'b110_0??????1, - 11'b111_???????1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end + 11'b111_???????1: begin grant_onehot_w = 8'b00000001; grant_index_w = LOG_NUM_REQS'(0); end 11'b000_??????1?, 
11'b001_00000010, 11'b010_00000?10, @@ -317,7 +344,7 @@ module VX_rr_arbiter #( 11'b100_000???10, 11'b101_00????10, 11'b110_0?????10, - 11'b111_??????10: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end + 11'b111_??????10: begin grant_onehot_w = 8'b00000010; grant_index_w = LOG_NUM_REQS'(1); end 11'b000_?????10?, 11'b001_?????1??, 11'b010_00000100, @@ -325,7 +352,7 @@ module VX_rr_arbiter #( 11'b100_000??100, 11'b101_00???100, 11'b110_0????100, - 11'b111_?????100: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end + 11'b111_?????100: begin grant_onehot_w = 8'b00000100; grant_index_w = LOG_NUM_REQS'(2); end 11'b000_????100?, 11'b001_????10??, 11'b010_????1???, @@ -333,7 +360,7 @@ module VX_rr_arbiter #( 11'b100_000?1000, 11'b101_00??1000, 11'b110_0???1000, - 11'b111_????1000: begin grant_onehot_r = 8'b00001000; grant_index_r = LOG_NUM_REQS'(3); end + 11'b111_????1000: begin grant_onehot_w = 8'b00001000; grant_index_w = LOG_NUM_REQS'(3); end 11'b000_???1000?, 11'b001_???100??, 11'b010_???10???, @@ -341,7 +368,7 @@ module VX_rr_arbiter #( 11'b100_00010000, 11'b101_00?10000, 11'b110_0??10000, - 11'b111_???10000: begin grant_onehot_r = 8'b00010000; grant_index_r = LOG_NUM_REQS'(4); end + 11'b111_???10000: begin grant_onehot_w = 8'b00010000; grant_index_w = LOG_NUM_REQS'(4); end 11'b000_??10000?, 11'b001_??1000??, 11'b010_??100???, @@ -349,7 +376,7 @@ module VX_rr_arbiter #( 11'b100_??1?????, 11'b101_00100000, 11'b110_0?100000, - 11'b111_??100000: begin grant_onehot_r = 8'b00100000; grant_index_r = LOG_NUM_REQS'(5); end + 11'b111_??100000: begin grant_onehot_w = 8'b00100000; grant_index_w = LOG_NUM_REQS'(5); end 11'b000_?100000?, 11'b001_?10000??, 11'b010_?1000???, @@ -357,8 +384,16 @@ module VX_rr_arbiter #( 11'b100_?10?????, 11'b101_?1??????, 11'b110_01000000, - 11'b111_?1000000: begin grant_onehot_r = 8'b01000000; grant_index_r = LOG_NUM_REQS'(6); end - default: begin grant_onehot_r = 8'b10000000; grant_index_r = 
LOG_NUM_REQS'(7); end + 11'b111_?1000000: begin grant_onehot_w = 8'b01000000; grant_index_w = LOG_NUM_REQS'(6); end + 11'b000_1000000?, + 11'b001_100000??, + 11'b010_10000???, + 11'b011_1000????, + 11'b100_100?????, + 11'b101_10??????, + 11'b110_1???????, + 11'b111_10000000: begin grant_onehot_w = 8'b10000000; grant_index_w = LOG_NUM_REQS'(7); end + default: begin grant_onehot_w = 8'b00000000; grant_index_w = 'x; end endcase end @@ -366,81 +401,72 @@ module VX_rr_arbiter #( if (reset) begin state <= '0; end else if (grant_ready) begin - state <= grant_index_r; + state <= grant_index_w; end end - assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; + assign grant_index = grant_index_w; + assign grant_onehot = grant_onehot_w; assign grant_valid = (| requests); - end else if (MODEL == 1) begin + end else if (MODEL == 1) begin : g_model1 `IGNORE_UNOPTFLAT_BEGIN - wire [NUM_REQS-1:0] mask_higher_pri_regs, unmask_higher_pri_regs; + wire [NUM_REQS-1:0] masked_pri_reqs, unmasked_pri_reqs; `IGNORE_UNOPTFLAT_END - wire [NUM_REQS-1:0] grant_masked, grant_unmasked; + reg [NUM_REQS-1:0] reqs_mask; - reg [NUM_REQS-1:0] pointer_reg; + wire [NUM_REQS-1:0] masked_reqs = requests & reqs_mask; - wire [NUM_REQS-1:0] req_masked = requests & pointer_reg; - - assign mask_higher_pri_regs[0] = 1'b0; - for (genvar i = 1; i < NUM_REQS; ++i) begin - assign mask_higher_pri_regs[i] = mask_higher_pri_regs[i-1] | req_masked[i-1]; + assign masked_pri_reqs[0] = 1'b0; + for (genvar i = 1; i < NUM_REQS; ++i) begin : g_masked_pri_reqs + assign masked_pri_reqs[i] = masked_pri_reqs[i-1] | masked_reqs[i-1]; end - assign grant_masked[NUM_REQS-1:0] = req_masked[NUM_REQS-1:0] & ~mask_higher_pri_regs[NUM_REQS-1:0]; - - assign unmask_higher_pri_regs[0] = 1'b0; - for (genvar i = 1; i < NUM_REQS; ++i) begin - assign unmask_higher_pri_regs[i] = unmask_higher_pri_regs[i-1] | requests[i-1]; + assign unmasked_pri_reqs[0] = 1'b0; + for (genvar i = 1; i < NUM_REQS; ++i) begin : 
g_unmasked_pri_reqs + assign unmasked_pri_reqs[i] = unmasked_pri_reqs[i-1] | requests[i-1]; end - assign grant_unmasked[NUM_REQS-1:0] = requests[NUM_REQS-1:0] & ~unmask_higher_pri_regs[NUM_REQS-1:0]; + wire [NUM_REQS-1:0] grant_masked = masked_reqs & ~masked_pri_reqs; + wire [NUM_REQS-1:0] grant_unmasked = requests & ~unmasked_pri_reqs; - wire no_req_masked = ~(|req_masked); - assign grant_onehot = ({NUM_REQS{no_req_masked}} & grant_unmasked) | grant_masked; + wire has_masked_reqs = (| masked_reqs); + wire has_unmasked_reqs = (| requests); + + assign grant_onehot = has_masked_reqs ? grant_masked : grant_unmasked; always @(posedge clk) begin if (reset) begin - pointer_reg <= {NUM_REQS{1'b1}}; + reqs_mask <= {NUM_REQS{1'b1}}; end else if (grant_ready) begin - if (|req_masked) begin - pointer_reg <= mask_higher_pri_regs; - end else if (|requests) begin - pointer_reg <= unmask_higher_pri_regs; - end else begin - pointer_reg <= pointer_reg; + if (has_masked_reqs) begin + reqs_mask <= masked_pri_reqs; + end else if (has_unmasked_reqs) begin + reqs_mask <= unmasked_pri_reqs; end end end - assign grant_valid = (| requests); - - VX_onehot_encoder #( + VX_encoder #( .N (NUM_REQS) ) onehot_encoder ( .data_in (grant_onehot), .data_out (grant_index), - `UNUSED_PIN (valid_out) + .valid_out(grant_valid) ); - end else begin + end else if (MODEL == 2) begin : g_model2 - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; - reg [NUM_REQS-1:0] state; + reg [NUM_REQS-1:0][LOG_NUM_REQS-1:0] grant_table; + reg [LOG_NUM_REQS-1:0] state; - always @(*) begin - grant_index_r = 'x; - grant_onehot_r = 'x; - for (integer i = 0; i < NUM_REQS; ++i) begin - for (integer j = 0; j < NUM_REQS; ++j) begin - if (state[i] && requests[(j + 1) % NUM_REQS]) begin - grant_index_r = LOG_NUM_REQS'((j + 1) % NUM_REQS); - grant_onehot_r = '0; - grant_onehot_r[(j + 1) % NUM_REQS] = 1; + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_grant_table + always @(*) begin + grant_table[i] = 'x; 
+ for (integer j = NUM_REQS-1; j >= 0; --j) begin + if (requests[(i+j+1) % NUM_REQS]) begin + grant_table[i] = LOG_NUM_REQS'((i+j+1) % NUM_REQS); end end end @@ -448,15 +474,24 @@ module VX_rr_arbiter #( always @(posedge clk) begin if (reset) begin - state <= '0; - end else if (grant_ready) begin - state <= grant_index_r; + state <= 0; + end else if (grant_valid && grant_ready) begin + state <= grant_index; end end - assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; - assign grant_valid = (| requests); + VX_decoder #( + .N (LOG_NUM_REQS), + .D (NUM_REQS) + ) grant_decoder ( + .data_in (grant_index), + .valid_in (grant_valid), + .data_out (grant_onehot) + ); + + assign grant_index = grant_table[state]; + assign grant_valid = (| requests); + end endmodule diff --git a/hw/rtl/libs/VX_scan.sv b/hw/rtl/libs/VX_scan.sv index f263dd218..6effd5814 100644 --- a/hw/rtl/libs/VX_scan.sv +++ b/hw/rtl/libs/VX_scan.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,8 +19,8 @@ `TRACING_OFF module VX_scan #( parameter N = 1, - parameter OP = 0, // 0: XOR, 1: AND, 2: OR - parameter REVERSE = 0 // 0: LO->HI, 1: HI->LO + parameter `STRING OP = "^", // ^: XOR, &: AND, |: OR + parameter REVERSE = 0 // 0: LO->HI, 1: HI->LO ) ( input wire [N-1:0] data_in, output wire [N-1:0] data_out @@ -28,48 +28,48 @@ module VX_scan #( localparam LOGN = `CLOG2(N); `IGNORE_UNOPTFLAT_BEGIN - wire [LOGN:0][N-1:0] t; + wire [LOGN:0][N-1:0] t; `IGNORE_UNOPTFLAT_END // reverses bits - if (REVERSE != 0) begin + if (REVERSE != 0) begin : g_data_in_reverse assign t[0] = data_in; - end else begin + end else begin : g_data_in_no_reverse assign t[0] = {<<{data_in}}; end // optimize for the common case of small and-scans - if ((N == 2) && (OP == 1)) begin + if ((N == 2) && (OP == "&")) begin : g_scan_n2_and assign t[LOGN] = {t[0][1], &t[0][1:0]}; - end else if ((N == 3) && (OP == 1)) begin + end else if ((N == 3) && (OP == "&")) begin : g_scan_n3_and assign t[LOGN] = {t[0][2], &t[0][2:1], &t[0][2:0]}; - end else if ((N == 4) && (OP == 1)) begin + end else if ((N == 4) && (OP == "&")) begin : g_scan_n4_and assign t[LOGN] = {t[0][3], &t[0][3:2], &t[0][3:1], &t[0][3:0]}; - end else begin + end else begin : g_scan // general case wire [N-1:0] fill; - for (genvar i = 0; i < LOGN; ++i) begin + for (genvar i = 0; i < LOGN; ++i) begin : g_i wire [N-1:0] shifted = N'({fill, t[i]} >> (1< 1) begin + if (N > 1) begin : g_switch reg req_out_r [N]; reg rsp_out_r; @@ -34,7 +34,7 @@ module VX_scope_switch #( req_out_r[i] <= 0; end rsp_out_r <= 0; - end else begin + end else begin for (integer i = 0; i < N; ++i) begin req_out_r[i] <= req_in; end @@ -46,10 +46,13 @@ module VX_scope_switch #( end end - assign req_out = req_out_r; + for (genvar i = 0; i < N; ++i) begin : g_req_out + assign req_out[i] = req_out_r[i]; + end + assign rsp_out = rsp_out_r; - - end else begin + + end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) diff --git 
a/hw/rtl/libs/VX_scope_tap.sv b/hw/rtl/libs/VX_scope_tap.sv index c5ba778a2..6a9b70ff1 100644 --- a/hw/rtl/libs/VX_scope_tap.sv +++ b/hw/rtl/libs/VX_scope_tap.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,300 +14,408 @@ `include "VX_platform.vh" `TRACING_OFF -module VX_scope_tap #( +module VX_scope_tap #( parameter SCOPE_ID = 0, // scope identifier parameter SCOPE_IDW = 8, // scope identifier width - parameter TRIGGERW = 0, // trigger signals width - parameter PROBEW = 0, // probe signal width - parameter SIZE = 256, // trace buffer size - parameter IDLE_CTRW = 16 // idle time between triggers counter width -) ( + parameter XTRIGGERW = 0, // changed trigger signals width + parameter HTRIGGERW = 0, // high trigger signals width + parameter PROBEW = 1, // probe signal width + parameter DEPTH = 256, // trace buffer depth + parameter IDLE_CTRW = 32, // idle time between triggers counter width + parameter TX_DATAW = 64 // transfer data width +) ( input wire clk, input wire reset, input wire start, input wire stop, - input wire [TRIGGERW-1:0] triggers, + input wire [`UP(XTRIGGERW)-1:0] xtriggers, + input wire [`UP(HTRIGGERW)-1:0] htriggers, input wire [PROBEW-1:0] probes, input wire bus_in, - output wire bus_out + output wire bus_out ); - localparam TX_DATAW = 64; - localparam TX_DATA_BITS = `LOG2UP(TX_DATAW); - localparam DATAW = PROBEW + TRIGGERW; - localparam DATA_BITS = `LOG2UP(DATAW); - localparam ADDRW = `CLOG2(SIZE); - localparam TRIGGER_ENABLE = (TRIGGERW != 0); - localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 
1; - - localparam CTRL_STATE_IDLE = 2'd0; - localparam CTRL_STATE_RECV = 2'd1; - localparam CTRL_STATE_CMD = 2'd2; - localparam CTRL_STATE_SEND = 2'd3; - localparam CTRL_STATE_BITS = 2; - - localparam TAP_STATE_IDLE = 2'd0; - localparam TAP_STATE_WAIT = 2'd1; - localparam TAP_STATE_RUN = 2'd2; - localparam TAP_STATE_BITS = 2; - - localparam CMD_GET_WIDTH = 3'd0; - localparam CMD_GET_COUNT = 3'd1; - localparam CMD_GET_START = 3'd2; - localparam CMD_GET_DATA = 3'd3; - localparam CMD_SET_START = 3'd4; - localparam CMD_SET_STOP = 3'd5; - localparam CMD_TYPE_BITS = 3; - - localparam GET_TYPE_WIDTH = 2'd0; - localparam GET_TYPE_COUNT = 2'd1; - localparam GET_TYPE_START = 2'd2; - localparam GET_TYPE_DATA = 2'd3; - localparam GET_TYPE_BITS = 2; - - `NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [SIZE-1:0]; - `NO_RW_RAM_CHECK reg [IDLE_CTRW-1:0] delta_store [SIZE-1:0]; - - reg [TRIGGERW-1:0] prev_triggers; - reg [IDLE_CTRW-1:0] delta; - reg [63:0] timestamp, start_time; + localparam HAS_TRIGGERS = XTRIGGERW != 0 || HTRIGGERW != 0; + localparam CTR_WIDTH = 64; + localparam SER_CTR_WIDTH = `LOG2UP(TX_DATAW); + localparam DATAW = PROBEW + XTRIGGERW + HTRIGGERW; + localparam ADDRW = `CLOG2(DEPTH); + localparam SIZEW = `CLOG2(DEPTH+1); + localparam MAX_IDLE_CTR = (2 ** IDLE_CTRW) - 1; + localparam DATA_BLOCKS = `CDIV(DATAW, TX_DATAW); + localparam BLOCK_IDX_WIDTH = `LOG2UP(DATA_BLOCKS); + + localparam CTRL_STATE_IDLE = 2'd0; + localparam CTRL_STATE_RECV = 2'd1; + localparam CTRL_STATE_CMD = 2'd2; + localparam CTRL_STATE_SEND = 2'd3; + localparam CTRL_STATE_BITS = 2; + + localparam TAP_STATE_IDLE = 2'd0; + localparam TAP_STATE_RUN = 2'd1; + localparam TAP_STATE_DONE = 2'd2; + localparam TAP_STATE_BITS = 2; - reg [ADDRW-1:0] waddr, waddr_end; + localparam CMD_GET_WIDTH = 3'd0; + localparam CMD_GET_COUNT = 3'd1; + localparam CMD_GET_START = 3'd2; + localparam CMD_GET_DATA = 3'd3; + localparam CMD_SET_START = 3'd4; + localparam CMD_SET_STOP = 3'd5; + localparam CMD_SET_DEPTH = 3'd6; + 
localparam CMD_TYPE_BITS = 3; - reg cmd_start, delta_flush; + localparam SEND_TYPE_WIDTH = 2'd0; + localparam SEND_TYPE_COUNT = 2'd1; + localparam SEND_TYPE_START = 2'd2; + localparam SEND_TYPE_DATA = 2'd3; + localparam SEND_TYPE_BITS = 2; - reg [63:0] start_delay, delay_cntr; + `STATIC_ASSERT ((IDLE_CTRW <= TX_DATAW), ("invalid parameter")) + `STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!")) reg [TAP_STATE_BITS-1:0] tap_state; reg [CTRL_STATE_BITS-1:0] ctrl_state; - reg [GET_TYPE_BITS-1:0] get_type; - - reg [TX_DATA_BITS-1:0] ser_tx_ctr; - reg [DATA_BITS-1:0] read_offset; + reg [SEND_TYPE_BITS-1:0] send_type; + + reg [CTR_WIDTH-1:0] timestamp, start_time; + reg [CTR_WIDTH-1:0] start_delay, stop_delay; + reg [`UP(XTRIGGERW)-1:0] prev_xtrig; + reg [`UP(HTRIGGERW)-1:0] prev_htrig; + reg [IDLE_CTRW-1:0] delta; + reg cmd_start, cmd_stop; + reg dflush; + + reg [SIZEW-1:0] waddr, waddr_end; + wire [DATAW-1:0] data_in; + + wire [DATAW-1:0] data_value; + wire [IDLE_CTRW-1:0] delta_value; reg [ADDRW-1:0] raddr; - reg read_data; // // trace capture // - wire [ADDRW-1:0] raddr_n = raddr + 1; + wire do_capture; + + wire write_en = (tap_state == TAP_STATE_RUN) && do_capture; + + if (HAS_TRIGGERS) begin : g_delta_store + if (XTRIGGERW != 0 && HTRIGGERW != 0) begin : g_data_in_pxh + assign data_in = {probes, xtriggers, htriggers}; + end else if (XTRIGGERW != 0) begin : g_data_in_px + assign data_in = {probes, xtriggers}; + end else begin : g_data_in_ph + assign data_in = {probes, htriggers}; + end + assign do_capture = dflush || (xtriggers != prev_xtrig) || (htriggers != prev_htrig) || (htriggers != '0); + VX_dp_ram #( + .DATAW (IDLE_CTRW), + .SIZE (DEPTH), + .OUT_REG (1), + .READ_ENABLE (0), + .NO_RWCHECK (1) + ) delta_store ( + .clk (clk), + .reset (reset), + .read (1'b1), + .wren (1'b1), + .write (write_en), + .waddr (waddr[ADDRW-1:0]), + .wdata (delta), + .raddr (raddr), + .rdata (delta_value) + ); + end else begin : g_no_delta_store + assign data_in = probes; 
+ assign delta_value = '0; + assign do_capture = 1; + end - wire [ADDRW:0] count = (ADDRW+1)'(waddr) + 1; + VX_dp_ram #( + .DATAW (DATAW), + .SIZE (DEPTH), + .OUT_REG (1), + .READ_ENABLE (0), + .NO_RWCHECK (1) + ) data_store ( + .clk (clk), + .reset (reset), + .read (1'b1), + .wren (1'b1), + .write (write_en), + .waddr (waddr[ADDRW-1:0]), + .wdata (data_in), + .raddr (raddr), + .rdata (data_value) + ); always @(posedge clk) begin if (reset) begin - tap_state <= TAP_STATE_IDLE; - raddr <= '0; - waddr <= '0; - delta <= '0; - prev_triggers <= '0; - read_offset <= '0; - read_data <= 0; - timestamp <= '0; + timestamp <= '0; end else begin - timestamp <= timestamp + 1; + timestamp <= timestamp + CTR_WIDTH'(1); + end + end + always @(posedge clk) begin + if (reset) begin + tap_state <= TAP_STATE_IDLE; + delta <= '0; + dflush <= 0; + prev_xtrig <= '0; + prev_htrig <= '0; + waddr <= '0; + end else begin case (tap_state) TAP_STATE_IDLE: begin - if (start || cmd_start) begin - delta <= '0; - delta_flush <= 1; - if (0 == start_delay) begin - tap_state <= TAP_STATE_RUN; - start_time <= timestamp; - `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)); - `endif - end else begin - tap_state <= TAP_STATE_WAIT; - delay_cntr <= start_delay; - `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: delayed start - time=%0d\n", $time, SCOPE_ID, start_delay)); - `endif - end - end - end - TAP_STATE_WAIT: begin - delay_cntr <= delay_cntr - 1; - if (1 == delay_cntr) begin + if (start || cmd_start) begin + dflush <= 1; tap_state <= TAP_STATE_RUN; start_time <= timestamp; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)); + `TRACE(2, ("%t: scope_tap%0d: recording start - time=%0d\n", $time, SCOPE_ID, timestamp)) `endif end end TAP_STATE_RUN: begin - if (TRIGGER_ENABLE != 0) begin - if (delta_flush || (triggers != prev_triggers)) begin - data_store[waddr] <= 
{probes, triggers}; - delta_store[waddr] <= delta; - waddr <= waddr + 1; - delta <= '0; - delta_flush <= 0; - end else begin - delta <= delta + 1; - delta_flush <= (delta == (MAX_IDLE_CTR-1)); + dflush <= 0; + if (!(stop || cmd_stop) && (waddr < waddr_end)) begin + if (do_capture) begin + waddr <= waddr + SIZEW'(1); end - prev_triggers <= triggers; - end else begin - data_store[waddr] <= {probes, triggers}; - delta_store[waddr] <= '0; - waddr <= waddr + 1; - end - if (stop || (waddr >= waddr_end)) begin - waddr <= waddr; + if (HAS_TRIGGERS) begin + if (do_capture) begin + delta <= '0; + end else begin + delta <= delta + IDLE_CTRW'(1); + dflush <= (delta == IDLE_CTRW'(MAX_IDLE_CTR-1)); + end + prev_xtrig <= xtriggers; + prev_htrig <= htriggers; + end + end else begin + tap_state <= TAP_STATE_DONE; `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)); + `TRACE(2, ("%t: scope_tap%0d: recording stop - waddr=(%0d, %0d)\n", $time, SCOPE_ID, waddr, waddr_end)) `endif - tap_state <= TAP_STATE_IDLE; end end default:; endcase - - if (ctrl_state == CTRL_STATE_SEND - && get_type == GET_TYPE_DATA - && ser_tx_ctr == 0) begin - if (~read_data) begin - read_data <= 1; - end else begin - if (DATAW > TX_DATAW) begin - `IGNORE_WARNINGS_BEGIN - if (read_offset < DATA_BITS'(DATAW-TX_DATAW)) begin - read_offset <= read_offset + DATA_BITS'(TX_DATAW); - end else begin - raddr <= raddr_n; - read_data <= 0; - read_offset <= '0; - end - `IGNORE_WARNINGS_END - end else begin - raddr <= raddr_n; - read_data <= 0; - end - if (raddr_n == waddr) begin - raddr <= 0; - end - end - end end end // - // command controller + // trace controller // - + reg bus_out_r; - + reg [TX_DATAW-1:0] ser_buf_in; wire [TX_DATAW-1:0] ser_buf_in_n = {ser_buf_in[TX_DATAW-2:0], bus_in}; `UNUSED_VAR (ser_buf_in) + wire [DATA_BLOCKS-1:0][TX_DATAW-1:0] data_blocks; + logic [BLOCK_IDX_WIDTH-1:0] data_block_idx; + reg [SER_CTR_WIDTH-1:0] 
ser_tx_ctr; + reg is_read_data; + reg is_get_data; + wire [CMD_TYPE_BITS-1:0] cmd_type = ser_buf_in[CMD_TYPE_BITS-1:0]; wire [SCOPE_IDW-1:0] cmd_scope_id = ser_buf_in_n[CMD_TYPE_BITS +: SCOPE_IDW]; wire [TX_DATAW-CMD_TYPE_BITS-SCOPE_IDW-1:0] cmd_data = ser_buf_in[TX_DATAW-1:CMD_TYPE_BITS+SCOPE_IDW]; - wire [TX_DATAW-1:0] data_chunk = TX_DATAW'(DATAW'(data_store[raddr] >> read_offset)); - wire [TX_DATAW-1:0] get_data = read_data ? data_chunk : TX_DATAW'(delta_store[raddr]); - + for (genvar i = 0; i < DATA_BLOCKS; ++i) begin : g_data_blocks + for (genvar j = 0; j < TX_DATAW; ++j) begin : g_j + localparam k = i * TX_DATAW + j; + if (k < DATAW) begin : g_valid + assign data_blocks[i][j] = data_value[k]; + end else begin : g_padding + assign data_blocks[i][j] = '0; + end + end + end + + if (DATA_BLOCKS > 1) begin : g_data_block_idx + always @(posedge clk) begin + if (reset) begin + data_block_idx <= '0; + end else if ((ctrl_state == CTRL_STATE_SEND) + && (send_type == SEND_TYPE_DATA) + && (ser_tx_ctr == 0) + && is_read_data) begin + if (data_block_idx < BLOCK_IDX_WIDTH'(DATA_BLOCKS-1)) begin + data_block_idx <= data_block_idx + BLOCK_IDX_WIDTH'(1); + end else begin + data_block_idx <= '0; + end + end + end + end else begin : g_data_block_idx_0 + assign data_block_idx = 0; + end + always @(posedge clk) begin if (reset) begin ctrl_state <= CTRL_STATE_IDLE; + send_type <= SEND_TYPE_BITS'(SEND_TYPE_WIDTH); + waddr_end <= SIZEW'(DEPTH); cmd_start <= 0; + cmd_stop <= 0; start_delay <= '0; - waddr_end <= ADDRW'(SIZE-1); - bus_out_r <= 0; + stop_delay <= '0; + bus_out_r <= 0; + raddr <= '0; + is_read_data<= 0; + ser_tx_ctr <= '0; + is_get_data <= 0; end else begin - bus_out_r <= 0; - cmd_start <= 0; + bus_out_r <= 0; + is_get_data <= 0; + + if (start_delay != 0) begin + start_delay <= start_delay - CTR_WIDTH'(1); + end + + if (stop_delay != 0) begin + stop_delay <= stop_delay - CTR_WIDTH'(1); + end + + cmd_start <= (start_delay == CTR_WIDTH'(1)); + cmd_stop <= (stop_delay == 
CTR_WIDTH'(1)); case (ctrl_state) CTRL_STATE_IDLE: begin if (bus_in) begin + ser_tx_ctr <= SER_CTR_WIDTH'(TX_DATAW-1); ctrl_state <= CTRL_STATE_RECV; end - ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); end CTRL_STATE_RECV: begin - ser_tx_ctr <= ser_tx_ctr - 1; + ser_tx_ctr <= ser_tx_ctr - SER_CTR_WIDTH'(1); ser_buf_in <= ser_buf_in_n; if (ser_tx_ctr == 0) begin + // check if command is for this scope ctrl_state <= (cmd_scope_id == SCOPE_ID) ? CTRL_STATE_CMD : CTRL_STATE_IDLE; end end - CTRL_STATE_CMD: begin + CTRL_STATE_CMD: begin ctrl_state <= CTRL_STATE_IDLE; - case (cmd_type) + case (cmd_type) CMD_SET_START: begin - start_delay <= 64'(cmd_data); - cmd_start <= 1; + start_delay <= CTR_WIDTH'(cmd_data); + cmd_start <= (cmd_data == 0); end CMD_SET_STOP: begin - waddr_end <= ADDRW'(cmd_data); + stop_delay <= CTR_WIDTH'(cmd_data); + cmd_stop <= (cmd_data == 0); + end + CMD_SET_DEPTH: begin + waddr_end <= SIZEW'(cmd_data); end CMD_GET_WIDTH, CMD_GET_START, CMD_GET_COUNT, - CMD_GET_DATA: begin - ctrl_state <= CTRL_STATE_SEND; - get_type <= GET_TYPE_BITS'(cmd_type); - ser_tx_ctr <= TX_DATA_BITS'(TX_DATAW-1); + CMD_GET_DATA: begin + send_type <= SEND_TYPE_BITS'(cmd_type); + ser_tx_ctr <= SER_CTR_WIDTH'(TX_DATAW-1); + ctrl_state <= CTRL_STATE_SEND; bus_out_r <= 1; end default:; - endcase + endcase `ifdef DBG_TRACE_SCOPE - `TRACE(2, ("%d: *** scope #%0d: CMD: type=%0d\n", $time, SCOPE_ID, cmd_type)); + `TRACE(2, ("%t: scope_tap%0d: CMD: type=%0d\n", $time, SCOPE_ID, cmd_type)) `endif end CTRL_STATE_SEND: begin - ser_tx_ctr <= ser_tx_ctr - 1; - case (get_type) - GET_TYPE_WIDTH: begin + case (send_type) + SEND_TYPE_WIDTH: begin bus_out_r <= 1'(DATAW >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND width=%0d\n", $time, SCOPE_ID, DATAW)); - end - `endif + `TRACE(2, ("%t: scope_tap%0d: SEND width=%0d\n", $time, SCOPE_ID, DATAW)) + end + `endif end - GET_TYPE_COUNT: begin - bus_out_r <= 1'(count >> ser_tx_ctr); + 
SEND_TYPE_COUNT: begin + bus_out_r <= 1'(waddr >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND count=%0d\n", $time, SCOPE_ID, count)); - end - `endif + `TRACE(2, ("%t: scope_tap%0d: SEND count=%0d\n", $time, SCOPE_ID, waddr)) + end + `endif end - GET_TYPE_START: begin - bus_out_r <= 1'(start_time >> ser_tx_ctr); + SEND_TYPE_START: begin + bus_out_r <= 1'(start_time >> ser_tx_ctr); `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND start=%0d\n", $time, SCOPE_ID, start_time)); - end - `endif + `TRACE(2, ("%t: scope_tap%0d: SEND start=%0d\n", $time, SCOPE_ID, start_time)) + end + `endif end - GET_TYPE_DATA: begin - bus_out_r <= 1'(get_data >> ser_tx_ctr); + SEND_TYPE_DATA: begin + is_get_data <= 1; + if (ser_tx_ctr == 0) begin + if (is_read_data) begin + if (data_block_idx == BLOCK_IDX_WIDTH'(DATA_BLOCKS-1)) begin + raddr <= raddr + ADDRW'(1); + is_read_data <= 0; // switch to delta mode + end + end else begin + is_read_data <= 1; // switch to data mode + end + end `ifdef DBG_TRACE_SCOPE if (ser_tx_ctr == 0) begin - `TRACE(2, ("%d: *** scope #%0d: SEND data=%0d\n", $time, SCOPE_ID, get_data)); - end - `endif + if (is_read_data) begin + `TRACE(2, ("%t: scope_tap%0d: SEND data=0x%0h\n", $time, SCOPE_ID, get_data)) + end else begin + `TRACE(2, ("%t: scope_tap%0d: SEND delta=0x%0h\n", $time, SCOPE_ID, get_data)) + end + end + `endif end default:; endcase + ser_tx_ctr <= ser_tx_ctr - SER_CTR_WIDTH'(1); if (ser_tx_ctr == 0) begin ctrl_state <= CTRL_STATE_IDLE; - end + end end default:; endcase - end + end end - assign bus_out = bus_out_r; + wire [BLOCK_IDX_WIDTH-1:0] data_block_idx_r; + wire [SER_CTR_WIDTH-1:0] ser_tx_ctr_r; + wire is_read_data_r; + + VX_pipe_register #( + .DATAW (1 + SER_CTR_WIDTH + BLOCK_IDX_WIDTH) + ) data_sel_buf ( + .clk (clk), + .reset (reset), + .enable (1'b1), + .data_in ({is_read_data, ser_tx_ctr, data_block_idx}), + .data_out ({is_read_data_r, 
ser_tx_ctr_r, data_block_idx_r}) + ); + + wire [TX_DATAW-1:0] get_data = is_read_data_r ? data_blocks[data_block_idx_r] : TX_DATAW'(delta_value); + wire bus_out_w = is_get_data ? get_data[ser_tx_ctr_r] : bus_out_r; + + VX_pipe_register #( + .DATAW (1), + .DEPTH (1) + ) buf_out ( + .clk (clk), + .reset (reset), + .enable (1'b1), + .data_in (bus_out_w), + .data_out (bus_out) + ); endmodule `TRACING_ON diff --git a/hw/rtl/libs/VX_serial_div.sv b/hw/rtl/libs/VX_serial_div.sv index e7af40009..593be2d9a 100644 --- a/hw/rtl/libs/VX_serial_div.sv +++ b/hw/rtl/libs/VX_serial_div.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -29,7 +29,7 @@ module VX_serial_div #( input wire is_signed, input wire [LANES-1:0][WIDTHN-1:0] numer, - input wire [LANES-1:0][WIDTHD-1:0] denom, + input wire [LANES-1:0][WIDTHD-1:0] denom, output wire [LANES-1:0][WIDTHQ-1:0] quotient, output wire [LANES-1:0][WIDTHR-1:0] remainder @@ -49,14 +49,14 @@ module VX_serial_div #( reg [CNTRW-1:0] cntr; reg busy_r; - for (genvar i = 0; i < LANES; ++i) begin + for (genvar i = 0; i < LANES; ++i) begin : g_setup wire negate_numer = is_signed && numer[i][WIDTHN-1]; wire negate_denom = is_signed && denom[i][WIDTHD-1]; assign numer_qual[i] = negate_numer ? -$signed(numer[i]) : numer[i]; assign denom_qual[i] = negate_denom ? 
-$signed(denom[i]) : denom[i]; assign sub_result[i] = working[i][WIDTHN + MIN_ND : WIDTHN] - denom_r[i]; end - + always @(posedge clk) begin if (reset) begin busy_r <= 0; @@ -74,18 +74,21 @@ module VX_serial_div #( end end - for (genvar i = 0; i < LANES; ++i) begin + for (genvar i = 0; i < LANES; ++i) begin : g_div always @(posedge clk) begin if (strobe) begin working[i] <= {{WIDTHD{1'b0}}, numer_qual[i], 1'b0}; denom_r[i] <= denom_qual[i]; inv_quot[i] <= (denom[i] != 0) && is_signed && (numer[i][31] ^ denom[i][31]); inv_rem[i] <= is_signed && numer[i][31]; - end else if (busy_r) begin + end else if (busy_r) begin working[i] <= sub_result[i][WIDTHD] ? {working[i][WIDTHN+MIN_ND-1:0], 1'b0} : {sub_result[i][WIDTHD-1:0], working[i][WIDTHN-1:0], 1'b1}; end end + end + + for (genvar i = 0; i < LANES; ++i) begin : g_output wire [WIDTHQ-1:0] q = working[i][WIDTHQ-1:0]; wire [WIDTHR-1:0] r = working[i][WIDTHN+WIDTHR:WIDTHN+1]; assign quotient[i] = inv_quot[i] ? -$signed(q) : q; diff --git a/hw/rtl/libs/VX_serial_mul.sv b/hw/rtl/libs/VX_serial_mul.sv index 9369dfd10..d847b7111 100644 --- a/hw/rtl/libs/VX_serial_mul.sv +++ b/hw/rtl/libs/VX_serial_mul.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,7 +13,7 @@ `include "VX_platform.vh" -// Iterative integer multiplier +// Iterative integer multiplier // An adaptation of ZipCPU algorithm for a multi-lane elastic architecture. 
// https://zipcpu.com/zipcpu/2021/07/03/slowmpy.html @@ -65,7 +65,7 @@ module VX_serial_mul #( end end - for (genvar i = 0; i < LANES; ++i) begin + for (genvar i = 0; i < LANES; ++i) begin : g_mul wire [X_WIDTH-1:0] axb = b[i][0] ? a[i] : '0; always @(posedge clk) begin @@ -73,12 +73,12 @@ module VX_serial_mul #( if (SIGNED) begin a[i] <= X_WIDTH'($signed(dataa[i])); b[i] <= Y_WIDTH'($signed(datab[i])); - end else begin + end else begin a[i] <= dataa[i]; b[i] <= datab[i]; end p[i] <= 0; - end else if (busy_r) begin + end else if (busy_r) begin b[i] <= (b[i] >> 1); p[i][Y_WIDTH-2:0] <= p[i][Y_WIDTH-1:1]; if (SIGNED) begin @@ -93,9 +93,9 @@ module VX_serial_mul #( end end - if (SIGNED) begin + if (SIGNED) begin : g_signed assign result[i] = R_WIDTH'(p[i][P_WIDTH-1:0] + {1'b1, {(X_WIDTH-2){1'b0}}, 1'b1, {(Y_WIDTH){1'b0}}}); - end else begin + end else begin : g_unsigned assign result[i] = R_WIDTH'(p[i]); end end diff --git a/hw/rtl/libs/VX_shift_register.sv b/hw/rtl/libs/VX_shift_register.sv index 56726d2cb..b4809fe90 100644 --- a/hw/rtl/libs/VX_shift_register.sv +++ b/hw/rtl/libs/VX_shift_register.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -14,13 +14,13 @@ `include "VX_platform.vh" `TRACING_OFF -module VX_shift_register #( +module VX_shift_register #( parameter DATAW = 1, parameter RESETW = 0, parameter DEPTH = 1, - parameter NUM_TAPS = 1, + parameter NUM_TAPS = 1, parameter TAP_START = 0, - parameter TAP_STRIDE = 1 + parameter TAP_STRIDE = 1 ) ( input wire clk, input wire reset, @@ -28,7 +28,7 @@ module VX_shift_register #( input wire [DATAW-1:0] data_in, output wire [NUM_TAPS-1:0][DATAW-1:0] data_out ); - if (DEPTH != 0) begin + if (DEPTH != 0) begin : g_shift_register reg [DEPTH-1:0][DATAW-1:0] entries; always @(posedge clk) begin @@ -36,7 +36,7 @@ module VX_shift_register #( if ((i >= (DATAW-RESETW)) && reset) begin for (integer j = 0; j < DEPTH; ++j) entries[j][i] <= 0; - end else if (enable) begin + end else if (enable) begin for (integer j = 1; j < DEPTH; ++j) entries[j-1][i] <= entries[j][i]; entries[DEPTH-1][i] <= data_in[i]; @@ -44,10 +44,10 @@ module VX_shift_register #( end end - for (genvar i = 0; i < NUM_TAPS; ++i) begin + for (genvar i = 0; i < NUM_TAPS; ++i) begin : g_data_out assign data_out[i] = entries[i * TAP_STRIDE + TAP_START]; end - end else begin + end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) `UNUSED_VAR (enable) diff --git a/hw/rtl/libs/VX_skid_buffer.sv b/hw/rtl/libs/VX_skid_buffer.sv index 53c213622..b77cce2a4 100644 --- a/hw/rtl/libs/VX_skid_buffer.sv +++ b/hw/rtl/libs/VX_skid_buffer.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,19 +19,19 @@ module VX_skid_buffer #( parameter PASSTHRU = 0, parameter HALF_BW = 0, parameter OUT_REG = 0 -) ( +) ( input wire clk, input wire reset, - + input wire valid_in, - output wire ready_in, + output wire ready_in, input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out, input wire ready_out, output wire valid_out ); - if (PASSTHRU != 0) begin + if (PASSTHRU != 0) begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) @@ -40,7 +40,7 @@ module VX_skid_buffer #( assign data_out = data_in; assign ready_in = ready_out; - end else if (HALF_BW != 0) begin + end else if (HALF_BW != 0) begin : g_half_bw VX_toggle_buffer #( .DATAW (DATAW) @@ -55,7 +55,7 @@ module VX_skid_buffer #( .ready_out (ready_out) ); - end else begin + end else begin : g_full_bw VX_stream_buffer #( .DATAW (DATAW), diff --git a/hw/rtl/libs/VX_sp_ram.sv b/hw/rtl/libs/VX_sp_ram.sv index 4ab2a9b7a..efce4b5f2 100644 --- a/hw/rtl/libs/VX_sp_ram.sv +++ b/hw/rtl/libs/VX_sp_ram.sv @@ -17,13 +17,14 @@ module VX_sp_ram #( parameter DATAW = 1, parameter SIZE = 1, - parameter ADDR_MIN = 0, parameter WRENW = 1, parameter OUT_REG = 0, + parameter LUTRAM = 0, parameter NO_RWCHECK = 0, parameter RW_ASSERT = 0, - parameter LUTRAM = 0, parameter RESET_RAM = 0, + parameter RESET_OUT = 0, + parameter READ_ENABLE = 0, parameter INIT_ENABLE = 0, parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0, @@ -39,19 +40,20 @@ module VX_sp_ram #( output wire [DATAW-1:0] rdata ); VX_dp_ram #( - .DATAW (DATAW), - .SIZE (SIZE), - .ADDR_MIN (ADDR_MIN), - .WRENW (WRENW), - .OUT_REG (OUT_REG), + .DATAW (DATAW), + .SIZE (SIZE), + .WRENW (WRENW), + .OUT_REG (OUT_REG), + .LUTRAM (LUTRAM), .NO_RWCHECK (NO_RWCHECK), - .RW_ASSERT (RW_ASSERT), - .LUTRAM (LUTRAM), - .RESET_RAM (RESET_RAM), - .INIT_ENABLE (INIT_ENABLE), - .INIT_FILE (INIT_FILE), + .RW_ASSERT (RW_ASSERT), + .RESET_RAM (RESET_RAM), + .RESET_OUT (RESET_OUT), + .READ_ENABLE(READ_ENABLE), + .INIT_ENABLE(INIT_ENABLE), + .INIT_FILE (INIT_FILE), 
.INIT_VALUE (INIT_VALUE), - .ADDRW (ADDRW) + .ADDRW (ADDRW) ) dp_ram ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_stream_arb.sv b/hw/rtl/libs/VX_stream_arb.sv index 98fed5859..ba824236e 100644 --- a/hw/rtl/libs/VX_stream_arb.sv +++ b/hw/rtl/libs/VX_stream_arb.sv @@ -21,7 +21,6 @@ module VX_stream_arb #( parameter `STRING ARBITER = "R", parameter MAX_FANOUT = `MAX_FANOUT, parameter OUT_BUF = 0, - parameter LUTRAM = 0, parameter NUM_REQS = `CDIV(NUM_INPUTS, NUM_OUTPUTS), parameter LOG_NUM_REQS = `CLOG2(NUM_REQS), parameter NUM_REQS_W = `UP(LOG_NUM_REQS) @@ -38,31 +37,28 @@ module VX_stream_arb #( output wire [NUM_OUTPUTS-1:0][NUM_REQS_W-1:0] sel_out, input wire [NUM_OUTPUTS-1:0] ready_out ); - if (NUM_INPUTS > NUM_OUTPUTS) begin + if (NUM_INPUTS > NUM_OUTPUTS) begin : g_more_inputs - if (NUM_OUTPUTS > 1) begin + if (NUM_OUTPUTS > 1) begin : g_multiple_outputs // (#inputs > #outputs) and (#outputs > 1) - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_arb_slices localparam SLICE_BEGIN = i * NUM_REQS; localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_INPUTS); localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN; - `RESET_RELAY (slice_reset, reset); - VX_stream_arb #( .NUM_INPUTS (SLICE_SIZE), .NUM_OUTPUTS (1), .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) arb_slice ( .clk (clk), - .reset (slice_reset), + .reset (reset), .valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]), .ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]), .data_in (data_in[SLICE_END-1: SLICE_BEGIN]), @@ -73,7 +69,7 @@ module VX_stream_arb #( ); end - end else if (MAX_FANOUT != 0 && (NUM_INPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin + end else if (MAX_FANOUT != 0 && (NUM_INPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin : g_fanout // (#inputs > max_fanout) and (#outputs == 1) @@ -85,7 +81,7 @@ module VX_stream_arb #( wire [NUM_SLICES-1:0][DATAW+LOG_NUM_REQS2-1:0] 
data_tmp; wire [NUM_SLICES-1:0] ready_tmp; - for (genvar i = 0; i < NUM_SLICES; ++i) begin + for (genvar i = 0; i < NUM_SLICES; ++i) begin : g_fanout_slice_arbs localparam SLICE_BEGIN = i * MAX_FANOUT; localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_INPUTS); @@ -94,29 +90,24 @@ module VX_stream_arb #( wire [DATAW-1:0] data_tmp_u; wire [`LOG2UP(SLICE_SIZE)-1:0] sel_tmp_u; - `RESET_RELAY (slice_reset, reset); - - if (MAX_FANOUT != 1) begin - VX_stream_arb #( - .NUM_INPUTS (SLICE_SIZE), - .NUM_OUTPUTS (1), - .DATAW (DATAW), - .ARBITER (ARBITER), - .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (3), // registered output - .LUTRAM (LUTRAM) - ) fanout_slice_arb ( - .clk (clk), - .reset (slice_reset), - .valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]), - .data_in (data_in[SLICE_END-1: SLICE_BEGIN]), - .ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]), - .valid_out (valid_tmp[i]), - .data_out (data_tmp_u), - .sel_out (sel_tmp_u), - .ready_out (ready_tmp[i]) - ); - end + VX_stream_arb #( + .NUM_INPUTS (SLICE_SIZE), + .NUM_OUTPUTS (1), + .DATAW (DATAW), + .ARBITER (ARBITER), + .MAX_FANOUT (MAX_FANOUT), + .OUT_BUF (3) + ) fanout_slice_arb ( + .clk (clk), + .reset (reset), + .valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]), + .data_in (data_in[SLICE_END-1: SLICE_BEGIN]), + .ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]), + .valid_out (valid_tmp[i]), + .data_out (data_tmp_u), + .sel_out (sel_tmp_u), + .ready_out (ready_tmp[i]) + ); assign data_tmp[i] = {data_tmp_u, LOG_NUM_REQS2'(sel_tmp_u)}; end @@ -130,8 +121,7 @@ module VX_stream_arb #( .DATAW (DATAW + LOG_NUM_REQS2), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) fanout_join_arb ( .clk (clk), .reset (reset), @@ -147,13 +137,13 @@ module VX_stream_arb #( assign data_out = data_out_u[LOG_NUM_REQS2 +: DATAW]; assign sel_out = {sel_out_u, data_out_u[0 +: LOG_NUM_REQS2]}; - end else begin + end else begin : g_one_output // (#inputs <= max_fanout) and (#outputs == 1) - wire 
valid_in_r; - wire [DATAW-1:0] data_in_r; - wire ready_in_r; + wire valid_in_w; + wire [DATAW-1:0] data_in_w; + wire ready_in_w; wire arb_valid; wire [NUM_REQS_W-1:0] arb_index; @@ -173,56 +163,53 @@ module VX_stream_arb #( .grant_ready (arb_ready) ); - assign valid_in_r = arb_valid; - assign data_in_r = data_in[arb_index]; - assign arb_ready = ready_in_r; + assign valid_in_w = arb_valid; + assign data_in_w = data_in[arb_index]; + assign arb_ready = ready_in_w; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign ready_in[i] = ready_in_r && arb_onehot[i]; + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_ready_in + assign ready_in[i] = ready_in_w && arb_onehot[i]; end VX_elastic_buffer #( .DATAW (LOG_NUM_REQS + DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), .reset (reset), - .valid_in (valid_in_r), - .ready_in (ready_in_r), - .data_in ({arb_index, data_in_r}), + .valid_in (valid_in_w), + .ready_in (ready_in_w), + .data_in ({arb_index, data_in_w}), .data_out ({sel_out, data_out}), .valid_out (valid_out), .ready_out (ready_out) ); end - end else if (NUM_OUTPUTS > NUM_INPUTS) begin + end else if (NUM_OUTPUTS > NUM_INPUTS) begin : g_more_outputs - if (NUM_INPUTS > 1) begin + if (NUM_INPUTS > 1) begin : g_multiple_inputs // (#inputs > 1) and (#outputs > #inputs) - for (genvar i = 0; i < NUM_INPUTS; ++i) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_arb_slices localparam SLICE_BEGIN = i * NUM_REQS; localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_OUTPUTS); localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN; - `RESET_RELAY (slice_reset, reset); - VX_stream_arb #( .NUM_INPUTS (1), .NUM_OUTPUTS (SLICE_SIZE), .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) arb_slice ( .clk (clk), - .reset (slice_reset), + .reset (reset), .valid_in (valid_in[i]), .ready_in 
(ready_in[i]), .data_in (data_in[i]), @@ -232,12 +219,12 @@ module VX_stream_arb #( `UNUSED_PIN (sel_out) ); - for (genvar j = SLICE_BEGIN; j < SLICE_END; ++j) begin + for (genvar j = SLICE_BEGIN; j < SLICE_END; ++j) begin : g_sel_out assign sel_out[j] = i; end end - end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin + end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > (MAX_FANOUT + MAX_FANOUT /2))) begin : g_fanout // (#inputs == 1) and (#outputs > max_fanout) @@ -253,8 +240,7 @@ module VX_stream_arb #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (3), // registered output - .LUTRAM (LUTRAM) + .OUT_BUF (3) ) fanout_fork_arb ( .clk (clk), .reset (reset), @@ -267,25 +253,22 @@ module VX_stream_arb #( `UNUSED_PIN (sel_out) ); - for (genvar i = 0; i < NUM_SLICES; ++i) begin + for (genvar i = 0; i < NUM_SLICES; ++i) begin : g_fanout_slice_arbs localparam SLICE_BEGIN = i * MAX_FANOUT; localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_OUTPUTS); localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN; - `RESET_RELAY (slice_reset, reset); - VX_stream_arb #( .NUM_INPUTS (1), .NUM_OUTPUTS (SLICE_SIZE), .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) fanout_slice_arb ( .clk (clk), - .reset (slice_reset), + .reset (reset), .valid_in (valid_tmp[i]), .ready_in (ready_tmp[i]), .data_in (data_tmp[i]), @@ -296,11 +279,11 @@ module VX_stream_arb #( ); end - end else begin + end else begin : g_one_input // (#inputs == 1) and (#outputs <= max_fanout) - wire [NUM_OUTPUTS-1:0] ready_in_r; + wire [NUM_OUTPUTS-1:0] ready_in_w; wire [NUM_OUTPUTS-1:0] arb_requests; wire arb_valid; @@ -320,21 +303,21 @@ module VX_stream_arb #( .grant_ready (arb_ready) ); - assign arb_requests = ready_in_r; + assign arb_requests = ready_in_w; assign arb_ready = valid_in[0]; assign ready_in = arb_valid; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < 
NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), .reset (reset), .valid_in (valid_in && arb_onehot[i]), - .ready_in (ready_in_r[i]), + .ready_in (ready_in_w[i]), .data_in (data_in), .data_out (data_out[i]), .valid_out (valid_out[i]), @@ -345,22 +328,19 @@ module VX_stream_arb #( assign sel_out = 0; - end else begin + end else begin : g_passthru // #Inputs == #Outputs - `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); - - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[i]), + .reset (reset), .valid_in (valid_in[i]), .ready_in (ready_in[i]), .data_in (data_in[i]), diff --git a/hw/rtl/libs/VX_stream_buffer.sv b/hw/rtl/libs/VX_stream_buffer.sv index bebe8ec71..4b77df83d 100644 --- a/hw/rtl/libs/VX_stream_buffer.sv +++ b/hw/rtl/libs/VX_stream_buffer.sv @@ -1,18 +1,18 @@ // Copyright 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-// A stream elastic buffer operates at full-bandwidth where push and pop can happen simultaneously +// A stream elastic buffer operates at full-bandwidth where fire_in and fire_out can happen simultaneously // It has the following benefits: // + full-bandwidth throughput // + ready_in and ready_out are decoupled @@ -27,102 +27,99 @@ module VX_stream_buffer #( parameter DATAW = 1, parameter OUT_REG = 0, parameter PASSTHRU = 0 -) ( +) ( input wire clk, input wire reset, input wire valid_in, - output wire ready_in, + output wire ready_in, input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out, input wire ready_out, output wire valid_out -); - if (PASSTHRU != 0) begin +); + if (PASSTHRU != 0) begin : g_passthru + `UNUSED_VAR (clk) `UNUSED_VAR (reset) assign ready_in = ready_out; - assign valid_out = valid_in; + assign valid_out = valid_in; assign data_out = data_in; - end else begin - if (OUT_REG != 0) begin - - reg [DATAW-1:0] data_out_r; - reg [DATAW-1:0] buffer; - reg valid_out_r; - reg use_buffer; - - wire push = valid_in && ready_in; - wire stall_out = valid_out_r && ~ready_out; - - always @(posedge clk) begin - if (reset) begin - valid_out_r <= 0; - use_buffer <= 0; - end else begin - if (ready_out) begin - use_buffer <= 0; - end else if (valid_in && valid_out) begin - use_buffer <= 1; - end - if (~stall_out) begin - valid_out_r <= valid_in || use_buffer; - end - end - end - always @(posedge clk) begin - if (push) begin - buffer <= data_in; + end else if (OUT_REG != 0) begin : g_out_reg + + reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] buffer; + reg valid_out_r; + reg no_buffer; + + wire fire_in = valid_in && ready_in; + wire flow_out = ready_out || ~valid_out; + + always @(posedge clk) begin + if (reset) begin + valid_out_r <= 0; + no_buffer <= 1; + end else begin + if (flow_out) begin + no_buffer <= 1; + end else if (valid_in) begin + no_buffer <= 0; end - if (~stall_out) begin - data_out_r <= use_buffer ? 
buffer : data_in; + if (flow_out) begin + valid_out_r <= valid_in || ~no_buffer; end end + end - assign ready_in = ~use_buffer; - assign valid_out = valid_out_r; - assign data_out = data_out_r; - - end else begin - - reg [1:0][DATAW-1:0] shift_reg; - reg valid_out_r, ready_in_r, rd_ptr_r; - - wire push = valid_in && ready_in; - wire pop = valid_out_r && ready_out; - - always @(posedge clk) begin - if (reset) begin - valid_out_r <= 0; - ready_in_r <= 1; - rd_ptr_r <= 1; - end else begin - if (push) begin - if (!pop) begin - ready_in_r <= rd_ptr_r; - valid_out_r <= 1; - end - end else if (pop) begin - ready_in_r <= 1; - valid_out_r <= rd_ptr_r; - end - rd_ptr_r <= rd_ptr_r ^ (push ^ pop); - end + always @(posedge clk) begin + if (fire_in) begin + buffer <= data_in; + end + if (flow_out) begin + data_out_r <= no_buffer ? data_in : buffer; end + end - always @(posedge clk) begin - if (push) begin - shift_reg[1] <= shift_reg[0]; - shift_reg[0] <= data_in; - end + assign ready_in = no_buffer; + assign valid_out = valid_out_r; + assign data_out = data_out_r; + + end else begin : g_no_out_reg + + reg [1:0][DATAW-1:0] shift_reg; + reg [1:0] fifo_state, fifo_state_n; + + wire fire_in = valid_in && ready_in; + wire fire_out = valid_out && ready_out; + + always @(*) begin + case ({fire_in, fire_out}) + 2'b10: fifo_state_n = {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 10 + 2'b01: fifo_state_n = {1'b0, fifo_state[1]}; // 10 -> 01, 01 -> 00 + default: fifo_state_n = fifo_state; + endcase + end + + always @(posedge clk) begin + if (reset) begin + fifo_state <= 2'b00; + end else begin + fifo_state <= fifo_state_n; end + end - assign ready_in = ready_in_r; - assign valid_out = valid_out_r; - assign data_out = shift_reg[rd_ptr_r]; + always @(posedge clk) begin + if (fire_in) begin + shift_reg[1] <= shift_reg[0]; + shift_reg[0] <= data_in; + end end - end + + assign ready_in = ~fifo_state[1]; + assign valid_out = fifo_state[0]; + assign data_out = shift_reg[fifo_state[1]]; + + end 
endmodule `TRACING_ON - diff --git a/hw/rtl/libs/VX_stream_pack.sv b/hw/rtl/libs/VX_stream_pack.sv index 7f024b184..944b120c2 100644 --- a/hw/rtl/libs/VX_stream_pack.sv +++ b/hw/rtl/libs/VX_stream_pack.sv @@ -38,7 +38,8 @@ module VX_stream_pack #( output wire [TAG_WIDTH-1:0] tag_out, input wire ready_out ); - if (NUM_REQS > 1) begin + if (NUM_REQS > 1) begin : g_pack + localparam LOG_NUM_REQS = `CLOG2(NUM_REQS); wire [LOG_NUM_REQS-1:0] grant_index; @@ -62,11 +63,11 @@ module VX_stream_pack #( wire [NUM_REQS-1:0] tag_matches; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_tag_matches assign tag_matches[i] = (tag_in[i][TAG_SEL_BITS-1:0] == tag_sel[TAG_SEL_BITS-1:0]); end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_ready_in assign ready_in[i] = grant_ready & tag_matches[i]; end @@ -87,7 +88,7 @@ module VX_stream_pack #( .ready_out (ready_out) ); - end else begin + end else begin : g_passthru `UNUSED_VAR (clk) `UNUSED_VAR (reset) diff --git a/hw/rtl/libs/VX_stream_switch.sv b/hw/rtl/libs/VX_stream_switch.sv index 3a905cb1d..e3848e4c3 100644 --- a/hw/rtl/libs/VX_stream_switch.sv +++ b/hw/rtl/libs/VX_stream_switch.sv @@ -36,117 +36,112 @@ module VX_stream_switch #( output wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out, input wire [NUM_OUTPUTS-1:0] ready_out ); - if (NUM_INPUTS > NUM_OUTPUTS) begin + if (NUM_INPUTS > NUM_OUTPUTS) begin : g_more_inputs + wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0] valid_in_w; + wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0][DATAW-1:0] data_in_w; - wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0] valid_in_r; - wire [NUM_OUTPUTS-1:0][NUM_REQS-1:0][DATAW-1:0] data_in_r; - - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_data_in + for (genvar j = 0; j < NUM_REQS; ++j) begin : g_j localparam ii = i * NUM_REQS + j; - if (ii < NUM_INPUTS) begin - assign valid_in_r[i][j] = 
valid_in[ii]; - assign data_in_r[i][j] = data_in[ii]; - end else begin - assign valid_in_r[i][j] = 0; - assign data_in_r[i][j] = '0; + if (ii < NUM_INPUTS) begin : g_valid + assign valid_in_w[i][j] = valid_in[ii]; + assign data_in_w[i][j] = data_in[ii]; + end else begin : g_padding + assign valid_in_w[i][j] = 0; + assign data_in_w[i][j] = '0; end end end - wire [NUM_OUTPUTS-1:0] valid_out_r; - wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_r; - wire [NUM_OUTPUTS-1:0] ready_out_r; + wire [NUM_OUTPUTS-1:0] valid_out_w; + wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; + wire [NUM_OUTPUTS-1:0] ready_out_w; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - assign valid_out_r[i] = valid_in_r[i][sel_in[i]]; - assign data_out_r[i] = data_in_r[i][sel_in[i]]; + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_data_out_w + assign valid_out_w[i] = valid_in_w[i][sel_in[i]]; + assign data_out_w[i] = data_in_w[i][sel_in[i]]; end - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_ready_out_w + for (genvar j = 0; j < NUM_REQS; ++j) begin : g_j localparam ii = i * NUM_REQS + j; - if (ii < NUM_INPUTS) begin - assign ready_in[ii] = ready_out_r[i] & (sel_in[i] == LOG_NUM_REQS'(j)); + if (ii < NUM_INPUTS) begin : g_valid + assign ready_in[ii] = ready_out_w[i] && (sel_in[i] == LOG_NUM_REQS'(j)); end end end - `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); - - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[i]), - .valid_in (valid_out_r[i]), - .ready_in (ready_out_r[i]), - .data_in (data_out_r[i]), + .reset (reset), + .valid_in (valid_out_w[i]), + .ready_in (ready_out_w[i]), + .data_in (data_out_w[i]), .data_out (data_out[i]), .valid_out (valid_out[i]), .ready_out 
(ready_out[i]) ); end - end else if (NUM_OUTPUTS > NUM_INPUTS) begin + end else if (NUM_OUTPUTS > NUM_INPUTS) begin : g_more_outputs - wire [NUM_INPUTS-1:0][NUM_REQS-1:0] valid_out_r; - wire [NUM_INPUTS-1:0][NUM_REQS-1:0] ready_out_r; + wire [NUM_INPUTS-1:0][NUM_REQS-1:0] valid_out_w; + wire [NUM_INPUTS-1:0][NUM_REQS-1:0] ready_out_w; - for (genvar i = 0; i < NUM_INPUTS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin - assign valid_out_r[i][j] = valid_in[i] & (sel_in[i] == LOG_NUM_REQS'(j)); + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_valid_out_w + for (genvar j = 0; j < NUM_REQS; ++j) begin : g_j + assign valid_out_w[i][j] = valid_in[i] && (sel_in[i] == LOG_NUM_REQS'(j)); end - assign ready_in[i] = ready_out_r[i][sel_in[i]]; end - `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_ready_in + assign ready_in[i] = ready_out_w[i][sel_in[i]]; + end - for (genvar i = 0; i < NUM_INPUTS; ++i) begin - for (genvar j = 0; j < NUM_REQS; ++j) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_out_buf + for (genvar j = 0; j < NUM_REQS; ++j) begin : g_j localparam ii = i * NUM_REQS + j; - if (ii < NUM_OUTPUTS) begin + if (ii < NUM_OUTPUTS) begin : g_valid VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[ii]), - .valid_in (valid_out_r[i][j]), - .ready_in (ready_out_r[i][j]), + .reset (reset), + .valid_in (valid_out_w[i][j]), + .ready_in (ready_out_w[i][j]), .data_in (data_in[i]), .data_out (data_out[ii]), .valid_out (valid_out[ii]), .ready_out (ready_out[ii]) ); - end else begin - `UNUSED_VAR (out_buf_reset[ii]) - `UNUSED_VAR (valid_out_r[i][j]) - assign ready_out_r[i][j] = '0; + end else begin : g_padding + `UNUSED_VAR (valid_out_w[i][j]) + assign ready_out_w[i][j] = '0; end end end - end else begin + end else begin : g_passthru // #Inputs == #Outputs `UNUSED_VAR (sel_in) - 
`RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); - - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[i]), + .reset (reset), .valid_in (valid_in[i]), .ready_in (ready_in[i]), .data_in (data_in[i]), diff --git a/hw/rtl/libs/VX_stream_unpack.sv b/hw/rtl/libs/VX_stream_unpack.sv index e8b905cdf..b0cca961a 100644 --- a/hw/rtl/libs/VX_stream_unpack.sv +++ b/hw/rtl/libs/VX_stream_unpack.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,8 +15,8 @@ `TRACING_OFF module VX_stream_unpack #( - parameter NUM_REQS = 1, - parameter DATA_WIDTH = 1, + parameter NUM_REQS = 1, + parameter DATA_WIDTH = 1, parameter TAG_WIDTH = 1, parameter OUT_BUF = 0 ) ( @@ -31,36 +31,32 @@ module VX_stream_unpack #( output wire ready_in, // output - output wire [NUM_REQS-1:0] valid_out, + output wire [NUM_REQS-1:0] valid_out, output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] data_out, output wire [NUM_REQS-1:0][TAG_WIDTH-1:0] tag_out, input wire [NUM_REQS-1:0] ready_out ); - if (NUM_REQS > 1) begin + if (NUM_REQS > 1) begin : g_unpack - reg [NUM_REQS-1:0] sent_mask; - wire [NUM_REQS-1:0] ready_out_r; + reg [NUM_REQS-1:0] rem_mask_r; + wire [NUM_REQS-1:0] ready_out_w; - wire [NUM_REQS-1:0] sent_mask_n = sent_mask | ready_out_r; - wire sent_all = ~(| (mask_in & ~sent_mask_n)); + wire [NUM_REQS-1:0] rem_mask_n = rem_mask_r & ~ready_out_w; + wire sent_all = ~(| (mask_in & rem_mask_n)); always @(posedge clk) begin if (reset) begin - sent_mask <= '0; + rem_mask_r <= '1; end else begin if (valid_in) begin - if (sent_all) begin - sent_mask <= '0; - end else begin - sent_mask <= sent_mask_n; - end + rem_mask_r <= sent_all ? 
'1 : rem_mask_n; end end end assign ready_in = sent_all; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_outbuf VX_elastic_buffer #( .DATAW (DATA_WIDTH + TAG_WIDTH), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), @@ -68,21 +64,21 @@ module VX_stream_unpack #( ) out_buf ( .clk (clk), .reset (reset), - .valid_in (valid_in && mask_in[i] && ~sent_mask[i]), - .ready_in (ready_out_r[i]), + .valid_in (valid_in && mask_in[i] && rem_mask_r[i]), + .ready_in (ready_out_w[i]), .data_in ({data_in[i], tag_in}), .data_out ({data_out[i], tag_out[i]}), .valid_out (valid_out[i]), .ready_out (ready_out[i]) ); end - - end else begin - + + end else begin : g_passthru + `UNUSED_VAR (clk) `UNUSED_VAR (reset) `UNUSED_VAR (mask_in) - assign valid_out = valid_in; + assign valid_out = valid_in; assign data_out = data_in; assign tag_out = tag_in; assign ready_in = ready_out; diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index b7bdcbf5e..febfd0465 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -22,7 +22,6 @@ module VX_stream_xbar #( parameter OUT_WIDTH = `LOG2UP(NUM_OUTPUTS), parameter ARBITER = "R", parameter OUT_BUF = 0, - parameter LUTRAM = 0, parameter MAX_FANOUT = `MAX_FANOUT, parameter PERF_CTR_BITS = `CLOG2(NUM_INPUTS+1) ) ( @@ -44,35 +43,58 @@ module VX_stream_xbar #( `UNUSED_VAR (clk) `UNUSED_VAR (reset) - if (NUM_INPUTS != 1) begin + if (NUM_INPUTS != 1) begin : g_multiple_inputs - if (NUM_OUTPUTS != 1) begin + if (NUM_OUTPUTS != 1) begin : g_multiple_outputs // (#inputs > 1) and (#outputs > 1) - wire [NUM_OUTPUTS-1:0][NUM_INPUTS-1:0] per_output_ready_in; + wire [NUM_INPUTS-1:0][NUM_OUTPUTS-1:0] per_output_valid_in; + wire [NUM_OUTPUTS-1:0][NUM_INPUTS-1:0] per_output_valid_in_w; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + wire [NUM_OUTPUTS-1:0][NUM_INPUTS-1:0] per_output_ready_in; + wire [NUM_INPUTS-1:0][NUM_OUTPUTS-1:0] per_output_ready_in_w; + + VX_transpose #( + .N 
(NUM_OUTPUTS), + .M (NUM_INPUTS) + ) rdy_in_transpose ( + .data_in (per_output_ready_in), + .data_out (per_output_ready_in_w) + ); - wire [NUM_INPUTS-1:0] valid_in_q; - for (genvar j = 0; j < NUM_INPUTS; ++j) begin - assign valid_in_q[j] = valid_in[j] && (sel_in[j] == i); - end + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_sel_in_decoders + VX_decoder #( + .N (OUT_WIDTH), + .D (NUM_OUTPUTS) + ) sel_in_decoder ( + .data_in (sel_in[i]), + .valid_in (valid_in[i]), + .data_out (per_output_valid_in[i]) + ); + assign ready_in[i] = | per_output_ready_in_w[i]; + end - `RESET_RELAY (slice_reset, reset); + VX_transpose #( + .N (NUM_INPUTS), + .M (NUM_OUTPUTS) + ) val_in_transpose ( + .data_in (per_output_valid_in), + .data_out (per_output_valid_in_w) + ); + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_xbar_arbs VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), .NUM_OUTPUTS (1), .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) xbar_arb ( .clk (clk), - .reset (slice_reset), - .valid_in (valid_in_q), + .reset (reset), + .valid_in (per_output_valid_in_w[i]), .data_in (data_in), .ready_in (per_output_ready_in[i]), .valid_out (valid_out[i]), @@ -82,11 +104,7 @@ module VX_stream_xbar #( ); end - for (genvar i = 0; i < NUM_INPUTS; ++i) begin - assign ready_in[i] = per_output_ready_in[sel_in[i]][i]; - end - - end else begin + end else begin : g_one_output // (#inputs >= 1) and (#outputs == 1) @@ -96,8 +114,7 @@ module VX_stream_xbar #( .DATAW (DATAW), .ARBITER (ARBITER), .MAX_FANOUT (MAX_FANOUT), - .OUT_BUF (OUT_BUF), - .LUTRAM (LUTRAM) + .OUT_BUF (OUT_BUF) ) xbar_arb ( .clk (clk), .reset (reset), @@ -113,33 +130,37 @@ module VX_stream_xbar #( `UNUSED_VAR (sel_in) end - end else if (NUM_OUTPUTS != 1) begin + end else if (NUM_OUTPUTS != 1) begin : g_one_input // (#inputs == 1) and (#outputs > 1) - logic [NUM_OUTPUTS-1:0] valid_out_r, ready_out_r; - logic [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_r; - always 
@(*) begin - valid_out_r = '0; - valid_out_r[sel_in] = valid_in; - end - assign data_out_r = {NUM_OUTPUTS{data_in}}; - assign ready_in = ready_out_r[sel_in]; + wire [NUM_OUTPUTS-1:0] valid_out_w, ready_out_w; + wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; - `RESET_RELAY_EX (out_buf_reset, reset, NUM_OUTPUTS, `MAX_FANOUT); + VX_decoder #( + .N (OUT_WIDTH), + .D (NUM_OUTPUTS) + ) sel_in_decoder ( + .data_in (sel_in[0]), + .valid_in (valid_in[0]), + .data_out (valid_out_w) + ); + + assign ready_in[0] = ready_out_w[sel_in[0]]; + assign data_out_w = {NUM_OUTPUTS{data_in[0]}}; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_out_buf VX_elastic_buffer #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), - .reset (out_buf_reset[i]), - .valid_in (valid_out_r[i]), - .ready_in (ready_out_r[i]), - .data_in (data_out_r[i]), + .reset (reset), + .valid_in (valid_out_w[i]), + .ready_in (ready_out_w[i]), + .data_in (data_out_w[i]), .data_out (data_out[i]), .valid_out (valid_out[i]), .ready_out (ready_out[i]) @@ -148,7 +169,7 @@ module VX_stream_xbar #( assign sel_out = 0; - end else begin + end else begin : g_passthru // (#inputs == 1) and (#outputs == 1) @@ -156,7 +177,7 @@ module VX_stream_xbar #( .DATAW (DATAW), .SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)), .OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)), - .LUTRAM (LUTRAM) + .LUTRAM (`TO_OUT_BUF_LUTRAM(OUT_BUF)) ) out_buf ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_toggle_buffer.sv b/hw/rtl/libs/VX_toggle_buffer.sv index fb24a7f79..9d6b42720 100644 --- a/hw/rtl/libs/VX_toggle_buffer.sv +++ b/hw/rtl/libs/VX_toggle_buffer.sv @@ -1,11 +1,11 @@ // Copyright 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at -// +// // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -26,23 +26,26 @@ module VX_toggle_buffer #( parameter DATAW = 1, parameter PASSTHRU = 0 -) ( +) ( input wire clk, input wire reset, input wire valid_in, - output wire ready_in, + output wire ready_in, input wire [DATAW-1:0] data_in, output wire [DATAW-1:0] data_out, input wire ready_out, output wire valid_out -); - if (PASSTHRU != 0) begin +); + if (PASSTHRU != 0) begin : g_passthru + `UNUSED_VAR (clk) `UNUSED_VAR (reset) assign ready_in = ready_out; - assign valid_out = valid_in; + assign valid_out = valid_in; assign data_out = data_in; - end else begin + + end else begin : g_buffer + reg [DATAW-1:0] buffer; reg has_data; @@ -54,7 +57,7 @@ module VX_toggle_buffer #( has_data <= valid_in; end else if (ready_out) begin has_data <= 0; - end + end end if (~has_data) begin buffer <= data_in; diff --git a/hw/syn/xilinx/test/kernel/main.c b/hw/rtl/libs/VX_transpose.sv similarity index 53% rename from hw/syn/xilinx/test/kernel/main.c rename to hw/rtl/libs/VX_transpose.sv index 4fcfd99c0..2fc0bd695 100644 --- a/hw/syn/xilinx/test/kernel/main.c +++ b/hw/rtl/libs/VX_transpose.sv @@ -1,36 +1,32 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. -#include -#include +`include "VX_platform.vh" -typedef struct { - uint32_t count; - uint32_t src_addr; - uint32_t dst_addr; -} kernel_arg_t; +`TRACING_OFF +module VX_transpose #( + parameter DATAW = 1, + parameter N = 1, + parameter M = 1 +) ( + input wire [N-1:0][M-1:0][DATAW-1:0] data_in, + output wire [M-1:0][N-1:0][DATAW-1:0] data_out +); + for (genvar i = 0; i < N; ++i) begin : g_i + for (genvar j = 0; j < M; ++j) begin : g_j + assign data_out[j][i] = data_in[i][j]; + end + end -int main() { - kernel_arg_t* arg = (kernel_arg_t*)csr_read(VX_CSR_MSCRATCH); - uint32_t count = arg->count; - int32_t* src_ptr = (int32_t*)arg->src_addr; - int32_t* dst_ptr = (int32_t*)arg->dst_addr; - - uint32_t offset = vx_core_id() * count; - - for (uint32_t i = 0; i < count; ++i) { - dst_ptr[offset + i] = src_ptr[offset + i]; - } - - return 0; -} +endmodule +`TRACING_ON diff --git a/hw/rtl/mem/VX_gbar_arb.sv b/hw/rtl/mem/VX_gbar_arb.sv index 9ff761ec2..2b0856980 100644 --- a/hw/rtl/mem/VX_gbar_arb.sv +++ b/hw/rtl/mem/VX_gbar_arb.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -33,7 +33,7 @@ module VX_gbar_arb #( wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in; wire [NUM_REQS-1:0] req_ready_in; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_data_in assign req_valid_in[i] = bus_in_if[i].req_valid; assign req_data_in[i] = {bus_in_if[i].req_id, bus_in_if[i].req_size_m1, bus_in_if[i].req_core_id}; assign bus_in_if[i].req_ready = req_ready_in[i]; @@ -71,7 +71,7 @@ module VX_gbar_arb #( rsp_id <= bus_out_if.rsp_id; end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_bus_in_if assign bus_in_if[i].rsp_valid = rsp_valid; assign bus_in_if[i].rsp_id = rsp_id; end diff --git a/hw/rtl/mem/VX_gbar_unit.sv b/hw/rtl/mem/VX_gbar_unit.sv index a6e5d9baa..c9707748f 100644 --- a/hw/rtl/mem/VX_gbar_unit.sv +++ b/hw/rtl/mem/VX_gbar_unit.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -13,7 +13,7 @@ `include "VX_define.vh" -module VX_gbar_unit #( +module VX_gbar_unit #( parameter `STRING INSTANCE_ID = "" ) ( input wire clk, @@ -26,7 +26,7 @@ module VX_gbar_unit #( reg [`NB_WIDTH-1:0][`NUM_CORES-1:0] barrier_masks; wire [`CLOG2(`NUM_CORES+1)-1:0] active_barrier_count; wire [`NUM_CORES-1:0] curr_barrier_mask = barrier_masks[gbar_bus_if.req_id]; - + `POP_COUNT(active_barrier_count, curr_barrier_mask); `UNUSED_VAR (active_barrier_count) @@ -56,15 +56,15 @@ module VX_gbar_unit #( assign gbar_bus_if.rsp_valid = rsp_valid; assign gbar_bus_if.rsp_id = rsp_bar_id; assign gbar_bus_if.req_ready = 1; // global barrier unit is always ready (no dependencies) - + `ifdef DBG_TRACE_GBAR always @(posedge clk) begin if (gbar_bus_if.req_valid && gbar_bus_if.req_ready) begin - `TRACE(1, ("%d: %s-acquire: bar_id=%0d, size=%0d, core_id=%0d\n", - $time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id)); + `TRACE(1, ("%t: %s acquire: bar_id=%0d, size=%0d, core_id=%0d\n", + $time, INSTANCE_ID, gbar_bus_if.req_id, gbar_bus_if.req_size_m1, gbar_bus_if.req_core_id)) end if (gbar_bus_if.rsp_valid) begin - `TRACE(1, ("%d: %s-release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)); + `TRACE(1, ("%t: %s release: bar_id=%0d\n", $time, INSTANCE_ID, gbar_bus_if.rsp_id)) end end `endif diff --git a/hw/rtl/mem/VX_lmem_switch.sv b/hw/rtl/mem/VX_lmem_switch.sv new file mode 100644 index 000000000..345f357a3 --- /dev/null +++ b/hw/rtl/mem/VX_lmem_switch.sv @@ -0,0 +1,133 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`include "VX_define.vh" + +module VX_lmem_switch import VX_gpu_pkg::*; #( + parameter REQ0_OUT_BUF = 0, + parameter REQ1_OUT_BUF = 0, + parameter RSP_OUT_BUF = 0, + parameter `STRING ARBITER = "R" +) ( + input wire clk, + input wire reset, + VX_lsu_mem_if.slave lsu_in_if, + VX_lsu_mem_if.master global_out_if, + VX_lsu_mem_if.master local_out_if +); + localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; + localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH; + + wire [`NUM_LSU_LANES-1:0] is_addr_local_mask; + wire req_global_ready; + wire req_local_ready; + + for (genvar i = 0; i < `NUM_LSU_LANES; ++i) begin : g_is_addr_local_mask + assign is_addr_local_mask[i] = lsu_in_if.req_data.flags[i][`MEM_REQ_FLAG_LOCAL]; + end + + wire is_addr_global = | (lsu_in_if.req_data.mask & ~is_addr_local_mask); + wire is_addr_local = | (lsu_in_if.req_data.mask & is_addr_local_mask); + + assign lsu_in_if.req_ready = (req_global_ready && is_addr_global) + || (req_local_ready && is_addr_local); + + VX_elastic_buffer #( + .DATAW (REQ_DATAW), + .SIZE (`TO_OUT_BUF_SIZE(REQ0_OUT_BUF)), + .OUT_REG (`TO_OUT_BUF_REG(REQ0_OUT_BUF)) + ) req_global_buf ( + .clk (clk), + .reset (reset), + .valid_in (lsu_in_if.req_valid && is_addr_global), + .data_in ({ + lsu_in_if.req_data.mask & ~is_addr_local_mask, + lsu_in_if.req_data.rw, + lsu_in_if.req_data.addr, + lsu_in_if.req_data.data, + lsu_in_if.req_data.byteen, + lsu_in_if.req_data.flags, + 
lsu_in_if.req_data.tag + }), + .ready_in (req_global_ready), + .valid_out (global_out_if.req_valid), + .data_out ({ + global_out_if.req_data.mask, + global_out_if.req_data.rw, + global_out_if.req_data.addr, + global_out_if.req_data.data, + global_out_if.req_data.byteen, + global_out_if.req_data.flags, + global_out_if.req_data.tag + }), + .ready_out (global_out_if.req_ready) + ); + + VX_elastic_buffer #( + .DATAW (REQ_DATAW), + .SIZE (`TO_OUT_BUF_SIZE(REQ1_OUT_BUF)), + .OUT_REG (`TO_OUT_BUF_REG(REQ1_OUT_BUF)) + ) req_local_buf ( + .clk (clk), + .reset (reset), + .valid_in (lsu_in_if.req_valid && is_addr_local), + .data_in ({ + lsu_in_if.req_data.mask & is_addr_local_mask, + lsu_in_if.req_data.rw, + lsu_in_if.req_data.addr, + lsu_in_if.req_data.data, + lsu_in_if.req_data.byteen, + lsu_in_if.req_data.flags, + lsu_in_if.req_data.tag + }), + .ready_in (req_local_ready), + .valid_out (local_out_if.req_valid), + .data_out ({ + local_out_if.req_data.mask, + local_out_if.req_data.rw, + local_out_if.req_data.addr, + local_out_if.req_data.data, + local_out_if.req_data.byteen, + local_out_if.req_data.flags, + local_out_if.req_data.tag + }), + .ready_out (local_out_if.req_ready) + ); + + VX_stream_arb #( + .NUM_INPUTS (2), + .DATAW (RSP_DATAW), + .ARBITER (ARBITER), + .OUT_BUF (RSP_OUT_BUF) + ) rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in ({ + local_out_if.rsp_valid, + global_out_if.rsp_valid + }), + .ready_in ({ + local_out_if.rsp_ready, + global_out_if.rsp_ready + }), + .data_in ({ + local_out_if.rsp_data, + global_out_if.rsp_data + }), + .data_out (lsu_in_if.rsp_data), + .valid_out (lsu_in_if.rsp_valid), + .ready_out (lsu_in_if.rsp_ready), + `UNUSED_PIN (sel_out) + ); + +endmodule diff --git a/hw/rtl/mem/VX_local_mem.sv b/hw/rtl/mem/VX_local_mem.sv index 3dce0ec43..7131c3f21 100644 --- a/hw/rtl/mem/VX_local_mem.sv +++ b/hw/rtl/mem/VX_local_mem.sv @@ -43,7 +43,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( // PERF `ifdef PERF_ENABLE - output cache_perf_t 
cache_perf, + output cache_perf_t lmem_perf, `endif VX_mem_bus_if.slave mem_bus_if [NUM_REQS] @@ -67,20 +67,20 @@ module VX_local_mem import VX_gpu_pkg::*; #( // bank selection wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] req_bank_idx; - if (NUM_BANKS > 1) begin - for (genvar i = 0; i < NUM_REQS; ++i) begin + if (NUM_BANKS > 1) begin : g_req_bank_idx + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_bank_idxs assign req_bank_idx[i] = mem_bus_if[i].req_data.addr[0 +: BANK_SEL_BITS]; end - end else begin + end else begin : g_req_bank_idx_0 assign req_bank_idx = 0; end // bank addressing wire [NUM_REQS-1:0][BANK_ADDR_WIDTH-1:0] req_bank_addr; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_bank_addr assign req_bank_addr[i] = mem_bus_if[i].req_data.addr[BANK_SEL_BITS +: BANK_ADDR_WIDTH]; - `UNUSED_VAR (mem_bus_if[i].req_data.atype) + `UNUSED_VAR (mem_bus_if[i].req_data.flags) end // bank requests dispatch @@ -104,7 +104,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [`PERF_CTR_BITS-1:0] perf_collisions; `endif - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_data_in assign req_valid_in[i] = mem_bus_if[i].req_valid; assign req_data_in[i] = { mem_bus_if[i].req_data.rw, @@ -121,7 +121,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .NUM_OUTPUTS (NUM_BANKS), .DATAW (REQ_DATAW), .PERF_CTR_BITS (`PERF_CTR_BITS), - .ARBITER ("F"), + .ARBITER ("P"), .OUT_BUF (3) // output should be registered for the data_store addressing ) req_xbar ( .clk (clk), @@ -141,7 +141,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( .ready_out (per_bank_req_ready) ); - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_per_bank_req_data_soa assign { per_bank_req_rw[i], per_bank_req_addr[i], @@ -159,33 +159,32 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_rsp_tag; wire [NUM_BANKS-1:0] 
per_bank_rsp_ready; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_data_store wire bank_rsp_valid, bank_rsp_ready; - wire [WORD_WIDTH-1:0] bank_rsp_data; - - `RESET_RELAY_EN (bram_reset, reset, (NUM_BANKS > 1)); VX_sp_ram #( .DATAW (WORD_WIDTH), .SIZE (WORDS_PER_BANK), .WRENW (WORD_SIZE), + .OUT_REG (1), + .READ_ENABLE (0), .NO_RWCHECK (1) ) data_store ( .clk (clk), - .reset (bram_reset), + .reset (reset), .read (per_bank_req_valid[i] && per_bank_req_ready[i] && ~per_bank_req_rw[i]), .write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]), .wren (per_bank_req_byteen[i]), .addr (per_bank_req_addr[i]), .wdata (per_bank_req_data[i]), - .rdata (bank_rsp_data) + .rdata (per_bank_rsp_data[i]) ); // read-during-write hazard detection reg [BANK_ADDR_WIDTH-1:0] last_wr_addr; reg last_wr_valid; always @(posedge clk) begin - if (bram_reset) begin + if (reset) begin last_wr_valid <= 0; end else begin last_wr_valid <= per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]; @@ -194,20 +193,20 @@ module VX_local_mem import VX_gpu_pkg::*; #( end wire is_rdw_hazard = last_wr_valid && ~per_bank_req_rw[i] && (per_bank_req_addr[i] == last_wr_addr); - // drop write response and stall on read-during-write hazard + // drop write response assign bank_rsp_valid = per_bank_req_valid[i] && ~per_bank_req_rw[i] && ~is_rdw_hazard; assign per_bank_req_ready[i] = (bank_rsp_ready || per_bank_req_rw[i]) && ~is_rdw_hazard; // register BRAM output VX_pipe_buffer #( - .DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH) + .DATAW (REQ_SEL_WIDTH + TAG_WIDTH) ) bram_buf ( .clk (clk), - .reset (bram_reset), + .reset (reset), .valid_in (bank_rsp_valid), .ready_in (bank_rsp_ready), - .data_in ({per_bank_req_idx[i], bank_rsp_data, per_bank_req_tag[i]}), - .data_out ({per_bank_rsp_idx[i], per_bank_rsp_data[i], per_bank_rsp_tag[i]}), + .data_in ({per_bank_req_idx[i], per_bank_req_tag[i]}), + .data_out ({per_bank_rsp_idx[i], 
per_bank_rsp_tag[i]}), .valid_out (per_bank_rsp_valid[i]), .ready_out (per_bank_rsp_ready[i]) ); @@ -217,7 +216,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_aos; - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_per_bank_rsp_data_aos assign per_bank_rsp_data_aos[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]}; end @@ -245,7 +244,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( `UNUSED_PIN (sel_out) ); - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_mem_bus_if assign mem_bus_if[i].rsp_valid = rsp_valid_out[i]; assign mem_bus_if[i].rsp_data = rsp_data_out[i]; assign rsp_ready_out[i] = mem_bus_if[i].rsp_ready; @@ -258,7 +257,7 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle; wire [NUM_REQS-1:0] req_rw; - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_rw assign req_rw[i] = mem_bus_if[i].req_data.rw; end @@ -288,14 +287,14 @@ module VX_local_mem import VX_gpu_pkg::*; #( end end - assign cache_perf.reads = perf_reads; - assign cache_perf.writes = perf_writes; - assign cache_perf.read_misses = '0; - assign cache_perf.write_misses = '0; - assign cache_perf.bank_stalls = perf_collisions; - assign cache_perf.mshr_stalls = '0; - assign cache_perf.mem_stalls = '0; - assign cache_perf.crsp_stalls = perf_crsp_stalls; + assign lmem_perf.reads = perf_reads; + assign lmem_perf.writes = perf_writes; + assign lmem_perf.read_misses = '0; + assign lmem_perf.write_misses = '0; + assign lmem_perf.bank_stalls = perf_collisions; + assign lmem_perf.mshr_stalls = '0; + assign lmem_perf.mem_stalls = '0; + assign lmem_perf.crsp_stalls = perf_crsp_stalls; `endif @@ -304,11 +303,11 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] req_uuid; wire [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] rsp_uuid; - for 
(genvar i = 0; i < NUM_REQS; ++i) begin - if (UUID_WIDTH != 0) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_uuid + if (UUID_WIDTH != 0) begin : g_uuid assign req_uuid[i] = mem_bus_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; assign rsp_uuid[i] = mem_bus_if[i].rsp_data.tag[TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_no_uuid assign req_uuid[i] = 0; assign rsp_uuid[i] = 0; end @@ -317,48 +316,48 @@ module VX_local_mem import VX_gpu_pkg::*; #( wire [NUM_BANKS-1:0][`UP(UUID_WIDTH)-1:0] per_bank_req_uuid; wire [NUM_BANKS-1:0][`UP(UUID_WIDTH)-1:0] per_bank_rsp_uuid; - for (genvar i = 0; i < NUM_BANKS; ++i) begin - if (UUID_WIDTH != 0) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_per_bank_req_uuid + if (UUID_WIDTH != 0) begin : g_uuid assign per_bank_req_uuid[i] = per_bank_req_tag[i][TAG_WIDTH-1 -: UUID_WIDTH]; assign per_bank_rsp_uuid[i] = per_bank_rsp_tag[i][TAG_WIDTH-1 -: UUID_WIDTH]; - end else begin + end else begin : g_no_uuid assign per_bank_req_uuid[i] = 0; assign per_bank_rsp_uuid[i] = 0; end end - for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin : g_req_trace always @(posedge clk) begin if (mem_bus_if[i].req_valid && mem_bus_if[i].req_ready) begin if (mem_bus_if[i].req_data.rw) begin - `TRACE(1, ("%d: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])); + `TRACE(1, ("%t: %s wr-req: req_idx=%0d, addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", + $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, mem_bus_if[i].req_data.byteen, mem_bus_if[i].req_data.data, req_uuid[i])) end else begin - `TRACE(1, ("%d: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", - $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i])); + 
`TRACE(1, ("%t: %s rd-req: req_idx=%0d, addr=0x%0h, tag=0x%0h (#%0d)\n", + $time, INSTANCE_ID, i, mem_bus_if[i].req_data.addr, mem_bus_if[i].req_data.tag, req_uuid[i])) end end if (mem_bus_if[i].rsp_valid && mem_bus_if[i].rsp_ready) begin - `TRACE(1, ("%d: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i])); + `TRACE(1, ("%t: %s rd-rsp: req_idx=%0d, tag=0x%0h, data=0x%h (#%0d)\n", + $time, INSTANCE_ID, i, mem_bus_if[i].rsp_data.tag, mem_bus_if[i].rsp_data.data[i], rsp_uuid[i])) end end end - for (genvar i = 0; i < NUM_BANKS; ++i) begin + for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_bank_trace always @(posedge clk) begin if (per_bank_req_valid[i] && per_bank_req_ready[i]) begin if (per_bank_req_rw[i]) begin - `TRACE(2, ("%d: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=%h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i])); + `TRACE(2, ("%t: %s-bank%0d wr-req: addr=0x%0h, tag=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", + $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_byteen[i], per_bank_req_data[i], per_bank_req_uuid[i])) end else begin - `TRACE(2, ("%d: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", - $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i])); + `TRACE(2, ("%t: %s-bank%0d rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n", + $time, INSTANCE_ID, i, per_bank_req_addr[i], per_bank_req_tag[i], per_bank_req_uuid[i])) end end if (per_bank_rsp_valid[i] && per_bank_rsp_ready[i]) begin - `TRACE(2, ("%d: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", - $time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], per_bank_rsp_uuid[i])); + `TRACE(2, ("%t: %s-bank%0d rd-rsp: tag=0x%0h, data=0x%h (#%0d)\n", + $time, INSTANCE_ID, i, per_bank_rsp_tag[i], per_bank_rsp_data[i], 
per_bank_rsp_uuid[i])) end end end diff --git a/hw/rtl/mem/VX_local_mem_top.sv b/hw/rtl/mem/VX_local_mem_top.sv index e576d32ec..fda15cde2 100644 --- a/hw/rtl/mem/VX_local_mem_top.sv +++ b/hw/rtl/mem/VX_local_mem_top.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,15 +17,13 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( parameter `STRING INSTANCE_ID = "", // Size of cache in bytes - parameter SIZE = (1024*16*8), - + parameter SIZE = (1024*16*8), + // Number of Word requests per cycle - parameter NUM_REQS = 4, + parameter NUM_REQS = 4, // Number of banks parameter NUM_BANKS = 4, - // Address width - parameter ADDR_WIDTH = `CLOG2(SIZE), // Size of a word in bytes parameter WORD_SIZE = `XLEN/8, @@ -33,8 +31,14 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( parameter UUID_WIDTH = 0, // Request tag size - parameter TAG_WIDTH = 16 - ) ( + parameter TAG_WIDTH = 16, + + // Address width + parameter NUM_WORDS = SIZE / WORD_SIZE, + parameter WORDS_PER_BANK = NUM_WORDS / NUM_BANKS, + parameter BANK_ADDR_WIDTH = `CLOG2(WORDS_PER_BANK), + parameter ADDR_WIDTH = BANK_ADDR_WIDTH + `CLOG2(NUM_BANKS) + ) ( input wire clk, input wire reset, @@ -43,7 +47,7 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( input wire [NUM_REQS-1:0] mem_req_rw, input wire [NUM_REQS-1:0][WORD_SIZE-1:0] mem_req_byteen, input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] mem_req_addr, - input wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_atype, + input wire [NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags, input wire 
[NUM_REQS-1:0][WORD_SIZE*8-1:0] mem_req_data, input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] mem_req_tag, output wire [NUM_REQS-1:0] mem_req_ready, @@ -56,7 +60,8 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( ); VX_mem_bus_if #( .DATA_SIZE (WORD_SIZE), - .TAG_WIDTH (TAG_WIDTH) + .TAG_WIDTH (TAG_WIDTH), + .ADDR_WIDTH(ADDR_WIDTH) ) mem_bus_if[NUM_REQS](); // memory request @@ -65,7 +70,7 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( assign mem_bus_if[i].req_data.rw = mem_req_rw[i]; assign mem_bus_if[i].req_data.byteen = mem_req_byteen[i]; assign mem_bus_if[i].req_data.addr = mem_req_addr[i]; - assign mem_bus_if[i].req_data.atype = mem_req_atype[i]; + assign mem_bus_if[i].req_data.flags = mem_req_flags[i]; assign mem_bus_if[i].req_data.data = mem_req_data[i]; assign mem_bus_if[i].req_data.tag = mem_req_tag[i]; assign mem_req_ready[i] = mem_bus_if[i].req_ready; @@ -86,9 +91,10 @@ module VX_local_mem_top import VX_gpu_pkg::*; #( .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), .ADDR_WIDTH (ADDR_WIDTH), - .UUID_WIDTH (UUID_WIDTH), - .TAG_WIDTH (TAG_WIDTH) - ) local_mem ( + .UUID_WIDTH (UUID_WIDTH), + .TAG_WIDTH (TAG_WIDTH), + .OUT_BUF (3) + ) local_mem ( .clk (clk), .reset (reset), .mem_bus_if (mem_bus_if) diff --git a/hw/rtl/core/VX_lsu_adapter.sv b/hw/rtl/mem/VX_lsu_adapter.sv similarity index 90% rename from hw/rtl/core/VX_lsu_adapter.sv rename to hw/rtl/mem/VX_lsu_adapter.sv index 21d43d280..eb5dd102a 100644 --- a/hw/rtl/core/VX_lsu_adapter.sv +++ b/hw/rtl/mem/VX_lsu_adapter.sv @@ -29,7 +29,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( VX_mem_bus_if.master mem_bus_if [NUM_LANES] ); localparam REQ_ADDR_WIDTH = `MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE); - localparam REQ_DATA_WIDTH = 1 + DATA_SIZE + REQ_ADDR_WIDTH + `ADDR_TYPE_WIDTH + DATA_SIZE * 8; + localparam REQ_DATA_WIDTH = 1 + DATA_SIZE + REQ_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + DATA_SIZE * 8; localparam RSP_DATA_WIDTH = DATA_SIZE * 8; // handle request unpacking @@ -41,29 +41,16 @@ module VX_lsu_adapter 
import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0][TAG_WIDTH-1:0] req_tag_out; wire [NUM_LANES-1:0] req_ready_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_req_data_in assign req_data_in[i] = { lsu_mem_if.req_data.rw, - lsu_mem_if.req_data.byteen[i], lsu_mem_if.req_data.addr[i], - lsu_mem_if.req_data.atype[i], - lsu_mem_if.req_data.data[i] + lsu_mem_if.req_data.data[i], + lsu_mem_if.req_data.byteen[i], + lsu_mem_if.req_data.flags[i] }; end - for (genvar i = 0; i < NUM_LANES; ++i) begin - assign mem_bus_if[i].req_valid = req_valid_out[i]; - assign { - mem_bus_if[i].req_data.rw, - mem_bus_if[i].req_data.byteen, - mem_bus_if[i].req_data.addr, - mem_bus_if[i].req_data.atype, - mem_bus_if[i].req_data.data - } = req_data_out[i]; - assign mem_bus_if[i].req_data.tag = req_tag_out[i]; - assign req_ready_out[i] = mem_bus_if[i].req_ready; - end - VX_stream_unpack #( .NUM_REQS (NUM_LANES), .DATA_WIDTH (REQ_DATA_WIDTH), @@ -83,6 +70,19 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( .ready_out (req_ready_out) ); + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_bus_req + assign mem_bus_if[i].req_valid = req_valid_out[i]; + assign { + mem_bus_if[i].req_data.rw, + mem_bus_if[i].req_data.addr, + mem_bus_if[i].req_data.data, + mem_bus_if[i].req_data.byteen, + mem_bus_if[i].req_data.flags + } = req_data_out[i]; + assign mem_bus_if[i].req_data.tag = req_tag_out[i]; + assign req_ready_out[i] = mem_bus_if[i].req_ready; + end + // handle response packing wire [NUM_LANES-1:0] rsp_valid_out; @@ -90,7 +90,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #( wire [NUM_LANES-1:0][TAG_WIDTH-1:0] rsp_tag_out; wire [NUM_LANES-1:0] rsp_ready_out; - for (genvar i = 0; i < NUM_LANES; ++i) begin + for (genvar i = 0; i < NUM_LANES; ++i) begin : g_mem_bus_rsp assign rsp_valid_out[i] = mem_bus_if[i].rsp_valid; assign rsp_data_out[i] = mem_bus_if[i].rsp_data.data; assign rsp_tag_out[i] = mem_bus_if[i].rsp_data.tag; diff --git 
a/hw/rtl/interfaces/VX_lsu_mem_if.sv b/hw/rtl/mem/VX_lsu_mem_if.sv similarity index 92% rename from hw/rtl/interfaces/VX_lsu_mem_if.sv rename to hw/rtl/mem/VX_lsu_mem_if.sv index 661071eb6..0789bcb13 100644 --- a/hw/rtl/interfaces/VX_lsu_mem_if.sv +++ b/hw/rtl/mem/VX_lsu_mem_if.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,19 +16,19 @@ interface VX_lsu_mem_if #( parameter NUM_LANES = 1, parameter DATA_SIZE = 1, - parameter ATYPE_WIDTH= `ADDR_TYPE_WIDTH, + parameter FLAGS_WIDTH= `MEM_REQ_FLAGS_WIDTH, parameter TAG_WIDTH = 1, parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, parameter ADDR_WIDTH = MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE) ) (); typedef struct packed { - logic rw; + logic rw; logic [NUM_LANES-1:0] mask; - logic [NUM_LANES-1:0][DATA_SIZE-1:0] byteen; logic [NUM_LANES-1:0][ADDR_WIDTH-1:0] addr; - logic [NUM_LANES-1:0][ATYPE_WIDTH-1:0] atype; logic [NUM_LANES-1:0][DATA_SIZE*8-1:0] data; + logic [NUM_LANES-1:0][DATA_SIZE-1:0] byteen; + logic [NUM_LANES-1:0][FLAGS_WIDTH-1:0] flags; logic [TAG_WIDTH-1:0] tag; } req_data_t; diff --git a/hw/rtl/mem/VX_mem_arb.sv b/hw/rtl/mem/VX_mem_arb.sv index ef51e2387..321bbb270 100644 --- a/hw/rtl/mem/VX_mem_arb.sv +++ b/hw/rtl/mem/VX_mem_arb.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,14 +13,14 @@ `include "VX_define.vh" -module VX_mem_arb #( - parameter NUM_INPUTS = 1, +module VX_mem_arb #( + parameter NUM_INPUTS = 1, parameter NUM_OUTPUTS = 1, parameter DATA_SIZE = 1, - parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, + parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, parameter ADDR_WIDTH = (MEM_ADDR_WIDTH-`CLOG2(DATA_SIZE)), - parameter TAG_WIDTH = 1, - parameter TAG_SEL_IDX = 0, + parameter TAG_WIDTH = 1, + parameter TAG_SEL_IDX = 0, parameter REQ_OUT_BUF = 0, parameter RSP_OUT_BUF = 0, parameter `STRING ARBITER = "R" @@ -30,10 +30,10 @@ module VX_mem_arb #( VX_mem_bus_if.slave bus_in_if [NUM_INPUTS], VX_mem_bus_if.master bus_out_if [NUM_OUTPUTS] -); +); localparam DATA_WIDTH = (8 * DATA_SIZE); localparam LOG_NUM_REQS = `ARB_SEL_BITS(NUM_INPUTS, NUM_OUTPUTS); - localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; + localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH; `STATIC_ASSERT ((NUM_INPUTS >= NUM_OUTPUTS), ("invalid parameter")) @@ -47,20 +47,20 @@ module VX_mem_arb #( wire [NUM_OUTPUTS-1:0][`UP(LOG_NUM_REQS)-1:0] req_sel_out; wire [NUM_OUTPUTS-1:0] req_ready_out; - for (genvar i = 0; i < NUM_INPUTS; ++i) begin + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_req_data_in assign req_valid_in[i] = bus_in_if[i].req_valid; assign req_data_in[i] = { bus_in_if[i].req_data.rw, bus_in_if[i].req_data.byteen, bus_in_if[i].req_data.addr, - bus_in_if[i].req_data.atype, + bus_in_if[i].req_data.flags, bus_in_if[i].req_data.data, bus_in_if[i].req_data.tag }; assign bus_in_if[i].req_ready = req_ready_in[i]; end - 
VX_stream_arb #( + VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), .NUM_OUTPUTS (NUM_OUTPUTS), .DATAW (REQ_DATAW), @@ -78,9 +78,9 @@ module VX_mem_arb #( .ready_out (req_ready_out) ); - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_bus_out_if wire [TAG_WIDTH-1:0] req_tag_out; - VX_bits_insert #( + VX_bits_insert #( .N (TAG_WIDTH), .S (LOG_NUM_REQS), .POS (TAG_SEL_IDX) @@ -94,8 +94,8 @@ module VX_mem_arb #( bus_out_if[i].req_data.rw, bus_out_if[i].req_data.byteen, bus_out_if[i].req_data.addr, - bus_out_if[i].req_data.atype, - bus_out_if[i].req_data.data, + bus_out_if[i].req_data.flags, + bus_out_if[i].req_data.data, req_tag_out } = req_data_out[i]; assign req_ready_out[i] = bus_out_if[i].req_ready; @@ -111,13 +111,13 @@ module VX_mem_arb #( wire [NUM_OUTPUTS-1:0][RSP_DATAW-1:0] rsp_data_in; wire [NUM_OUTPUTS-1:0] rsp_ready_in; - if (NUM_INPUTS > NUM_OUTPUTS) begin + if (NUM_INPUTS > NUM_OUTPUTS) begin : g_rsp_enabled wire [NUM_OUTPUTS-1:0][LOG_NUM_REQS-1:0] rsp_sel_in; - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_rsp_data_in wire [TAG_WIDTH-1:0] rsp_tag_out; - VX_bits_remove #( + VX_bits_remove #( .N (TAG_WIDTH + LOG_NUM_REQS), .S (LOG_NUM_REQS), .POS (TAG_SEL_IDX) @@ -130,12 +130,12 @@ module VX_mem_arb #( assign rsp_data_in[i] = {rsp_tag_out, bus_out_if[i].rsp_data.data}; assign bus_out_if[i].rsp_ready = rsp_ready_in[i]; - if (NUM_INPUTS > 1) begin + if (NUM_INPUTS > 1) begin : g_rsp_sel_in assign rsp_sel_in[i] = bus_out_if[i].rsp_data.tag[TAG_SEL_IDX +: LOG_NUM_REQS]; - end else begin + end else begin : g_no_rsp_sel_in assign rsp_sel_in[i] = '0; end - end + end VX_stream_switch #( .NUM_INPUTS (NUM_OUTPUTS), @@ -154,12 +154,12 @@ module VX_mem_arb #( .ready_out (rsp_ready_out) ); - end else begin - - for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin + end else begin : g_passthru + + for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_rsp_data_in assign 
rsp_valid_in[i] = bus_out_if[i].rsp_valid; assign rsp_data_in[i] = { - bus_out_if[i].rsp_data.tag, + bus_out_if[i].rsp_data.tag, bus_out_if[i].rsp_data.data }; assign bus_out_if[i].rsp_ready = rsp_ready_in[i]; @@ -184,11 +184,11 @@ module VX_mem_arb #( ); end - - for (genvar i = 0; i < NUM_INPUTS; ++i) begin + + for (genvar i = 0; i < NUM_INPUTS; ++i) begin : g_output assign bus_in_if[i].rsp_valid = rsp_valid_out[i]; assign { - bus_in_if[i].rsp_data.tag, + bus_in_if[i].rsp_data.tag, bus_in_if[i].rsp_data.data } = rsp_data_out[i]; assign rsp_ready_out[i] = bus_in_if[i].rsp_ready; diff --git a/hw/rtl/mem/VX_mem_bus_if.sv b/hw/rtl/mem/VX_mem_bus_if.sv index 1b7fca777..15f226690 100644 --- a/hw/rtl/mem/VX_mem_bus_if.sv +++ b/hw/rtl/mem/VX_mem_bus_if.sv @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,7 +15,7 @@ interface VX_mem_bus_if #( parameter DATA_SIZE = 1, - parameter ATYPE_WIDTH= `ADDR_TYPE_WIDTH, + parameter FLAGS_WIDTH= `MEM_REQ_FLAGS_WIDTH, parameter TAG_WIDTH = 1, parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH, parameter ADDR_WIDTH = MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE) @@ -23,10 +23,10 @@ interface VX_mem_bus_if #( typedef struct packed { logic rw; - logic [DATA_SIZE-1:0] byteen; logic [ADDR_WIDTH-1:0] addr; - logic [ATYPE_WIDTH-1:0] atype; logic [DATA_SIZE*8-1:0] data; + logic [DATA_SIZE-1:0] byteen; + logic [FLAGS_WIDTH-1:0] flags; logic [TAG_WIDTH-1:0] tag; } req_data_t; diff --git a/hw/rtl/mem/VX_mem_switch.sv b/hw/rtl/mem/VX_mem_switch.sv index fd26c2aa8..21ec7278a 100644 --- a/hw/rtl/mem/VX_mem_switch.sv +++ b/hw/rtl/mem/VX_mem_switch.sv @@ -31,7 +31,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #( VX_mem_bus_if.master bus_out_if [NUM_REQS] ); localparam DATA_WIDTH = (8 * DATA_SIZE); - localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; + localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH; // handle requests //////////////////////////////////////////////////////// diff --git a/hw/scripts/ip_gen.sh b/hw/scripts/altera_ip_gen.sh similarity index 100% rename from hw/scripts/ip_gen.sh rename to hw/scripts/altera_ip_gen.sh diff --git a/hw/scripts/bin2coe.py b/hw/scripts/bin2coe.py new file mode 100755 index 000000000..eaaa3619e --- /dev/null +++ b/hw/scripts/bin2coe.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os + +def parse_binfile_option(option): + addr, path = option.split(':') + return int(addr, 0), path + +def parse_value_option(option): + addr, value = option.split(':') + return int(addr, 0), value + +def load_binary_data(addr, path, word_size, memory, little_endian): + with open(path, 'rb') as f: + binary_data = f.read() + + word_count = len(binary_data) // word_size + if len(binary_data) % word_size != 0: + word_count += 1 + + for i in range(word_count): + word_data = binary_data[i * word_size: (i + 1) * word_size] + if little_endian: + word_data = word_data[::-1] # Reverse the byte order for little-endian + hex_value = word_data.hex().zfill(word_size * 2) + memory[addr + i] = hex_value + +def add_value_data(addr, value, memory, word_size): + value = value.zfill(word_size * 2) + memory[addr] = value + +def binary_to_coe(output_file, word_size, depth, default_value, memory): + if depth == 0: + depth = max(memory.keys()) + 1 + + with open(output_file, 'w') as coe_file: + coe_file.write("; This file was generated from binary blobs and/or values\n") + coe_file.write("memory_initialization_radix=16;\n") + coe_file.write("memory_initialization_vector=\n") + + for addr in range(depth): + hex_value = memory.get(addr, default_value) + coe_file.write(f"{hex_value},\n") + + coe_file.seek(coe_file.tell() - 2) + coe_file.write(";\n") + +def main(): + parser = argparse.ArgumentParser(description="Convert binaries and values to a Xilinx COE file.") + parser.add_argument("--binfile", action='append', help="Binary file with starting address in the 
format :") + parser.add_argument("--value", action='append', help="Hex value with starting address in the format :") + parser.add_argument("--out", default="output.coe", help="Output file (optional).") + parser.add_argument("--wordsize", type=int, default=4, help="Word size in bytes (default 4).") + parser.add_argument("--depth", type=int, default=0, help="Address size (optional).") + parser.add_argument("--default", default="00", help="Default hex value as string (optional).") + parser.add_argument("--little_endian", action='store_true', help="Interpret binary files as little-endian (default is big-endian).") + + args = parser.parse_args() + + if args.binfile is None and args.value is None: + raise ValueError("At least one --binfile or --value must be provided.") + + # Initialize memory dictionary + memory = {} + + # Process binary files + if args.binfile: + for option in args.binfile: + addr, path = parse_binfile_option(option) + load_binary_data(addr, path, args.wordsize, memory, args.little_endian) + + # Process individual values + if args.value: + for option in args.value: + addr, value = parse_value_option(option) + add_value_data(addr, value, memory, args.wordsize) + + # Generate the COE file + binary_to_coe(args.out, args.wordsize, args.depth, args.default.zfill(args.wordsize * 2), memory) + +if __name__ == "__main__": + main() diff --git a/hw/scripts/gen_sources.sh b/hw/scripts/gen_sources.sh index 0748b3632..ed9143eb3 100755 --- a/hw/scripts/gen_sources.sh +++ b/hw/scripts/gen_sources.sh @@ -1,18 +1,20 @@ #!/bin/bash # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + defines=() includes=() externs=() @@ -21,40 +23,47 @@ output_file="" define_header="" top_module="" copy_folder="" -prepropressor=0 +preprocessor=0 defines_str="" params_str="" includes_str="" -script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +# Helper function to append options +add_option() { + if [ -n "$1" ]; then + echo "$1 $2" + else + echo "$2" + fi +} -# parse command arguments +# Parse command arguments while getopts D:G:T:I:J:O:H:C:Ph flag do case "${flag}" in D) defines+=( ${OPTARG} ) - defines_str+="-D${OPTARG} " + defines_str=$(add_option "$defines_str" "-D${OPTARG}") ;; - G) params_str+="-G${OPTARG} " + G) params_str=$(add_option "$params_str" "-G${OPTARG}") ;; - T) top_module=( ${OPTARG} ) + T) top_module="${OPTARG}" ;; I) includes+=( ${OPTARG} ) - includes_str+="-I${OPTARG} " + includes_str=$(add_option "$includes_str" "-I${OPTARG}") ;; J) externs+=( ${OPTARG} ) - includes_str+="-I${OPTARG} " + includes_str=$(add_option "$includes_str" "-I${OPTARG}") ;; - O) output_file=( ${OPTARG} ) + O) output_file="${OPTARG}" ;; - H) define_header=( ${OPTARG} ) + H) define_header="${OPTARG}" ;; - C) copy_folder=( ${OPTARG} ) + C) copy_folder="${OPTARG}" ;; - P) prepropressor=1 + P) preprocessor=1 ;; - h) echo "Usage: [-D] [-G=] [-T] [-I] [-J] [-O] [-C: copy to] [-H] [-P: macro prepropressing] [-h help]" + h) echo "Usage: [-D] [-G=] [-T] [-I] [-J] [-O] [-C: copy to] [-H] [-P: macro preprocessing] [-h help]" exit 0 ;; \?) 
echo "Invalid option: -$OPTARG" 1>&2 @@ -70,33 +79,32 @@ if [ "$define_header" != "" ]; then # dump defines into a header file for value in ${defines[@]}; do arrNV=(${value//=/ }) - if (( ${#arrNV[@]} > 1 )); - then + if (( ${#arrNV[@]} > 1 )); then echo "\`define ${arrNV[0]} ${arrNV[1]}" else echo "\`define $value" - fi + fi done - } > $define_header + } > "$define_header" fi if [ "$copy_folder" != "" ]; then - # copy source files - mkdir -p $copy_folder + # copy source files + mkdir -p "$copy_folder" for dir in ${includes[@]}; do find "$dir" -maxdepth 1 -type f | while read -r file; do file_ext="${file##*.}" - file_name=$(basename -- $file) - if [ $prepropressor != 0 ] && { [ "$file_ext" == "v" ] || [ "$file_ext" == "sv" ]; }; then + file_name=$(basename -- "$file") + if [ $preprocessor != 0 ] && { [ "$file_ext" == "v" ] || [ "$file_ext" == "sv" ]; }; then if [[ -n "$params_str" && $file_name == "$top_module."* ]]; then temp_file=$(mktemp) - $script_dir/repl_params.py $params_str -T$top_module $file > $temp_file - verilator $defines_str $includes_str -E -P $temp_file > $copy_folder/$file_name + $script_dir/repl_params.py $params_str -T$top_module "$file" > "$temp_file" + verilator $defines_str $includes_str -E -P "$temp_file" > "$copy_folder/$file_name" else - verilator $defines_str $includes_str -E -P $file > $copy_folder/$file_name - fi + verilator $defines_str $includes_str -E -P "$file" > "$copy_folder/$file_name" + fi else - cp $file $copy_folder + cp "$file" "$copy_folder" fi done done @@ -112,7 +120,7 @@ if [ "$output_file" != "" ]; then fi for dir in ${externs[@]}; do - echo "+incdir+$(realpath $dir)" + echo "+incdir+$(realpath "$dir")" done for dir in ${externs[@]}; do @@ -124,24 +132,24 @@ if [ "$output_file" != "" ]; then if [ "$copy_folder" != "" ]; then # dump include directories - echo "+incdir+$(realpath $copy_folder)" + echo "+incdir+$(realpath "$copy_folder")" # dump source files - find "$(realpath $copy_folder)" -maxdepth 1 -type f -name 
"*_pkg.sv" -print - find "$(realpath $copy_folder)" -maxdepth 1 -type f \( -name "*.v" -o -name "*.sv" \) ! -name "*_pkg.sv" -print + find "$(realpath "$copy_folder")" -maxdepth 1 -type f -name "*_pkg.sv" -print + find "$(realpath "$copy_folder")" -maxdepth 1 -type f \( -name "*.v" -o -name "*.sv" \) ! -name "*_pkg.sv" -print else # dump include directories for dir in ${includes[@]}; do - echo "+incdir+$(realpath $dir)" + echo "+incdir+$(realpath "$dir")" done - + # dump source files for dir in ${includes[@]}; do - find "$(realpath $dir)" -maxdepth 1 -type f -name "*_pkg.sv" -print + find "$(realpath "$dir")" -maxdepth 1 -type f -name "*_pkg.sv" -print done for dir in ${includes[@]}; do - find "$(realpath $dir)" -maxdepth 1 -type f \( -name "*.v" -o -name "*.sv" \) ! -name "*_pkg.sv" -print + find "$(realpath "$dir")" -maxdepth 1 -type f \( -name "*.v" -o -name "*.sv" \) ! -name "*_pkg.sv" -print done fi - } > $output_file -fi + } > "$output_file" +fi \ No newline at end of file diff --git a/hw/scripts/ila_insert.tcl b/hw/scripts/ila_insert.tcl new file mode 100644 index 000000000..de9f0eec0 --- /dev/null +++ b/hw/scripts/ila_insert.tcl @@ -0,0 +1,231 @@ +###################################################################### +# Automatically inserts ILA instances in a batch flow, and calls "implement_debug_core". Can also be used in a GUI flow +# This should ONLY be invoked after synthesis, and before opt_design. If opt_design is called first, marked nets may be missing and not found +# Warning: Currently will skip a net if it has no obvious clock domain on the driver. Nets connected to input buffers will be dropped unless "mark_debug_clock" is attached to the net. +# Nets attached to VIO cores have the "mark_debug" attribute, and will be filtered out unless the "mark_debug_valid" attribute is attached. 
+# Supports the following additional attributes beyond "mark_debug" +# attribute mark_debug_valid of X : signal is "true"; -- Marks a net for ILA capture, even if net is also attached to a VIO core +# attribute mark_debug_clock of X : signal is "inst1_bufg/clock"; -- Specifies clock net to use for capturing this net. May create a new ILA core for that clock domain +# attribute mark_debug_depth of X : signal is "4096"; -- overrides default depth for this ILA core. valid values: 1024, 2048, ... 132072. Last attribute that is scanned will win. +# attribute mark_debug_adv_trigger of X : signal is "true"; -- specifies that advanced trigger capability will be added to ILA core +# Engineer: J. McCluskey +proc insert_ila { depth } { + # sequence through debug nets and organize them by clock in the + # clock_list array. Also create max and min array for bus indices + set dbgs [get_nets -hierarchical -filter {MARK_DEBUG}] + if {[llength $dbgs] == 0} { + puts "No debug net found. No ILA cores created" + return + } + + # process list of nets to find and reject nets that are attached to VIO cores. + # This has a side effect that VIO nets can't be monitored with an ILA + # This can be overridden by using the attribute "mark_debug_valid" = "true" on a net like this. + set net_list {} + foreach net $dbgs { + if { [get_property -quiet MARK_DEBUG_VALID $net] != "true" } { + set pin_list [get_pins -of_objects [get_nets -segments $net]] + set not_vio_net 1 + foreach pin $pin_list { + if { [get_property IS_DEBUG_CORE [get_cells -of_object $pin]] == 1 } { + # It seems this net is attached to a debug core (i.e. VIO core) already, so we should skip adding it to the netlist + set not_vio_net 0 + break + } + } + if { $not_vio_net == 1 } { lappend net_list $net; } + } else { + lappend net_list $net + } + } + + # check again to see if we have any nets left now + if {[llength $net_list] == 0} { + puts "All nets with MARK_DEBUG are already connected to VIO cores. 
No ILA cores created" + return + } + + # Now that the netlist has been filtered, determine bus names and clock domains + foreach d $net_list { + # name is root name of a bus, index is the bit index in the bus + set name [regsub {\[[[:digit:]]+\]$} $d {}] + set index [regsub {^.*\[([[:digit:]]+)\]$} $d {\1}] + if {[string is integer -strict $index]} { + if {![info exists max($name)]} { + set max($name) $index + set min($name) $index + } elseif {$index > $max($name)} { + set max($name) $index + } elseif {$index < $min($name)} { + set min($name) $index + } + } else { + set max($name) -1 + } + # Now we search for the local clock net associated with the target net. + # There may be ambiguities or no answer in some cases + if {![info exists clocks($name)]} { + # does MARK_DEBUG_CLOCK decorate this net? If not, then search backwards to the driver cell + set clk_name [get_property -quiet MARK_DEBUG_CLOCK $d] + if { [llength $clk_name] == 0 } { + # trace to the clock net, tracing backwards via the driver pin. + set driver_pin [get_pins -filter {DIRECTION == "OUT" && IS_LEAF == TRUE } -of_objects [ get_nets -segments $d ]] + set driver_cell [get_cells -of_objects $driver_pin] + if { [get_property IS_SEQUENTIAL $driver_cell] == 1 } { + set timing_arc [get_timing_arcs -to $driver_pin] + set cell_clock_pin [get_pins -filter {IS_CLOCK} [get_property FROM_PIN $timing_arc]] + if { [llength $cell_clock_pin] > 1 } { + puts "Error: in insert_ila. Found more than 1 clock pin in driver cell $driver_cell with timing arc $timing_arc for net $d" + continue + } + } else { + # our driver cell is a LUT or LUTMEM in combinatorial mode, we need to trace further. + set paths [get_timing_paths -quiet -through $driver_pin ] + if { [llength $paths] > 0 } { + # note that here we arbitrarily select the start point of the FIRST timing path... there might be multiple clocks with timing paths for this net. + # use MARK_DEBUG_CLOCK to specify another clock in this case. 
+ set cell_clock_pin [get_pins [get_property STARTPOINT_PIN [lindex $paths 0]]] + } else { + # Can't find any timing path, so skip the net, and warn the user. + puts "Critical Warning: from insert_ila.tcl Can't trace any clock domain on driver of net $d" + puts "Please attach the attribute MARK_DEBUG_CLOCK with a string containing the net name of the desired sampling clock, .i.e." + puts "attribute mark_debug_clock of $d : signal is \"inst_bufg/clk\";" + continue + } + } + # clk_net will usually be a list of net segments, which needs filtering to determine the net connected to the driver pin + set clk_net [get_nets -segments -of_objects $cell_clock_pin] + } else { + set clk_net [get_nets -segments $clk_name] + if { [llength $clk_net] == 0 } { puts "MARK_DEBUG_CLOCK attribute on net $d does not match any known net. Please fix."; continue; } + } + # trace forward to net actually connected to clock buffer output, not any of the lower level segment names + set clocks($name) [get_nets -of_objects [get_pins -filter {DIRECTION == "OUT" && IS_LEAF == TRUE } -of_objects $clk_net]] + if { [llength $clocks($name)] == 0 } { + puts "Critical Warning: from insert_ila.tcl Can't trace any clock domain on driver of net $d" + puts "Please attach the attribute MARK_DEBUG_CLOCK with a string containing the net name of the desired sampling clock, .i.e." + puts "attribute mark_debug_clock of $d : signal is \"inst_bufg/clk\";" + continue + } + if {![info exists clock_list($clocks($name))]} { + # found a new clock + puts "New clock found is $clocks($name)" + set clock_list($clocks($name)) [list $name] + set ila_depth($clocks($name)) $depth + set ila_adv_trigger($clocks($name)) false + } else { + lappend clock_list($clocks($name)) $name + } + # Does this net have a "MARK_DEBUG_DEPTH" attribute attached? 
+ set clk_depth [get_property -quiet MARK_DEBUG_DEPTH $d] + if { [llength $clk_depth] != 0 } { + set ila_depth($clocks($name)) $clk_depth + } + # Does this net have a "MARK_DEBUG_ADV_TRIGGER" attribute attached? + set trigger [get_property -quiet MARK_DEBUG_ADV_TRIGGER $d] + if { $trigger == "true" } { + set ila_adv_trigger($clocks($name)) true + } + } + } + + set ila_count 0 + set trig_out "" + set trig_out_ack "" + + if { [llength [array names clock_list]] > 1 } { + set enable_trigger true + } else { + set enable_trigger false + } + + foreach c [array names clock_list] { + # Now build and connect an ILA core for each clock domain + [incr ila_count ] + set ila_inst "ila_$ila_count" + # first verify if depth is a member of the set, 1024, 2048, 4096, 8192, ... 131072 + if { $ila_depth($c) < 1024 || [expr $ila_depth($c) & ($ila_depth($c) - 1)] || $ila_depth($c) > 131072 } { + # Depth is not right... lets fix it, and continue + if { $ila_depth($c) < 1024 } { + set new_depth 1024 + } elseif { $ila_depth($c) > 131072 } { + set new_depth 131072 + } else { + # round value to next highest power of 2, (in log space) + set new_depth [expr 1 << int( log($ila_depth($c))/log(2) + .9999 )] + } + puts "Can't create ILA core $ila_inst with depth of $ila_depth($c)! 
Changed capture depth to $new_depth" + set ila_depth($c) $new_depth + } + # create ILA and connect its clock + puts "Creating ILA $ila_inst with clock $c, capture depth $ila_depth($c) and advanced trigger = $ila_adv_trigger($c)" + create_debug_core $ila_inst ila + if { $ila_adv_trigger($c) } { set mu_cnt 4; } else { set mu_cnt 2; } + set_property C_DATA_DEPTH $ila_depth($c) [get_debug_cores $ila_inst] + set_property C_TRIGIN_EN $enable_trigger [get_debug_cores $ila_inst] + set_property C_TRIGOUT_EN $enable_trigger [get_debug_cores $ila_inst] + set_property C_ADV_TRIGGER $ila_adv_trigger($c) [get_debug_cores $ila_inst] + set_property C_INPUT_PIPE_STAGES 1 [get_debug_cores $ila_inst] + set_property C_EN_STRG_QUAL true [get_debug_cores $ila_inst] + set_property ALL_PROBE_SAME_MU true [get_debug_cores $ila_inst] + set_property ALL_PROBE_SAME_MU_CNT $mu_cnt [get_debug_cores $ila_inst] + set_property port_width 1 [get_debug_ports $ila_inst/clk] + connect_debug_port $ila_inst/clk $c + # hookup trigger ports in a circle if more than one ILA is created + if { $enable_trigger == true } { + create_debug_port $ila_inst trig_in + create_debug_port $ila_inst trig_in_ack + create_debug_port $ila_inst trig_out + create_debug_port $ila_inst trig_out_ack + if { $trig_out != "" } { + connect_debug_port $ila_inst/trig_in [get_nets $trig_out] + } + if { $trig_out_ack != "" } { + connect_debug_port $ila_inst/trig_in_ack [get_nets $trig_out_ack] + } + set trig_out ${ila_inst}_trig_out_$ila_count + create_net $trig_out + connect_debug_port $ila_inst/trig_out [get_nets $trig_out] + set trig_out_ack ${ila_inst}_trig_out_ack_$ila_count + create_net $trig_out_ack + connect_debug_port $ila_inst/trig_out_ack [get_nets $trig_out_ack] + } + # add probes + set nprobes 0 + foreach n [lsort $clock_list($c)] { + set nets {} + if {$max($n) < 0} { + lappend nets [get_nets $n] + } else { + # n is a bus name + for {set i $min($n)} {$i <= $max($n)} {incr i} { + lappend nets [get_nets $n[$i]] + } + } + set 
prb probe$nprobes + if {$nprobes > 0} { + create_debug_port $ila_inst probe + } + set_property port_width [llength $nets] [get_debug_ports $ila_inst/$prb] + connect_debug_port $ila_inst/$prb $nets + incr nprobes + } + } + + # at this point, we need to complete the circular connection of trigger outputs and acks + if { $enable_trigger == true } { + connect_debug_port ila_1/trig_in [get_nets $trig_out] + connect_debug_port ila_1/trig_in_ack [get_nets $trig_out_ack] + } + set project_found [get_projects -quiet] + if { $project_found != "New Project" } { + puts "Saving constraints now in project [current_project -quiet]" + save_constraints_as debug_constraints.xdc + } + + # run ILA cores implementation + implement_debug_core + + # write out probe info file + write_debug_probes -force debug_nets.ltx +} \ No newline at end of file diff --git a/hw/scripts/scope.py b/hw/scripts/scope.py index 5361e8afe..f6d93961b 100755 --- a/hw/scripts/scope.py +++ b/hw/scripts/scope.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,9 +19,9 @@ import re import json -vl_int_re = re.compile(r"\d+'s*h([\da-fA-F]+)") +vl_int_re = re.compile(r"\d+'s*h([\da-fA-F]+)") -def parse_vl_int(text): +def parse_vl_int(text): str_hex = re.sub(vl_int_re, r'\1', text) return int(str_hex, 16) @@ -33,16 +33,18 @@ def source_loc(xml_doc, xml_loc): end_line = loc[3] end_col = loc[4] file = xml_doc.find(".//file/[@id='" + file_id + "']").get("filename") - return file + " (" + start_line + ":" + start_col + "-" + end_line + ":" + end_col + ")" - + return f"{file} ({start_line}:{start_col}-{end_line}:{end_col})" + def parse_dtype_width(xml_doc, dtype_id): xml_type = xml_doc.find(".//typetable/*[@id='" + dtype_id + "']") - if xml_type.tag == "packarraydtype" or xml_type.tag == "unpackarraydtype": + if xml_type.tag in ["packarraydtype", "unpackarraydtype"]: sub_dtype_id = xml_type.get("sub_dtype_id") base_width = parse_dtype_width(xml_doc, sub_dtype_id) - const = xml_type.iter("const") - left = parse_vl_int(next(const).get("name")) - right = parse_vl_int(next(const).get("name")) + const_iter = xml_type.iter("const") + first_const = next(const_iter) + second_const = next(const_iter) + left = parse_vl_int(first_const.get("name")) + right = parse_vl_int(second_const.get("name")) return base_width * (left - right + 1) elif xml_type.tag == "structdtype": width = 0 @@ -65,31 +67,77 @@ def parse_dtype_width(xml_doc, dtype_id): if left != None and right != None: return int(left) - int(right) + 1 return 1 - + def parse_var_name(xml_doc, xml_node): if xml_node.tag == "varref": return xml_node.get("name") elif xml_node.tag == "varxref": name = xml_node.get("name") dotted = xml_node.get("dotted") - return dotted + '.' 
+ name + return f"{dotted}.{name}" + elif xml_node.tag == "arraysel": + return parse_arraysel_name(xml_doc, xml_node) else: - raise ET.ParseError("invalid probe entry" + source_loc(xml_doc, xml_node.get("loc"))) + raise ET.ParseError("invalid probe entry: tag=" + xml_node.tag + ", " + source_loc(xml_doc, xml_node.get("loc"))) return name +def parse_sel_field(xml_doc, dtype_id, offset, width): + xml_type = xml_doc.find(".//typetable/*[@id='" + dtype_id + "']") + name = xml_type.get("name") + if xml_type.tag == "structdtype": + bit_offset = 0 + members = list(xml_type.findall("memberdtype")) + members.reverse() + for member in members: + sub_dtype_id = member.get("sub_dtype_id") + member_name = member.get("name") + member_width = parse_dtype_width(xml_doc, sub_dtype_id) + if bit_offset <= offset < bit_offset + member_width: + if width != member_width and sub_dtype_id: + sub_field = parse_sel_field(xml_doc, sub_dtype_id, offset - bit_offset, width) + return f".{member_name}{sub_field}" + else: + return f".{member_name}" + bit_offset += member_width + raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_type.get("loc"))) + elif xml_type.tag in ["packarraydtype", "unpackarraydtype"]: + sub_dtype_id = xml_type.get("sub_dtype_id") + base_width = parse_dtype_width(xml_doc, sub_dtype_id) + if width > base_width: + return "" + array_index = offset // base_width + sub_offset = offset % base_width + array_sel_name = f"_{array_index}" # array indexing is not supported in VCD + sub_field = parse_sel_field(xml_doc, sub_dtype_id, sub_offset, width) + return f"{array_sel_name}{sub_field}" + elif xml_type.tag == "basicdtype": + if width == 1: + return F"[{offset}]" + end = width - 1 + offset + return F"[{end}:{offset}]" + else: + raise ET.ParseError("invalid probe entry: tag=" + xml_type.tag + ", " + source_loc(xml_doc, xml_type.get("loc"))) + return None + def parse_sel_name(xml_doc, xml_node): - name = parse_var_name(xml_doc, xml_node.find("*")) - const = 
xml_node.iter("const") - offset = parse_vl_int(next(const).get("name")) - #size = parse_vl_int(next(const).get("name")) - return name + '_' + str(offset) + first_child = xml_node.find("*") + name = parse_var_name(xml_doc, first_child) + dtype_id = first_child.get("dtype_id") + const_iter = xml_node.iter("const") + first_const = next(const_iter) + second_const = next(const_iter) + offset = parse_vl_int(first_const.get("name")) + width = parse_vl_int(second_const.get("name")) + return name + parse_sel_field(xml_doc, dtype_id, offset, width) -def parse_array_name(xml_doc, xml_node): +def parse_arraysel_name(xml_doc, xml_node): if xml_node.tag == "arraysel": - name = parse_array_name(xml_doc, xml_node.find("*")) - xml_size = xml_node.find("const").get("name") - array_size = parse_vl_int(xml_size) - name = name + '_' + str(array_size) + first_child = xml_node.find("*") + name = parse_arraysel_name(xml_doc, first_child) + const_iter = xml_node.iter("const") + first_const = next(const_iter) + offset = parse_vl_int(first_const.get("name")) + name = f"{name}_{offset}" # array indexing is not supported in VCD else: name = parse_var_name(xml_doc, xml_node) return name @@ -97,9 +145,10 @@ def parse_array_name(xml_doc, xml_node): def parse_vl_port(xml_doc, xml_node, signals): total_width = 0 if xml_node.tag == "concat": - for xml_child in xml_node.findall("*"): + child_nodes = xml_node.findall("*") + for xml_child in child_nodes: total_width = total_width + parse_vl_port(xml_doc, xml_child, signals) - elif xml_node.tag == "varref" or xml_node.tag == "varxref": + elif xml_node.tag in ["varref", "varxref"]: name = parse_var_name(xml_doc, xml_node) dtype_id = xml_node.get("dtype_id") signal_width = parse_dtype_width(xml_doc, dtype_id) @@ -112,64 +161,84 @@ def parse_vl_port(xml_doc, xml_node, signals): signals.append([name, signal_width]) total_width = total_width + signal_width elif xml_node.tag == "arraysel": - name = parse_array_name(xml_doc, xml_node) + name = 
parse_arraysel_name(xml_doc, xml_node) dtype_id = xml_node.get("dtype_id") signal_width = parse_dtype_width(xml_doc, dtype_id) signals.append([name, signal_width]) total_width = total_width + signal_width else: - raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_node.get("loc"))) + raise ET.ParseError("invalid probe entry: tag=" + xml_node.tag + ", " + source_loc(xml_doc, xml_node.get("loc"))) + # Check for duplicate signal names + signal_names = [signal[0] for signal in signals] + duplicates = set([name for name in signal_names if signal_names.count(name) > 1]) + if len(duplicates) > 0: + raise ET.ParseError("duplicate signal names: " + ", ".join(duplicates)) return total_width def parse_xml(filename, max_taps): xml_doc = ET.parse(filename) modules = {} xml_modules = xml_doc.findall(".//module/[@origName='VX_scope_tap']") - for xml_module in xml_modules: + for xml_module in xml_modules: scope_id = parse_vl_int(xml_module.find(".//var/[@name='SCOPE_ID']/const").get("name")) - triggerw = parse_vl_int(xml_module.find(".//var/[@name='TRIGGERW']/const").get("name")) + xtriggerw = parse_vl_int(xml_module.find(".//var/[@name='XTRIGGERW']/const").get("name")) + htriggerw = parse_vl_int(xml_module.find(".//var/[@name='HTRIGGERW']/const").get("name")) probew = parse_vl_int(xml_module.find(".//var/[@name='PROBEW']/const").get("name")) module_name = xml_module.get("name") - modules[module_name] = [scope_id, triggerw, probew] + modules[module_name] = [scope_id, xtriggerw, htriggerw, probew] taps = [] - xml_instances = xml_doc.iter("instance") - for xml_instance in xml_instances: + xml_instances = xml_doc.iter("instance") + for xml_instance in xml_instances: if (max_taps != -1 and len(taps) >= max_taps): - break + break defName = xml_instance.get("defName") module = modules.get(defName) if module is None: continue - triggers = [] - probes = [] - w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='triggers']/*"), triggers) - if w != module[1]: - 
raise ET.ParseError("invalid triggers width: actual=" + str(w) + ", expected=" + str(module[1])) + + xtriggers = [] + htriggers = [] + probes = [] + + if module[1] > 0: + w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='xtriggers']/*"), xtriggers) + if w != module[1]: + raise ET.ParseError("invalid xtriggers width: actual=" + str(w) + ", expected=" + str(module[1])) + + if module[2] > 0: + w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='htriggers']/*"), htriggers) + if w != module[2]: + raise ET.ParseError("invalid htriggers width: actual=" + str(w) + ", expected=" + str(module[2])) + w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='probes']/*"), probes) - if w != module[2]: - raise ET.ParseError("invalid probes width: actual=" + str(w) + ", expected=" + str(module[2])) + if w != module[3]: + raise ET.ParseError("invalid probes width: actual=" + str(w) + ", expected=" + str(module[3])) + signals = probes - for trigger in triggers: - signals.append(trigger) + for xtrigger in xtriggers: + signals.append(xtrigger) + for htrigger in htriggers: + signals.append(htrigger) + loc = xml_instance.get("loc") hier = xml_doc.find(".//cell/[@loc='" + loc + "']").get("hier") path = hier.rsplit(".", 1)[0] taps.append({"id":module[0], - "width":module[1] + module[2], - "signals":signals, + "width":module[1] + module[2] + module[3], + "signals":signals, "path":path}) return {"version":"0.1.0", "taps":taps} -def main(): +def main(): parser = argparse.ArgumentParser(description='Scope headers generator.') parser.add_argument('-o', nargs='?', default='scope.json', metavar='o', help='Output JSON manifest') parser.add_argument('-n', nargs='?', default=-1, metavar='n', type=int, help='Maximum number of taps to read') parser.add_argument('xml', help='Design XML descriptor file') args = parser.parse_args() #print("args=", args) - scope_taps = parse_xml(args.xml, args.n) + scope_taps = parse_xml(args.xml, args.n) with open(args.o, "w") as f: 
json.dump(scope_taps, f, ensure_ascii=False, indent=4) diff --git a/hw/syn/xilinx/xrt/scripts/gen_ip.tcl b/hw/scripts/xilinx_ip_gen.tcl similarity index 86% rename from hw/syn/xilinx/xrt/scripts/gen_ip.tcl rename to hw/scripts/xilinx_ip_gen.tcl index 5aae6db74..a1048fc77 100644 --- a/hw/syn/xilinx/xrt/scripts/gen_ip.tcl +++ b/hw/scripts/xilinx_ip_gen.tcl @@ -1,31 +1,36 @@ # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -if { $::argc != 1 } { - puts "ERROR: Program \"$::argv0\" requires 1 arguments!\n" - puts "Usage: $::argv0 \n" +if { $::argc < 1 || $::argc > 2 } { + puts "ERROR: Program \"$::argv0\" requires 1 or 2 arguments!\n" + puts "Usage: $::argv0 []\n" exit } set ip_dir [lindex $::argv 0] +# create_ip requires that a project is open in memory. +if { $::argc == 2 } { + set device_part [lindex $::argv 1] + create_project -in_memory -part $device_part +} else { + # Create project without specifying a device part + create_project -in_memory +} + # IP folder does not exist. Create IP folder file mkdir ${ip_dir} -# create_ip requires that a project is open in memory. 
-# Create project but don't do anything with it -create_project -in_memory - create_ip -name floating_point -vendor xilinx.com -library ip -version 7.1 -module_name xil_fdiv -dir ${ip_dir} set_property -dict [list CONFIG.Component_Name {xil_fdiv} CONFIG.Operation_Type {Divide} CONFIG.Flow_Control {NonBlocking} CONFIG.Has_ACLKEN {true} CONFIG.C_Has_UNDERFLOW {true} CONFIG.C_Has_OVERFLOW {true} CONFIG.C_Has_INVALID_OP {true} CONFIG.C_Has_DIVIDE_BY_ZERO {true} CONFIG.A_Precision_Type {Single} CONFIG.C_A_Exponent_Width {8} CONFIG.C_A_Fraction_Width {24} CONFIG.Result_Precision_Type {Single} CONFIG.C_Result_Exponent_Width {8} CONFIG.C_Result_Fraction_Width {24} CONFIG.C_Mult_Usage {No_Usage} CONFIG.Has_RESULT_TREADY {false} CONFIG.C_Latency {28} CONFIG.C_Rate {1}] [get_ips xil_fdiv] diff --git a/hw/syn/altera/README b/hw/syn/altera/README index 11d048442..3f9168d5c 100644 --- a/hw/syn/altera/README +++ b/hw/syn/altera/README @@ -10,10 +10,10 @@ cd build_fpga && qsub-synth # check last 10 lines in build log for possible errors tail -n 10 ./build_arria10_fpga_1c/build.log -# Check if the job is submitted to the queue and running. Status should be R +# Check if the job is submitted to the queue and running. Status should be R qstat | grep -# Constantly monitoring the job submitted to the queue. Stop this using Ctrl+C +# Constantly monitoring the job submitted to the queue. 
Stop this using Ctrl+C watch ‘qstat | grep ’ # @@ -35,7 +35,7 @@ fpgaconf --bus 0xaf /synth/vortex_afu.gbs # get portid fpgainfo port -# Running the Test case +# Running the Test case cd /driver/tests/basic make run-fpga @@ -54,13 +54,9 @@ TARGET=asesim make -C runtime/opae PREFIX=build_base CONFIGS="-DEXT_F_DISABLE -DL1_DISABLE -DSM_DISABLE -DNUM_WARPS=2 -DNUM_THREADS=2" TARGET=asesim make # ASE test runs -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/basic/basic -n1 -t0 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/basic/basic -n1 -t1 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/basic/basic -n16 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/demo/demo -n16 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/regression/dogfood/dogfood -n16 -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/opencl/vecadd/vecadd -./run_ase.sh build_base_arria10_asesim_1c ../../../../tests/opencl/sgemm/sgemm -n4 +start_ase.sh +ASE_LOG=0 ASE_WORKDIR=/synth/work TARGET=asesim ./ci/blackbox.sh --driver=opae --app=vecadd +stop_ase.sh # modify "vsim_run.tcl" to dump VCD trace vcd file trace.vcd diff --git a/hw/syn/altera/quartus/Makefile b/hw/syn/altera/dut/Makefile similarity index 70% rename from hw/syn/altera/quartus/Makefile rename to hw/syn/altera/dut/Makefile index d0a2999bd..173408eca 100644 --- a/hw/syn/altera/quartus/Makefile +++ b/hw/syn/altera/dut/Makefile @@ -9,26 +9,26 @@ SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts IP_CACHE_DIR := $(ROOT_DIR)/hw/syn/altera/ip_cache/$(DEVICE_FAMILY) -.PHONY: dogfood unittest pipeline lmem cache fpu core issue vortex top test +.PHONY: unittest scope mem_unit lmem cache fpu core issue vortex top ip-gen: $(IP_CACHE_DIR)/ip_gen.log $(IP_CACHE_DIR)/ip_gen.log: - $(SCRIPT_DIR)/ip_gen.sh $(IP_CACHE_DIR) - -dogfood: - mkdir -p dogfood/$(BUILD_DIR) - cp dogfood/Makefile dogfood/$(BUILD_DIR) - $(MAKE) -C dogfood/$(BUILD_DIR) clean && 
$(MAKE) -C dogfood/$(BUILD_DIR) > dogfood/$(BUILD_DIR)/build.log 2>&1 & + $(SCRIPT_DIR)/altera_ip_gen.sh $(IP_CACHE_DIR) unittest: mkdir -p unittest/$(BUILD_DIR) cp unittest/Makefile unittest/$(BUILD_DIR) $(MAKE) -C unittest/$(BUILD_DIR) clean && $(MAKE) -C unittest/$(BUILD_DIR) > unittest/$(BUILD_DIR)/build.log 2>&1 & -pipeline: - mkdir -p pipeline/$(BUILD_DIR) - cp pipeline/Makefile pipeline/$(BUILD_DIR) - $(MAKE) -C pipeline/$(BUILD_DIR) clean && $(MAKE) -C pipeline/$(BUILD_DIR) > pipeline/$(BUILD_DIR)/build.log 2>&1 & +scope: + mkdir -p scope/$(BUILD_DIR) + cp scope/Makefile scope/$(BUILD_DIR) + $(MAKE) -C scope/$(BUILD_DIR) clean && $(MAKE) -C scope/$(BUILD_DIR) > scope/$(BUILD_DIR)/build.log 2>&1 & + +mem_unit: + mkdir -p mem_unit/$(BUILD_DIR) + cp mem_unit/Makefile mem_unit/$(BUILD_DIR) + $(MAKE) -C mem_unit/$(BUILD_DIR) clean && $(MAKE) -C mem_unit/$(BUILD_DIR) > mem_unit/$(BUILD_DIR)/build.log 2>&1 & lmem: mkdir -p lmem/$(BUILD_DIR) @@ -63,9 +63,4 @@ vortex: ip-gen top: ip-gen mkdir -p top/$(BUILD_DIR) cp top/Makefile top/$(BUILD_DIR) - $(MAKE) -C top/$(BUILD_DIR) clean && $(MAKE) -C top/$(BUILD_DIR) > top/$(BUILD_DIR)/build.log 2>&1 & - -test: ip-gen - mkdir -p test/$(BUILD_DIR) - cp test/Makefile test/$(BUILD_DIR) - $(MAKE) -C test/$(BUILD_DIR) clean && $(MAKE) -C test/$(BUILD_DIR) > test/$(BUILD_DIR)/build.log 2>&1 & + $(MAKE) -C top/$(BUILD_DIR) clean && $(MAKE) -C top/$(BUILD_DIR) > top/$(BUILD_DIR)/build.log 2>&1 & \ No newline at end of file diff --git a/hw/syn/altera/quartus/cache/Makefile b/hw/syn/altera/dut/cache/Makefile similarity index 100% rename from hw/syn/altera/quartus/cache/Makefile rename to hw/syn/altera/dut/cache/Makefile diff --git a/hw/syn/altera/quartus/common.mk b/hw/syn/altera/dut/common.mk similarity index 97% rename from hw/syn/altera/quartus/common.mk rename to hw/syn/altera/dut/common.mk index 3890dcfe8..1adcb3d49 100644 --- a/hw/syn/altera/quartus/common.mk +++ b/hw/syn/altera/dut/common.mk @@ -1,7 +1,7 @@ ROOT_DIR := 
$(realpath ../../../../../..) include $(ROOT_DIR)/config.mk -SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/quartus +SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/dut RTL_DIR := $(VORTEX_HOME)/hw/rtl AFU_DIR := $(RTL_DIR)/afu/opae @@ -21,7 +21,6 @@ endif CONFIGS += -DNDEBUG CONFIGS += -DQUARTUS CONFIGS += -DSYNTHESIS -CONFIGS += -DNOGLOBALS PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf diff --git a/hw/syn/altera/quartus/core/Makefile b/hw/syn/altera/dut/core/Makefile similarity index 66% rename from hw/syn/altera/quartus/core/Makefile rename to hw/syn/altera/dut/core/Makefile index eeeaa5233..c78c4a651 100644 --- a/hw/syn/altera/quartus/core/Makefile +++ b/hw/syn/altera/dut/core/Makefile @@ -9,6 +9,6 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/quartus/fpu/Makefile b/hw/syn/altera/dut/fpu/Makefile similarity index 58% rename from hw/syn/altera/quartus/fpu/Makefile rename to hw/syn/altera/dut/fpu/Makefile index b7826dc68..38d5c718c 100644 --- a/hw/syn/altera/quartus/fpu/Makefile +++ b/hw/syn/altera/dut/fpu/Makefile @@ -6,6 +6,6 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl 
-J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(IP_CACHE_DIR) diff --git a/hw/syn/altera/quartus/issue/Makefile b/hw/syn/altera/dut/issue/Makefile similarity index 66% rename from hw/syn/altera/quartus/issue/Makefile rename to hw/syn/altera/dut/issue/Makefile index c1804a398..45f6981d6 100644 --- a/hw/syn/altera/quartus/issue/Makefile +++ b/hw/syn/altera/dut/issue/Makefile @@ -9,6 +9,6 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/quartus/lmem/Makefile b/hw/syn/altera/dut/lmem/Makefile similarity index 100% rename from hw/syn/altera/quartus/lmem/Makefile rename to hw/syn/altera/dut/lmem/Makefile diff --git a/hw/syn/altera/dut/mem_unit/Makefile b/hw/syn/altera/dut/mem_unit/Makefile new file mode 100755 index 000000000..209492265 --- /dev/null +++ b/hw/syn/altera/dut/mem_unit/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_mem_unit_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core -I$(RTL_DIR)/fpu diff 
--git a/hw/syn/altera/quartus/project.sdc b/hw/syn/altera/dut/project.sdc similarity index 100% rename from hw/syn/altera/quartus/project.sdc rename to hw/syn/altera/dut/project.sdc diff --git a/hw/syn/altera/quartus/project.tcl b/hw/syn/altera/dut/project.tcl similarity index 100% rename from hw/syn/altera/quartus/project.tcl rename to hw/syn/altera/dut/project.tcl diff --git a/hw/syn/altera/dut/scope/Makefile b/hw/syn/altera/dut/scope/Makefile new file mode 100755 index 000000000..405f05e8a --- /dev/null +++ b/hw/syn/altera/dut/scope/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_scope_tap +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs diff --git a/hw/syn/altera/quartus/timing-html.tcl b/hw/syn/altera/dut/timing-html.tcl similarity index 100% rename from hw/syn/altera/quartus/timing-html.tcl rename to hw/syn/altera/dut/timing-html.tcl diff --git a/hw/syn/altera/dut/top/Makefile b/hw/syn/altera/dut/top/Makefile new file mode 100644 index 000000000..2a273e698 --- /dev/null +++ b/hw/syn/altera/dut/top/Makefile @@ -0,0 +1,36 @@ +PROJECT = vortex_afu +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +# AFU parameters +CONFIGS += -DNOPAE +CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY +ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_BANKS=2 +endif +ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) + ifeq ($(XLEN),64) + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47 + else + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31 + endif +endif +ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512 +endif +ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 +endif + +#CONFIGS += -DNUM_CORES=2 +#CONFIGS += -DNUM_WARPS=32 +#CONFIGS += -DNUM_THREADS=32 +#CONFIGS += -DL2_ENABLE + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq 
(,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src +endif +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/altera/quartus/unittest/Makefile b/hw/syn/altera/dut/unittest/Makefile similarity index 62% rename from hw/syn/altera/quartus/unittest/Makefile rename to hw/syn/altera/dut/unittest/Makefile index 2bfb18e4e..c4479f154 100644 --- a/hw/syn/altera/quartus/unittest/Makefile +++ b/hw/syn/altera/dut/unittest/Makefile @@ -6,6 +6,6 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/altera/quartus/vortex/Makefile b/hw/syn/altera/dut/vortex/Makefile similarity index 68% rename from hw/syn/altera/quartus/vortex/Makefile rename to hw/syn/altera/dut/vortex/Makefile index 7429df414..80c256021 100644 --- a/hw/syn/altera/quartus/vortex/Makefile +++ b/hw/syn/altera/dut/vortex/Makefile @@ -11,6 +11,6 @@ include ../../common.mk FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include 
-J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/altera/opae/Makefile b/hw/syn/altera/opae/Makefile index 62a9bb72c..61935f2e4 100644 --- a/hw/syn/altera/opae/Makefile +++ b/hw/syn/altera/opae/Makefile @@ -36,7 +36,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED ifeq ($(DEVICE_FAMILY), stratix10) CONFIGS += -DALTERA_S10 @@ -55,10 +54,13 @@ CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 CONFIGS += $(CONFIGS_$(NUM_CORES)c) -# include paths +# include sources +RTL_PKGS = $(AFU_DIR)/local_mem_cfg_pkg.sv $(AFU_DIR)/ccip/ccip_if_pkg.sv +RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs 
-I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(IP_CACHE_DIR) RTL_INCLUDE += $(FPU_INCLUDE) @@ -96,13 +98,13 @@ ifdef PERF endif # ast dump flags -XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DNOPAE +XML_CFLAGS = $(filter-out -DSYNTHESIS -DQUARTUS, $(CFLAGS)) $(RTL_PKGS) -I$(AFU_DIR)/ccip -I$(DPI_DIR) -DPLATFORM_PROVIDES_LOCAL_MEMORY -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 -DPLATFORM_MEMORY_DATA_WIDTH=512 -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 -DNOPAE -DSV_DPI all: swconfig ip-gen setup build ip-gen: $(IP_CACHE_DIR)/ip-gen.log $(IP_CACHE_DIR)/ip-gen.log: - $(SCRIPT_DIR)/ip_gen.sh $(IP_CACHE_DIR) + $(SCRIPT_DIR)/altera_ip_gen.sh $(IP_CACHE_DIR) swconfig: vortex_afu.h vortex_afu.h: $(SRC_DIR)/vortex_afu.json diff --git a/hw/syn/altera/opae/run_ase.sh b/hw/syn/altera/opae/start_ase.sh similarity index 74% rename from hw/syn/altera/opae/run_ase.sh rename to hw/syn/altera/opae/start_ase.sh index 04fd27540..d408b2170 100755 --- a/hw/syn/altera/opae/run_ase.sh +++ b/hw/syn/altera/opae/start_ase.sh @@ -17,12 +17,6 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" BUILD_DIR=$(realpath $1) -PROGRAM=$(basename "$2") -PROGRAM_DIR=`dirname $2` - -POCL_PATH=$TOOLDIR/pocl -VORTEX_RT_PATH=$SCRIPT_DIR/../../../../runtime - # Export ASE_WORKDIR variable export ASE_WORKDIR=$BUILD_DIR/synth/work @@ -35,7 +29,6 @@ rm -f $BUILD_DIR/synth/nohup.out pushd $BUILD_DIR/synth echo " [DBG] starting ASE simnulator (stdout saved to '$BUILD_DIR/synth/nohup.out')" setsid make sim &> /dev/null & -SIM_PID=$! popd # Wait for simulator readiness @@ -44,14 +37,3 @@ while [ ! 
-f $ASE_WORKDIR/.ase_ready.pid ] do sleep 1 done - -# run application -pushd $PROGRAM_DIR -shift 2 -echo " [DBG] running ./$PROGRAM $*" -ASE_LOG=0 LD_LIBRARY_PATH=$POCL_PATH/lib:$VORTEX_RT_PATH/opae:$LD_LIBRARY_PATH ./$PROGRAM $* -popd - -# stop the simulator (kill process group) -kill -- -$(ps -o pgid= $SIM_PID | grep -o '[0-9]*') -wait $SIM_PID 2> /dev/null \ No newline at end of file diff --git a/hw/syn/altera/opae/stop_ase.sh b/hw/syn/altera/opae/stop_ase.sh new file mode 100755 index 000000000..caee290db --- /dev/null +++ b/hw/syn/altera/opae/stop_ase.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +BUILD_DIR=$(realpath $1) + +# Export ASE_WORKDIR variable +export ASE_WORKDIR=$BUILD_DIR/synth/work + +# stop the simulator (kill process group) +if [ -f "$ASE_WORKDIR/.ase_ready.pid" ]; then + SIM_PID=$(grep '^pid' "$ASE_WORKDIR/.ase_ready.pid" | cut -d'=' -f2 | tr -d ' ') + echo " [DBG] stopping ASE simulator (pid=$SIM_PID)" + kill -- -$(ps -o pgid= $SIM_PID | grep -o '[0-9]*') + wait $SIM_PID 2> /dev/null +else + echo "ASE PID file does not exist." 
+fi \ No newline at end of file diff --git a/hw/syn/altera/power_play.sh b/hw/syn/altera/power_play.sh old mode 100644 new mode 100755 diff --git a/hw/syn/altera/quartus/test/Makefile b/hw/syn/altera/quartus/test/Makefile deleted file mode 100644 index 0c4a7ae4e..000000000 --- a/hw/syn/altera/quartus/test/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -PROJECT = Vortex -TOP_LEVEL_ENTITY = $(PROJECT) -SRC_FILE = $(PROJECT).sv - -include ../../common.mk - -FPU_INCLUDE = -I$(RTL_DIR)/fpu -ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src -endif -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/altera/quartus/top/Makefile b/hw/syn/altera/quartus/top/Makefile deleted file mode 100644 index 341690206..000000000 --- a/hw/syn/altera/quartus/top/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -PROJECT = vortex_afu -TOP_LEVEL_ENTITY = $(PROJECT) -SRC_FILE = $(PROJECT).sv - -include ../../common.mk - -# AFU parameters -CONFIGS += -DNOPAE -CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 -endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 -endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 -endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 -endif - -#CONFIGS += -DNUM_CORES=2 -#CONFIGS += -DNUM_WARPS=32 -#CONFIGS += -DNUM_THREADS=32 -#CONFIGS += -DL2_ENABLE - -FPU_INCLUDE = -I$(RTL_DIR)/fpu -ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - 
FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src -endif -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip -I$(IP_CACHE_DIR) $(FPU_INCLUDE) diff --git a/hw/syn/xilinx/README b/hw/syn/xilinx/README index 563c4c17e..0fb83e71b 100644 --- a/hw/syn/xilinx/README +++ b/hw/syn/xilinx/README @@ -5,9 +5,12 @@ platforminfo -l xbutil validate --device 0000:09:00.1 --verbose # generate FPU IPs -vivado -mode batch -source scripts/gen_ip.tcl -tclargs ip/xilinx_u50_gen3x16_xdma_5_202210_1 +vivado -mode batch -source xilinx_ip_gen.tcl -tclargs ip/xilinx_u50_gen3x16_xdma_5_202210_1 # build FPGA +PREFIX=build_base_1c NUM_CORES=1 TARGET=hw_emu PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 make > build_u55c_hw_emu_base_1c.log 2>&1 & +PREFIX=build_base_1c NUM_CORES=1 TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 make > build_u55c_hw_base_1c.log 2>&1 & + PREFIX=build_base_1c NUM_CORES=1 TARGET=hw_emu PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make > build_u50_hw_emu_base_1c.log 2>&1 & PREFIX=build_base_1c NUM_CORES=1 TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make > build_u50_hw_base_1c.log 2>&1 & @@ -25,14 +28,21 @@ PREFIX=build TARGET=hw_emu PLATFORM=xilinx_vck5000_gen3x16_xdma_1_202120_1 make # debug hw_emu using xsim xsim --gui xilinx_u50_gen3x16_xdma_5_202210_1-0-vortex_afu.wdb & -# debug hw using ILA +# h/w debugging using ILA +## (1) check for ILA support platforminfo --json="hardwarePlatform.extensions.chipscope_debug" xilinx_u50_gen3x16_xdma_5_202210_1 +## (2) check for XVC full path to get device id ls /dev/xfpga/xvc_pub* -ls /dev/xvc_pub* -debug_hw --xvc_pcie /dev/xfpga/xvc_pub.u2305.0 --hw_server -debug_hw --xvc_pcie /dev/xvc_pub.u0 --hw_server +## (3) start h/w server +debug_hw --xvc_pcie
/dev/xfpga/xvc_pub. --hw_server +## (4) start application and pause +## (5) start vivado to connect to h/w server and select ILA probes debug_hw --vivado --host localhost --ltx_file ./build_xilinx_u50_gen3x16_xdma_5_202210_1_hw/_x/link/vivado/vpl/prj/prj.runs/impl_1/debug_nets.ltx & -make chipscope TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 +## (6) resume application + +# supported ILA Makefile targets +TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make hw_server +TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make chipscope # analyze build report vitis_analyzer build_xilinx_u50_gen3x16_xdma_5_202210_1_hw_4c/bin/vortex_afu.xclbin.link_summary diff --git a/hw/syn/xilinx/dut/Makefile b/hw/syn/xilinx/dut/Makefile new file mode 100644 index 000000000..fe37eb4b8 --- /dev/null +++ b/hw/syn/xilinx/dut/Makefile @@ -0,0 +1,58 @@ +ROOT_DIR := $(realpath ../../../..) +include $(ROOT_DIR)/config.mk + +PREFIX ?= build + +BUILD_DIR := $(PREFIX) + +.PHONY: unittest scope mem_unit lmem cache fpu core issue vortex top + +unittest: + mkdir -p unittest/$(BUILD_DIR) + cp unittest/Makefile unittest/$(BUILD_DIR) + $(MAKE) -C unittest/$(BUILD_DIR) clean && $(MAKE) -C unittest/$(BUILD_DIR) > unittest/$(BUILD_DIR)/build.log 2>&1 & + +scope: + mkdir -p scope/$(BUILD_DIR) + cp scope/Makefile scope/$(BUILD_DIR) + $(MAKE) -C scope/$(BUILD_DIR) clean && $(MAKE) -C scope/$(BUILD_DIR) > scope/$(BUILD_DIR)/build.log 2>&1 & + +mem_unit: + mkdir -p mem_unit/$(BUILD_DIR) + cp mem_unit/Makefile mem_unit/$(BUILD_DIR) + $(MAKE) -C mem_unit/$(BUILD_DIR) clean && $(MAKE) -C mem_unit/$(BUILD_DIR) > mem_unit/$(BUILD_DIR)/build.log 2>&1 & + +lmem: + mkdir -p lmem/$(BUILD_DIR) + cp lmem/Makefile lmem/$(BUILD_DIR) + $(MAKE) -C lmem/$(BUILD_DIR) clean && $(MAKE) -C lmem/$(BUILD_DIR) > lmem/$(BUILD_DIR)/build.log 2>&1 & + +cache: + mkdir -p cache/$(BUILD_DIR) + cp cache/Makefile cache/$(BUILD_DIR) + $(MAKE) -C cache/$(BUILD_DIR) clean && $(MAKE) -C cache/$(BUILD_DIR) >
cache/$(BUILD_DIR)/build.log 2>&1 & + +fpu: + mkdir -p fpu/$(BUILD_DIR) + cp fpu/Makefile fpu/$(BUILD_DIR) + $(MAKE) -C fpu/$(BUILD_DIR) clean && $(MAKE) -C fpu/$(BUILD_DIR) > fpu/$(BUILD_DIR)/build.log 2>&1 & + +core: + mkdir -p core/$(BUILD_DIR) + cp core/Makefile core/$(BUILD_DIR) + $(MAKE) -C core/$(BUILD_DIR) clean && $(MAKE) -C core/$(BUILD_DIR) > core/$(BUILD_DIR)/build.log 2>&1 & + +issue: + mkdir -p issue/$(BUILD_DIR) + cp issue/Makefile issue/$(BUILD_DIR) + $(MAKE) -C issue/$(BUILD_DIR) clean && $(MAKE) -C issue/$(BUILD_DIR) > issue/$(BUILD_DIR)/build.log 2>&1 & + +vortex: + mkdir -p vortex/$(BUILD_DIR) + cp vortex/Makefile vortex/$(BUILD_DIR) + $(MAKE) -C vortex/$(BUILD_DIR) clean && $(MAKE) -C vortex/$(BUILD_DIR) > vortex/$(BUILD_DIR)/build.log 2>&1 & + +top: + mkdir -p top/$(BUILD_DIR) + cp top/Makefile top/$(BUILD_DIR) + $(MAKE) -C top/$(BUILD_DIR) clean && $(MAKE) -C top/$(BUILD_DIR) > top/$(BUILD_DIR)/build.log 2>&1 & \ No newline at end of file diff --git a/hw/syn/xilinx/dut/cache/Makefile b/hw/syn/xilinx/dut/cache/Makefile new file mode 100644 index 000000000..f96a76142 --- /dev/null +++ b/hw/syn/xilinx/dut/cache/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_cache_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache diff --git a/hw/syn/xilinx/dut/common.mk b/hw/syn/xilinx/dut/common.mk new file mode 100644 index 000000000..933621bef --- /dev/null +++ b/hw/syn/xilinx/dut/common.mk @@ -0,0 +1,46 @@ +ROOT_DIR := $(realpath ../../../../../..) 
+include $(ROOT_DIR)/config.mk + +DEVICE ?= xcu55c-fsvh2892-2L-e + +MAX_JOBS ?= 8 + +VIVADO := $(XILINX_VIVADO)/bin/vivado + +SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/dut + +RTL_DIR := $(VORTEX_HOME)/hw/rtl +AFU_DIR := $(RTL_DIR)/afu/xrt +SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts + +NCPUS := $(shell lscpu | grep "^Core(s) per socket:" | awk '{print $$4}') +JOBS ?= $(shell echo $$(( $(NCPUS) > $(MAX_JOBS) ? $(MAX_JOBS) : $(NCPUS) ))) + +CONFIGS += -DNDEBUG +CONFIGS += -DVIVADO +CONFIGS += -DSYNTHESIS + +# Build targets +all: $(PROJECT).xpr + +gen-sources: project_1/sources.txt +project_1/sources.txt: + mkdir -p project_1 + $(SCRIPT_DIR)/gen_sources.sh $(CONFIGS) $(RTL_INCLUDE) -T$(TOP_LEVEL_ENTITY) -P -Cproject_1/src -Oproject_1/sources.txt + +build: $(PROJECT).xpr +$(PROJECT).xpr: project_1/sources.txt +ifdef FPU_IP + MAX_JOBS=$(JOBS) FPU_IP=project_1/ip $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) +else + MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source $(SRC_DIR)/project.tcl -tclargs $(TOP_LEVEL_ENTITY) $(DEVICE) project_1/sources.txt $(SRC_DIR)/project.xdc $(SCRIPT_DIR) +endif + +clean: + rm -rf project_1 + rm -rf .Xil + rm -f *.rpt + rm -f vivado*.log + rm -f vivado*.jou + +.PHONY: all gen-sources build clean \ No newline at end of file diff --git a/hw/syn/xilinx/dut/core/Makefile b/hw/syn/xilinx/dut/core/Makefile new file mode 100644 index 000000000..2ce824a3f --- /dev/null +++ b/hw/syn/xilinx/dut/core/Makefile @@ -0,0 +1,15 @@ +PROJECT = VX_core_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv +FPU_IP = 1 + +include ../../common.mk + +#CONFIGS += -DNUM_WARPS=32 +#CONFIGS += -DNUM_THREADS=32 + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl 
-J$(THIRD_PARTY_DIR)/cvfpu/src +endif +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/fpu/Makefile b/hw/syn/xilinx/dut/fpu/Makefile new file mode 100644 index 000000000..c3d3fd99f --- /dev/null +++ b/hw/syn/xilinx/dut/fpu/Makefile @@ -0,0 +1,12 @@ +PROJECT = VX_fpu_dsp +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv +FPU_IP = 1 + +include ../../common.mk + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src +endif +RTL_INCLUDE = $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces diff --git a/hw/syn/xilinx/dut/issue/Makefile b/hw/syn/xilinx/dut/issue/Makefile new file mode 100644 index 000000000..07e8f343d --- /dev/null +++ b/hw/syn/xilinx/dut/issue/Makefile @@ -0,0 +1,14 @@ +PROJECT = VX_issue_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +#CONFIGS += -DNUM_WARPS=32 +#CONFIGS += -DNUM_THREADS=32 + +FPU_INCLUDE = -I$(RTL_DIR)/fpu +ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src +endif +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem $(FPU_INCLUDE) $(FPU_INCLUDE) \ No newline at end of file diff --git a/hw/syn/xilinx/dut/lmem/Makefile b/hw/syn/xilinx/dut/lmem/Makefile new file mode 100644 index 000000000..b3ba57c8d --- /dev/null +++ b/hw/syn/xilinx/dut/lmem/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_local_mem_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include 
../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem diff --git a/hw/syn/xilinx/dut/mem_unit/Makefile b/hw/syn/xilinx/dut/mem_unit/Makefile new file mode 100644 index 000000000..209492265 --- /dev/null +++ b/hw/syn/xilinx/dut/mem_unit/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_mem_unit_top +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core -I$(RTL_DIR)/fpu diff --git a/hw/syn/xilinx/dut/project.tcl b/hw/syn/xilinx/dut/project.tcl new file mode 100644 index 000000000..dcaf883fa --- /dev/null +++ b/hw/syn/xilinx/dut/project.tcl @@ -0,0 +1,128 @@ +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Start time +set start_time [clock seconds] + +if { $::argc != 5 } { + puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" + puts "Usage: $::argv0 \n" + exit +} + +# Set the project name +set project_name "project_1" + +set top_module [lindex $::argv 0] +set device_part [lindex $::argv 1] +set vcs_file [lindex $::argv 2] +set xdc_file [lindex $::argv 3] +set tool_dir [lindex $::argv 4] + +puts "Using top_module=$top_module" +puts "Using device_part=$device_part" +puts "Using vcs_file=$vcs_file" +puts "Using xdc_file=$xdc_file" +puts "Using tool_dir=$tool_dir" + +# Set the number of jobs based on MAX_JOBS environment variable +if {[info exists ::env(MAX_JOBS)]} { + set num_jobs $::env(MAX_JOBS) + puts "using num_jobs=$num_jobs" +} else { + set num_jobs 0 +} + +# create fpu ip +if {[info exists ::env(FPU_IP)]} { + set ip_dir $::env(FPU_IP) + set argv [list $ip_dir $device_part] + set argc 2 + source ${tool_dir}/xilinx_ip_gen.tcl +} + +source "${tool_dir}/parse_vcs_list.tcl" +set vlist [parse_vcs_list "${vcs_file}"] + +set vsources_list [lindex $vlist 0] +set vincludes_list [lindex $vlist 1] +set vdefines_list [lindex $vlist 2] + +#puts $vsources_list +#puts $vincludes_list +#puts $vdefines_list + +# Create project +create_project $project_name $project_name -force -part $device_part + +# Add constraints file +read_xdc $xdc_file + +# Add the design sources +add_files -norecurse -verbose $vsources_list + +# process defines +set_property verilog_define ${vdefines_list} [current_fileset] + +# add fpu ip +if {[info exists ::env(FPU_IP)]} { + set ip_dir $::env(FPU_IP) + add_files -norecurse -verbose ${ip_dir}/xil_fma/xil_fma.xci + add_files -norecurse -verbose ${ip_dir}/xil_fdiv/xil_fdiv.xci + add_files -norecurse -verbose ${ip_dir}/xil_fsqrt/xil_fsqrt.xci +} + +update_compile_order -fileset sources_1 + +set_property top $top_module [current_fileset] +set_property \ + -name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} \ + -value {-mode out_of_context -flatten_hierarchy
"rebuilt"} \ + -objects [get_runs synth_1] + +# Synthesis +if {$num_jobs != 0} { + launch_runs synth_1 -jobs $num_jobs +} else { + launch_runs synth_1 +} +wait_on_run synth_1 +open_run synth_1 +write_checkpoint -force post_synth.dcp +report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages + +# Implementation +if {$num_jobs != 0} { + launch_runs impl_1 -jobs $num_jobs +} else { + launch_runs impl_1 +} +wait_on_run impl_1 +open_run impl_1 +write_checkpoint -force post_impl.dcp + +# Generate the synthesis report +report_place_status -file place.rpt +report_route_status -file route.rpt +report_timing_summary -file timing.rpt +report_power -file power.rpt +report_drc -file drc.rpt + +# End time and calculation +set elapsed_time [expr {[clock seconds] - $start_time}] + +# Display elapsed time +set hours [format "%02d" [expr {$elapsed_time / 3600}]] +set minutes [format "%02d" [expr {($elapsed_time % 3600) / 60}]] +set seconds [format "%02d" [expr {$elapsed_time % 60}]] +puts "Total elapsed time: ${hours}h ${minutes}m ${seconds}s" \ No newline at end of file diff --git a/hw/syn/xilinx/dut/project.xdc b/hw/syn/xilinx/dut/project.xdc new file mode 100644 index 000000000..f786e7837 --- /dev/null +++ b/hw/syn/xilinx/dut/project.xdc @@ -0,0 +1,4 @@ +set CLK_FREQ_MHZ 300 +set clk_port_name clk +set clk_port [get_ports $clk_port_name] +create_clock -name core_clock -period [expr 1000.0 / $CLK_FREQ_MHZ] $clk_port \ No newline at end of file diff --git a/hw/syn/xilinx/dut/scope/Makefile b/hw/syn/xilinx/dut/scope/Makefile new file mode 100644 index 000000000..405f05e8a --- /dev/null +++ b/hw/syn/xilinx/dut/scope/Makefile @@ -0,0 +1,7 @@ +PROJECT = VX_scope_tap +TOP_LEVEL_ENTITY = $(PROJECT) +SRC_FILE = $(PROJECT).sv + +include ../../common.mk + +RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs diff --git a/hw/syn/xilinx/dut/top/Makefile b/hw/syn/xilinx/dut/top/Makefile new file mode 100644 index 000000000..c471b7807 --- /dev/null +++ 
b/hw/syn/xilinx/dut/top/Makefile
@@ -0,0 +1,17 @@
+PROJECT = vortex_afu
+TOP_LEVEL_ENTITY = $(PROJECT)
+SRC_FILE = $(PROJECT).sv
+FPU_IP = 1
+
+include ../../common.mk
+
+#CONFIGS += -DNUM_CORES=2
+#CONFIGS += -DNUM_WARPS=32
+#CONFIGS += -DNUM_THREADS=32
+#CONFIGS += -DL2_ENABLE
+
+FPU_INCLUDE = -I$(RTL_DIR)/fpu
+ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
+  FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src
+endif
+RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -I$(AFU_DIR)/ccip $(FPU_INCLUDE)
diff --git a/hw/syn/xilinx/dut/unittest/Makefile b/hw/syn/xilinx/dut/unittest/Makefile
new file mode 100644
index 000000000..1bc66aa38
--- /dev/null
+++ b/hw/syn/xilinx/dut/unittest/Makefile
@@ -0,0 +1,11 @@
+PROJECT = Unittest
+TOP_LEVEL_ENTITY = $(PROJECT)
+SRC_FILE = $(PROJECT).sv
+
+include ../../common.mk
+
+FPU_INCLUDE = -I$(RTL_DIR)/fpu
+ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
+  FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src
+endif
+RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
\ No newline at end of file
diff --git a/hw/syn/xilinx/dut/vortex/Makefile b/hw/syn/xilinx/dut/vortex/Makefile
new file mode 100644
index 000000000..eb6d45a88
--- /dev/null
+++ b/hw/syn/xilinx/dut/vortex/Makefile
@@ -0,0 +1,17 @@
+PROJECT = Vortex
+TOP_LEVEL_ENTITY = $(PROJECT)
+SRC_FILE = $(PROJECT).sv
+FPU_IP = 1
+
+include ../../common.mk
+
+#CONFIGS += -DNUM_CORES=2
+#CONFIGS += -DNUM_WARPS=32
+#CONFIGS += -DNUM_THREADS=32
+#CONFIGS += -DL2_ENABLE
+
+FPU_INCLUDE = -I$(RTL_DIR)/fpu
+ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
+  FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src
+endif
+RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
diff --git a/hw/syn/xilinx/xrt/kill_build.sh b/hw/syn/xilinx/kill_build.sh
similarity index 100%
rename from hw/syn/xilinx/xrt/kill_build.sh
rename to hw/syn/xilinx/kill_build.sh
diff --git a/hw/syn/xilinx/xrt/kill_hwserver.sh b/hw/syn/xilinx/kill_hwserver.sh
similarity index 100%
rename from hw/syn/xilinx/xrt/kill_hwserver.sh
rename to hw/syn/xilinx/kill_hwserver.sh
diff --git a/hw/syn/xilinx/xrt/kill_sim.sh b/hw/syn/xilinx/kill_sim.sh
similarity index 100%
rename from hw/syn/xilinx/xrt/kill_sim.sh
rename to hw/syn/xilinx/kill_sim.sh
diff --git a/hw/syn/xilinx/sandbox/Makefile b/hw/syn/xilinx/sandbox/Makefile
new file mode 100644
index 000000000..e4def9c4e
--- /dev/null
+++ b/hw/syn/xilinx/sandbox/Makefile
@@ -0,0 +1,86 @@
+ROOT_DIR := $(realpath ../../../..)
+include $(ROOT_DIR)/config.mk
+
+DEVICE ?= xcu55c-fsvh2892-2L-e
+
+MAX_JOBS ?= 8
+
+VIVADO := $(XILINX_VIVADO)/bin/vivado
+
+SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/sandbox
+
+RTL_DIR := $(VORTEX_HOME)/hw/rtl
+DPI_DIR := $(VORTEX_HOME)/hw/dpi
+AFU_DIR := $(RTL_DIR)/afu/xrt
+SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts
+
+KERNEL ?= fibonacci
+
+# Parallel job count: the "Core(s) per socket" value from lscpu, capped at MAX_JOBS.
+NCPUS := $(shell lscpu | grep "^Core(s) per socket:" | awk '{print $$4}')
+JOBS ?= $(shell echo $$(( $(NCPUS) > $(MAX_JOBS) ? $(MAX_JOBS) : $(NCPUS) )))
+
+# include paths
+FPU_INCLUDE = -I$(RTL_DIR)/fpu
+ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
+  FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src
+endif
+TEX_INCLUDE = -I$(RTL_DIR)/tex
+RASTER_INCLUDE = -I$(RTL_DIR)/raster
+OM_INCLUDE = -I$(RTL_DIR)/om
+RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
+RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(OM_INCLUDE)
+RTL_INCLUDE += -I$(SRC_DIR)
+
+# compilation flags
+CFLAGS += -DNDEBUG -DSYNTHESIS -DVIVADO
+CFLAGS += $(CONFIGS)
+CFLAGS += $(RTL_INCLUDE)
+CFLAGS += -DEXT_F_DISABLE
+
+# update memory layout for 2MB RAM
+CFLAGS += -DSTARTUP_ADDR=32\'h80000
+CFLAGS += -DSTACK_BASE_ADDR=32\'hFF000
+
+# These targets name actions, not files; keep them working even if such a file exists.
+.PHONY: all gen-sources build run clean
+
+all: build
+
+# Rebuild $(ROOT_DIR)/kernel and the selected test kernel, then copy the binary here.
+# NOTE(review): the test kernel is built with STARTUP_ADDR=0x8000 while the RTL
+# flags above define STARTUP_ADDR=32'h80000 -- confirm the two are meant to differ
+# (STACK_BASE_ADDR=0xFF000 does match the RTL define).
+$(KERNEL).bin:
+	$(MAKE) -C $(ROOT_DIR)/kernel clean
+	STACK_BASE_ADDR=0xFF000 $(MAKE) -C $(ROOT_DIR)/kernel
+	$(MAKE) -C $(ROOT_DIR)/tests/kernel/$(KERNEL) clean
+	STARTUP_ADDR=0x8000 $(MAKE) -C $(ROOT_DIR)/tests/kernel/$(KERNEL)
+	cp $(ROOT_DIR)/tests/kernel/$(KERNEL)/$(KERNEL).bin $(KERNEL).bin
+
+# Convert the kernel binary into the COE file that project.tcl.in loads into the block RAM.
+kernel.bin.coe: $(KERNEL).bin
+	$(SCRIPT_DIR)/bin2coe.py --out=$@ --binfile=8192:$(KERNEL).bin --depth=16384 --wordsize=64 --little_endian
+
+# Generate the RTL source list that is passed to project.tcl.
+gen-sources: project_1/sources.txt
+project_1/sources.txt:
+	mkdir -p project_1
+	$(SCRIPT_DIR)/gen_sources.sh $(CFLAGS) -P -Cproject_1/src -Oproject_1/sources.txt
+
+# Run the Vivado batch flow. done.dcp is only a stamp file, written once Vivado
+# has returned successfully.
+build: done.dcp
+done.dcp: project_1/sources.txt kernel.bin.coe project.tcl
+	MAX_JOBS=$(JOBS) $(VIVADO) -mode batch -source project.tcl -tclargs $(DEVICE) project_1/sources.txt $(SCRIPT_DIR)
+	echo done > done.dcp
+
+# Open the generated project in Vivado (started in the background).
+run: project_1/project_1.xpr
+	$(VIVADO) project_1/project_1.xpr &
+
+# Remove build outputs.
+# NOTE(review): this deletes "project1.tcl" while the flow script used above is "project.tcl" -- confirm the intended name.
+clean:
+	rm -rf project_1 project1.tcl $(KERNEL).bin kernel.bin.coe
+	rm -rf .Xil *.log *.jou *.dcp *.rpt
diff --git a/hw/syn/xilinx/sandbox/Vortex_top.v
b/hw/syn/xilinx/sandbox/Vortex_top.v
new file mode 100644
index 000000000..cd634b9b6
--- /dev/null
+++ b/hw/syn/xilinx/sandbox/Vortex_top.v
@@ -0,0 +1,139 @@
+// Copyright © 2019-2023
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+`include "VX_define.vh"
+
+// Plain-Verilog top level: every parameter and port is forwarded unchanged,
+// by name, to the single Vortex_wrap instance below. It contains no logic.
+module Vortex_top #(
+    parameter C_M_AXI_GMEM_DATA_WIDTH = 512,
+    parameter C_M_AXI_GMEM_ADDR_WIDTH = `XLEN,
+    parameter C_M_AXI_GMEM_ID_WIDTH   = 32,
+    parameter C_M_AXI_MEM_NUM_BANKS   = 1
+) (
+    input  wire                                  clk,
+    input  wire                                  reset,
+
+    // AXI4 memory interface
+    output wire                                  m_axi_mem_awvalid,
+    input  wire                                  m_axi_mem_awready,
+    output wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0]    m_axi_mem_awaddr,
+    output wire [C_M_AXI_GMEM_ID_WIDTH - 1:0]    m_axi_mem_awid,
+    output wire [7:0]                            m_axi_mem_awlen,
+    output wire [2:0]                            m_axi_mem_awsize,
+    output wire [1:0]                            m_axi_mem_awburst,
+    output wire [1:0]                            m_axi_mem_awlock,
+    output wire [3:0]                            m_axi_mem_awcache,
+    output wire [2:0]                            m_axi_mem_awprot,
+    output wire [3:0]                            m_axi_mem_awqos,
+    output wire                                  m_axi_mem_wvalid,
+    input  wire                                  m_axi_mem_wready,
+    output wire [C_M_AXI_GMEM_DATA_WIDTH-1:0]    m_axi_mem_wdata,
+    output wire [C_M_AXI_GMEM_DATA_WIDTH/8-1:0]  m_axi_mem_wstrb,
+    output wire                                  m_axi_mem_wlast,
+    output wire                                  m_axi_mem_arvalid,
+    input  wire                                  m_axi_mem_arready,
+    output wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0]    m_axi_mem_araddr,
+    output wire [C_M_AXI_GMEM_ID_WIDTH-1:0]      m_axi_mem_arid,
+    output wire [7:0]                            m_axi_mem_arlen,
+    output wire [2:0]                            m_axi_mem_arsize,
+    output wire [1:0]                            m_axi_mem_arburst,
+    output wire [1:0]                            m_axi_mem_arlock,
+    output wire [3:0]                            m_axi_mem_arcache,
+    output wire [2:0]                            m_axi_mem_arprot,
+    output wire [3:0]                            m_axi_mem_arqos,
+    input  wire                                  m_axi_mem_rvalid,
+    output wire                                  m_axi_mem_rready,
+    input  wire [C_M_AXI_GMEM_DATA_WIDTH - 1:0]  m_axi_mem_rdata,
+    input  wire                                  m_axi_mem_rlast,
+    input  wire [C_M_AXI_GMEM_ID_WIDTH - 1:0]    m_axi_mem_rid,
+    input  wire [1:0]                            m_axi_mem_rresp,
+    input  wire                                  m_axi_mem_bvalid,
+    output wire                                  m_axi_mem_bready,
+    input  wire [1:0]                            m_axi_mem_bresp,
+    input  wire [C_M_AXI_GMEM_ID_WIDTH - 1:0]    m_axi_mem_bid,
+
+    // DCR write port
+    input  wire                                  dcr_wr_valid,
+    input  wire [`VX_DCR_ADDR_WIDTH-1:0]         dcr_wr_addr,
+    input  wire [`VX_DCR_DATA_WIDTH-1:0]         dcr_wr_data,
+
+    // busy output, driven by the wrapped instance
+    output wire                                  busy
+);
+
+    Vortex_wrap #(
+        .C_M_AXI_GMEM_DATA_WIDTH (C_M_AXI_GMEM_DATA_WIDTH),
+        .C_M_AXI_GMEM_ADDR_WIDTH (C_M_AXI_GMEM_ADDR_WIDTH),
+        .C_M_AXI_GMEM_ID_WIDTH   (C_M_AXI_GMEM_ID_WIDTH),
+        .C_M_AXI_MEM_NUM_BANKS   (C_M_AXI_MEM_NUM_BANKS)
+    ) wrapper (
+        .clk                (clk),
+        .reset              (reset),
+
+        // AXI write address channel (AW)
+        .m_axi_mem_awvalid  (m_axi_mem_awvalid),
+        .m_axi_mem_awready  (m_axi_mem_awready),
+        .m_axi_mem_awaddr   (m_axi_mem_awaddr),
+        .m_axi_mem_awid     (m_axi_mem_awid),
+        .m_axi_mem_awlen    (m_axi_mem_awlen),
+        .m_axi_mem_awsize   (m_axi_mem_awsize),
+        .m_axi_mem_awburst  (m_axi_mem_awburst),
+        .m_axi_mem_awlock   (m_axi_mem_awlock),
+        .m_axi_mem_awcache  (m_axi_mem_awcache),
+        .m_axi_mem_awprot   (m_axi_mem_awprot),
+        .m_axi_mem_awqos    (m_axi_mem_awqos),
+
+        // AXI write data channel (W)
+        .m_axi_mem_wvalid   (m_axi_mem_wvalid),
+        .m_axi_mem_wready   (m_axi_mem_wready),
+        .m_axi_mem_wdata    (m_axi_mem_wdata),
+        .m_axi_mem_wstrb    (m_axi_mem_wstrb),
+        .m_axi_mem_wlast    (m_axi_mem_wlast),
+
+        // AXI read address channel (AR)
+        .m_axi_mem_arvalid  (m_axi_mem_arvalid),
+        .m_axi_mem_arready  (m_axi_mem_arready),
+        .m_axi_mem_araddr   (m_axi_mem_araddr),
+        .m_axi_mem_arid     (m_axi_mem_arid),
+        .m_axi_mem_arlen    (m_axi_mem_arlen),
+        .m_axi_mem_arsize   (m_axi_mem_arsize),
+        .m_axi_mem_arburst  (m_axi_mem_arburst),
+        .m_axi_mem_arlock   (m_axi_mem_arlock),
+        .m_axi_mem_arcache  (m_axi_mem_arcache),
+        .m_axi_mem_arprot   (m_axi_mem_arprot),
+        .m_axi_mem_arqos    (m_axi_mem_arqos),
+
+        // AXI read data channel (R)
+        .m_axi_mem_rvalid   (m_axi_mem_rvalid),
+        .m_axi_mem_rready   (m_axi_mem_rready),
+        .m_axi_mem_rdata    (m_axi_mem_rdata),
+        .m_axi_mem_rlast    (m_axi_mem_rlast),
+        .m_axi_mem_rid      (m_axi_mem_rid),
+        .m_axi_mem_rresp    (m_axi_mem_rresp),
+
+        // AXI write response channel (B)
+        .m_axi_mem_bvalid   (m_axi_mem_bvalid),
+        .m_axi_mem_bready   (m_axi_mem_bready),
+        .m_axi_mem_bresp    (m_axi_mem_bresp),
+        .m_axi_mem_bid      (m_axi_mem_bid),
+
+        // DCR write port
+        .dcr_wr_valid       (dcr_wr_valid),
+        .dcr_wr_addr        (dcr_wr_addr),
+        .dcr_wr_data        (dcr_wr_data),
+
+        .busy               (busy)
+    );
+
+endmodule
diff --git a/hw/syn/xilinx/test/project_1_files/Vortex_top.v b/hw/syn/xilinx/sandbox/Vortex_wrap.sv
similarity index 97%
rename from hw/syn/xilinx/test/project_1_files/Vortex_top.v
rename to hw/syn/xilinx/sandbox/Vortex_wrap.sv
index a7adf71bc..5ec7a868e 100644
--- a/hw/syn/xilinx/test/project_1_files/Vortex_top.v
+++ b/hw/syn/xilinx/sandbox/Vortex_wrap.sv
@@ -1,10 +1,10 @@
 // Copyright © 2019-2023
-//
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 // http://www.apache.org/licenses/LICENSE-2.0
-//
+//
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -13,7 +13,7 @@
 
 `include "VX_define.vh"
 
-module Vortex_top #(
+module Vortex_wrap #(
     parameter C_M_AXI_GMEM_DATA_WIDTH = 512,
     parameter C_M_AXI_GMEM_ADDR_WIDTH = `XLEN,
     parameter C_M_AXI_GMEM_ID_WIDTH = 32,
@@ -22,7 +22,7 @@ module Vortex_top #(
     input wire clk,
     input wire reset,
 
-    // AXI4 memory interface
+    // AXI4 memory interface
     output wire m_axi_mem_awvalid,
     input wire m_axi_mem_awready,
     output wire [C_M_AXI_GMEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr,
@@ -138,13 +138,13 @@ module Vortex_top #(
 
     assign m_axi_mem_rvalid_a[0] = m_axi_mem_rvalid;
     assign m_axi_mem_rready = m_axi_mem_rready_a[0];
-    assign m_axi_mem_rdata_a[0] = m_axi_mem_rdata;
+    assign m_axi_mem_rdata_a[0] = m_axi_mem_rdata;
     assign m_axi_mem_rlast_a[0] = m_axi_mem_rlast;
     assign m_axi_mem_rid_a[0] = m_axi_mem_rid;
     assign m_axi_mem_rresp_a[0] = m_axi_mem_rresp;
 
     assign m_axi_mem_bvalid_a[0] = m_axi_mem_bvalid;
-    assign m_axi_mem_bready = m_axi_mem_bready_a[0];
+    assign m_axi_mem_bready = m_axi_mem_bready_a[0];
     assign m_axi_mem_bresp_a[0] = m_axi_mem_bresp;
     assign m_axi_mem_bid_a[0] = m_axi_mem_bid;
 
@@ -177,7 +177,7 @@ module Vortex_top #(
         .m_axi_bvalid (m_axi_mem_bvalid_a),
         .m_axi_bready (m_axi_mem_bready_a),
         .m_axi_bid (m_axi_mem_bid_a),
-        .m_axi_bresp (m_axi_mem_bresp_a),
+        .m_axi_bresp (m_axi_mem_bresp_a),
 
         .m_axi_arvalid (m_axi_mem_arvalid_a),
         .m_axi_arready (m_axi_mem_arready_a),
@@ -193,7 +193,7 @@ module Vortex_top #(
 
         .m_axi_rvalid (m_axi_mem_rvalid_a),
         .m_axi_rready (m_axi_mem_rready_a),
-        .m_axi_rdata (m_axi_mem_rdata_a),
+        .m_axi_rdata (m_axi_mem_rdata_a),
         .m_axi_rid (m_axi_mem_rid_a),
         .m_axi_rresp (m_axi_mem_rresp_a),
         .m_axi_rlast (m_axi_mem_rlast_a),
@@ -204,5 +204,5 @@ module Vortex_top #(
 
         .busy (busy)
     );
-
+
 endmodule
diff --git a/hw/syn/xilinx/sandbox/project.tcl.in b/hw/syn/xilinx/sandbox/project.tcl.in
new file mode 100644
index 000000000..8926b43ad
--- /dev/null
+++ b/hw/syn/xilinx/sandbox/project.tcl.in
@@ -0,0 +1,487 @@
+# Copyright © 2019-2023
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Exactly three script arguments are required; exit with a non-zero status so
+# the calling Makefile can detect a bad invocation.
+if { $::argc != 3 } {
+  puts "ERROR: Program \"$::argv0\" requires 3 arguments!\n"
+  puts "Usage: $::argv0 <device_part> <vcs_file> <tool_dir>\n"
+  exit 1
+}
+
+set device_part [lindex $::argv 0]
+set vcs_file [lindex $::argv 1]
+set tool_dir [lindex $::argv 2]
+
+puts "Using device_part=$device_part"
+puts "Using vcs_file=$vcs_file"
+puts "Using tool_dir=$tool_dir"
+
+# Set the number of jobs based on MAX_JOBS environment variable
+if {[info exists ::env(MAX_JOBS)]} {
+  set num_jobs $::env(MAX_JOBS)
+  puts "using num_jobs=$num_jobs"
+  #puts $num_jobs
+} else {
+  set num_jobs 0
+}
+
+# Create project_1: add the RTL sources and defines from the source list,
+# import the testbench into sim_1, build block design design_1 (Vortex_top
+# connected to an AXI BRAM controller and block RAM) and add its HDL wrapper.
+proc run_setup {} {
+  global device_part vcs_file tool_dir
+
+  # Set the project name
+  set project_name "project_1"
+
+  # Use project name variable, if specified in the tcl shell
+  if { [info exists ::user_project_name] } {
+    set project_name $::user_project_name
+  }
+
+  source "${tool_dir}/parse_vcs_list.tcl"
+  set vlist [parse_vcs_list "${vcs_file}"]
+
+  set vsources_list [lindex $vlist 0]
+  set vincludes_list [lindex $vlist 1]
+  set vdefines_list [lindex $vlist 2]
+
+  #puts ${vsources_list}
+  #puts ${vincludes_list}
+  #puts ${vdefines_list}
+
+  # Create project
+  create_project $project_name $project_name -force -part $device_part
+
+  # Set the directory path for the new project
+  set proj_dir [get_property directory [current_project]]
+
+  # Create 'sources_1' fileset (if not found)
+  if {[string equal [get_filesets -quiet sources_1] ""]} {
+    create_fileset -srcset sources_1
+  }
+
+  # add source files
+  set obj [get_filesets sources_1]
+  add_files -norecurse -verbose -fileset $obj ${vsources_list}
+
+  # process defines
+  set obj [get_filesets sources_1]
+  # Assign the whole list at once: setting the property once per define
+  # would replace the previous value and keep only the last define.
+  set_property -name "verilog_define" -value $vdefines_list -objects $obj
+
+  # Set 'sources_1' fileset properties
+  set obj [get_filesets sources_1]
+  set_property -name "name" -value "sources_1" -objects $obj
+  set_property -name "top" -value "design_1_wrapper" -objects $obj
+
+  # Create 'constrs_1' fileset (if not found)
+  if {[string equal [get_filesets -quiet constrs_1] ""]} {
+    create_fileset -constrset constrs_1
+  }
+
+  # Set 'constrs_1' fileset object
+  set obj [get_filesets constrs_1]
+
+  # Empty (no sources present)
+
+  # Set 'constrs_1' fileset properties
+  set obj [get_filesets constrs_1]
+  set_property -name "constrs_type" -value "XDC" -objects $obj
+  set_property -name "name" -value "constrs_1" -objects $obj
+  set_property -name "target_constrs_file" -value "" -objects $obj
+
+  # Create 'sim_1' fileset (if not found)
+  if {[string equal [get_filesets -quiet sim_1] ""]} {
+    create_fileset -simset sim_1
+  }
+
+  set testbench_file ""
+  foreach file ${vsources_list} {
+    if {[string match "*testbench.v" $file]} {
+      set testbench_file [file normalize $file]
+      break
+    }
+  }
+
+  # Set 'sim_1' fileset object
+  set obj [get_filesets sim_1]
+  # Import local files from the original project
+  set files [list $testbench_file]
+  set imported_files [import_files -fileset sim_1 $files]
+
+  # Set 'sim_1' fileset file properties for remote files
+  # None
+
+  # Set 'sim_1' fileset file properties for local files
+  set file "testbench.v"
+  set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]]
+  set_property -name "file_type" -value "Verilog" -objects $file_obj
+  set_property -name "is_enabled" -value "1" -objects $file_obj
+  set_property -name "is_global_include" -value "0" -objects $file_obj
+  set_property -name "library" -value "xil_defaultlib" -objects $file_obj
+  set_property -name "path_mode" -value "RelativeFirst" -objects $file_obj
+  set_property -name "used_in" -value "synthesis implementation simulation" -objects $file_obj
+  set_property -name "used_in_implementation" -value "1" -objects $file_obj
+  set_property -name "used_in_simulation" -value "1" -objects $file_obj
+  set_property -name "used_in_synthesis" -value "1" -objects $file_obj
+
+  # Set 'sim_1' fileset properties
+  set obj [get_filesets sim_1]
+  set_property -name "32bit" -value "0" -objects $obj
+  set_property -name "force_compile_glbl" -value "0" -objects $obj
+  set_property -name "generate_scripts_only" -value "0" -objects $obj
+  set_property -name "generic" -value "" -objects $obj
+  set_property -name "hbs.configure_design_for_hier_access" -value "1" -objects $obj
+  set_property -name "include_dirs" -value "" -objects $obj
+  set_property -name "incremental" -value "1" -objects $obj
+  set_property -name "name" -value "sim_1" -objects $obj
+  set_property -name "source_set" -value "sources_1" -objects $obj
+  set_property -name "systemc_include_dirs" -value "" -objects $obj
+  set_property -name "top" -value "testbench" -objects $obj
+  set_property -name "top_auto_set" -value "0" -objects $obj
+  set_property -name "top_lib" -value "xil_defaultlib" -objects $obj
+  set_property -name "verilog_define" -value "" -objects $obj
+  set_property -name "verilog_uppercase" -value "0" -objects $obj
+
+  # Set 'utils_1' fileset object
+  set obj [get_filesets utils_1]
+  # Empty (no sources present)
+
+  # Set 'utils_1' fileset properties
+  set obj [get_filesets utils_1]
+  set_property -name "name" -value "utils_1" -objects $obj
+
+  # Proc to create BD design_1
+  proc cr_bd_design_1 { parentCell } {
+    # The design that will be created by this Tcl proc contains the following
+    # module references:
+    # Vortex_top
+
+    # CHANGE DESIGN NAME HERE
+    set design_name design_1
+
+    common::send_gid_msg -ssname BD::TCL -id 2010 -severity "INFO" "Currently there is no design <$design_name> in project, so creating one..."
+
+    create_bd_design $design_name
+
+    set bCheckIPsPassed 1
+    ##################################################################
+    # CHECK IPs
+    ##################################################################
+    set bCheckIPs 1
+    if { $bCheckIPs == 1 } {
+      set list_check_ips "\
+      xilinx.com:ip:axi_bram_ctrl:4.1\
+      xilinx.com:ip:blk_mem_gen:8.4\
+      "
+
+      set list_ips_missing ""
+      common::send_gid_msg -ssname BD::TCL -id 2011 -severity "INFO" "Checking if the following IPs exist in the project's IP catalog: $list_check_ips ."
+
+      foreach ip_vlnv $list_check_ips {
+        set ip_obj [get_ipdefs -all $ip_vlnv]
+        if { $ip_obj eq "" } {
+          lappend list_ips_missing $ip_vlnv
+        }
+      }
+
+      if { $list_ips_missing ne "" } {
+        catch {common::send_gid_msg -ssname BD::TCL -id 2012 -severity "ERROR" "The following IPs are not found in the IP Catalog:\n $list_ips_missing\n\nResolution: Please add the repository containing the IP(s) to the project." }
+        set bCheckIPsPassed 0
+      }
+
+    }
+
+    ##################################################################
+    # CHECK Modules
+    ##################################################################
+    set bCheckModules 1
+    if { $bCheckModules == 1 } {
+      set list_check_mods "\
+      Vortex_top\
+      "
+
+      set list_mods_missing ""
+      common::send_gid_msg -ssname BD::TCL -id 2020 -severity "INFO" "Checking if the following modules exist in the project's sources: $list_check_mods ."
+
+      foreach mod_vlnv $list_check_mods {
+        if { [can_resolve_reference $mod_vlnv] == 0 } {
+          lappend list_mods_missing $mod_vlnv
+        }
+      }
+
+      if { $list_mods_missing ne "" } {
+        catch {common::send_gid_msg -ssname BD::TCL -id 2021 -severity "ERROR" "The following module(s) are not found in the project: $list_mods_missing" }
+        common::send_gid_msg -ssname BD::TCL -id 2022 -severity "INFO" "Please add source files for the missing module(s) above."
+        set bCheckIPsPassed 0
+      }
+    }
+
+    if { $bCheckIPsPassed != 1 } {
+      common::send_gid_msg -ssname BD::TCL -id 2023 -severity "WARNING" "Will not continue with creation of design due to the error(s) above."
+      return 3
+    }
+
+    variable script_folder
+
+    if { $parentCell eq "" } {
+      set parentCell [get_bd_cells /]
+    }
+
+    # Get object for parentCell
+    set parentObj [get_bd_cells $parentCell]
+    if { $parentObj == "" } {
+      catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"}
+      return
+    }
+
+    # Make sure parentObj is hier blk
+    set parentType [get_property TYPE $parentObj]
+    if { $parentType ne "hier" } {
+      catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be <hier>."}
+      return
+    }
+
+    # Save current instance; Restore later
+    set oldCurInst [current_bd_instance .]
+
+    # Set parent object as current
+    current_bd_instance $parentObj
+
+
+    # Create interface ports
+
+    # Create ports
+    set clk_100MHz [ create_bd_port -dir I -type clk -freq_hz 100000000 clk_100MHz ]
+    set resetn [ create_bd_port -dir I -type rst resetn ]
+    set_property -dict [ list \
+      CONFIG.POLARITY {ACTIVE_LOW} \
+    ] $resetn
+    set vx_busy [ create_bd_port -dir O vx_busy ]
+    set vx_reset [ create_bd_port -dir I -type rst vx_reset ]
+    set_property -dict [ list \
+      CONFIG.POLARITY {ACTIVE_HIGH} \
+    ] $vx_reset
+
+    set dcr_wr_valid [ create_bd_port -dir I dcr_wr_valid ]
+    set dcr_wr_addr [ create_bd_port -dir I -from 11 -to 0 dcr_wr_addr ]
+    set dcr_wr_data [ create_bd_port -dir I -from 31 -to 0 dcr_wr_data ]
+
+    # Create instance: Vortex_top_0, and set properties
+    set block_name Vortex_top
+    set block_cell_name Vortex_top_0
+    if { [catch {set Vortex_top_0 [create_bd_cell -type module -reference $block_name $block_cell_name] } errmsg] } {
+      catch {common::send_gid_msg -ssname BD::TCL -id 2095 -severity "ERROR" "Unable to add referenced block <$block_name>. Please add the files for ${block_name}'s definition into the project."}
+      return 1
+    } elseif { $Vortex_top_0 eq "" } {
+      catch {common::send_gid_msg -ssname BD::TCL -id 2096 -severity "ERROR" "Unable to referenced block <$block_name>. Please add the files for ${block_name}'s definition into the project."}
+      return 1
+    }
+
+    # Create instance: axi_bram_ctrl_0, and set properties
+    set axi_bram_ctrl_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_bram_ctrl:4.1 axi_bram_ctrl_0 ]
+    set_property -dict [ list \
+      CONFIG.DATA_WIDTH {512} \
+      CONFIG.ECC_TYPE {0} \
+    ] $axi_bram_ctrl_0
+
+    # Create instance: axi_bram_ctrl_0_bram, and set properties
+    set axi_bram_ctrl_0_bram [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.4 axi_bram_ctrl_0_bram ]
+
+    set_property -dict [ list \
+      CONFIG.Assume_Synchronous_Clk {true} \
+      CONFIG.Byte_Size {8} \
+      CONFIG.Load_Init_File {true} \
+      CONFIG.Coe_File {@CURRENTDIR@/hw/syn/xilinx/sandbox/kernel.bin.coe} \
+      CONFIG.EN_SAFETY_CKT {true} \
+      CONFIG.Enable_32bit_Address {true} \
+      CONFIG.Fill_Remaining_Memory_Locations {false} \
+      CONFIG.Memory_Type {Simple_Dual_Port_RAM} \
+      CONFIG.Operating_Mode_A {NO_CHANGE} \
+      CONFIG.Operating_Mode_B {READ_FIRST} \
+      CONFIG.Port_B_Write_Rate {0} \
+      CONFIG.Read_Width_A {512} \
+      CONFIG.Read_Width_B {512} \
+      CONFIG.Register_PortA_Output_of_Memory_Primitives {false} \
+      CONFIG.Register_PortB_Output_of_Memory_Primitives {false} \
+      CONFIG.Remaining_Memory_Locations {0} \
+      CONFIG.Use_Byte_Write_Enable {true} \
+      CONFIG.Use_RSTA_Pin {false} \
+      CONFIG.Use_RSTB_Pin {true} \
+      CONFIG.Write_Width_A {512} \
+      CONFIG.Write_Depth_A {16384} \
+      CONFIG.use_bram_block {Stand_Alone} \
+    ] $axi_bram_ctrl_0_bram
+
+    # Create interface connections
+    connect_bd_intf_net -intf_net Vortex_top_0_m_axi_mem [get_bd_intf_pins Vortex_top_0/m_axi_mem] [get_bd_intf_pins axi_bram_ctrl_0/S_AXI]
+    connect_bd_intf_net -intf_net axi_bram_ctrl_0_BRAM_PORTA [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTA] [get_bd_intf_pins axi_bram_ctrl_0_bram/BRAM_PORTA]
+    connect_bd_intf_net -intf_net axi_bram_ctrl_0_BRAM_PORTB [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTB] [get_bd_intf_pins axi_bram_ctrl_0_bram/BRAM_PORTB]
+
+    # Create port connections
+    connect_bd_net -net Vortex_top_0_busy [get_bd_ports vx_busy] [get_bd_pins Vortex_top_0/busy]
+    connect_bd_net -net clk_wiz_clk_out1 [get_bd_ports clk_100MHz] [get_bd_pins Vortex_top_0/clk] [get_bd_pins axi_bram_ctrl_0/s_axi_aclk]
+    connect_bd_net -net resetn_1 [get_bd_ports resetn] [get_bd_pins axi_bram_ctrl_0/s_axi_aresetn]
+    connect_bd_net -net vx_reset_1 [get_bd_ports vx_reset] [get_bd_pins Vortex_top_0/reset]
+    connect_bd_net -net dcr_wr_valid_1 [get_bd_ports dcr_wr_valid] [get_bd_pins Vortex_top_0/dcr_wr_valid]
+    connect_bd_net -net dcr_wr_addr_1 [get_bd_ports dcr_wr_addr] [get_bd_pins Vortex_top_0/dcr_wr_addr]
+    connect_bd_net -net dcr_wr_data_1 [get_bd_ports dcr_wr_data] [get_bd_pins Vortex_top_0/dcr_wr_data]
+
+    # Create address segments
+    assign_bd_address -offset 0x00000000 -range 0x00100000 -target_address_space [get_bd_addr_spaces Vortex_top_0/m_axi_mem] [get_bd_addr_segs axi_bram_ctrl_0/S_AXI/Mem0] -force
+
+    # Perform GUI Layout
+    regenerate_bd_layout -layout_string {
+      "ActiveEmotionalView":"Default View",
+      "Default View_ScaleFactor":"1.0",
+      "Default View_TopLeft":"-195,-165",
+      "ExpandedHierarchyInLayout":"",
+      "guistr":"# # String gsaved with Nlview 7.0r4 2019-12-20 bk=1.5203 VDI=41 GEI=36 GUI=JA:10.0 TLS
+      # -string -flagsOSRD
+      preplace port clk_100MHz -pg 1 -lvl 0 -x 0 -y 40 -defaultsOSRD
+      preplace port resetn -pg 1 -lvl 0 -x 0 -y 20 -defaultsOSRD
+      preplace port vx_busy -pg 1 -lvl 4 -x 950 -y 220 -defaultsOSRD
+      preplace port vx_reset -pg 1 -lvl 0 -x 0 -y 110 -defaultsOSRD
+      preplace port dcr_wr_valid -pg 1 -lvl 0 -x 0 -y 130 -defaultsOSRD
+      preplace portBus dcr_wr_addr -pg 1 -lvl 0 -x 0 -y 150 -defaultsOSRD
+      preplace portBus dcr_wr_data -pg 1 -lvl 0 -x 0 -y 170 -defaultsOSRD
+      preplace inst Vortex_top_0 -pg 1 -lvl 1 -x 190 -y 130 -defaultsOSRD
+      preplace inst axi_bram_ctrl_0 -pg 1 -lvl 2 -x 520 -y 140 -defaultsOSRD
+      preplace inst axi_bram_ctrl_0_bram -pg 1 -lvl 3 -x 800 -y 140 -defaultsOSRD
+      preplace netloc Vortex_top_0_busy 1 1 3 360J 220 NJ 220 NJ
+      preplace netloc clk_wiz_clk_out1 1 0 2 20 30 370
+      preplace netloc resetn_1 1 0 2 NJ 20 380J
+      preplace netloc vx_reset_1 1 0 1 NJ 110
+      preplace netloc dcr_wr_valid_1 1 0 1 NJ 130
+      preplace netloc dcr_wr_addr_1 1 0 1 NJ 150
+      preplace netloc dcr_wr_data_1 1 0 1 NJ 170
+      preplace netloc axi_bram_ctrl_0_BRAM_PORTB 1 2 1 N 150
+      preplace netloc axi_bram_ctrl_0_BRAM_PORTA 1 2 1 N 130
+      preplace netloc Vortex_top_0_m_axi_mem 1 1 1 N 120
+      levelinfo -pg 1 0 190 520 800 950
+      pagesize -pg 1 -db -bbox -sgen -180 0 1060 240
+      "
+    }
+
+    # Restore current instance
+    current_bd_instance $oldCurInst
+
+    validate_bd_design
+    save_bd_design
+    close_bd_design $design_name
+  }
+  # End of cr_bd_design_1()
+  cr_bd_design_1 ""
+  set_property EXCLUDE_DEBUG_LOGIC "0" [get_files design_1.bd ]
+  set_property GENERATE_SYNTH_CHECKPOINT "1" [get_files design_1.bd ]
+  set_property IS_ENABLED "1" [get_files design_1.bd ]
+  set_property IS_GLOBAL_INCLUDE "0" [get_files design_1.bd ]
+  #set_property IS_LOCKED "0" [get_files design_1.bd ]
+  set_property LIBRARY "xil_defaultlib" [get_files design_1.bd ]
+  set_property PATH_MODE "RelativeFirst" [get_files design_1.bd ]
+  set_property PFM_NAME "" [get_files design_1.bd ]
+  set_property REGISTERED_WITH_MANAGER "1" [get_files design_1.bd ]
+  set_property SYNTH_CHECKPOINT_MODE "Hierarchical" [get_files design_1.bd ]
+  set_property USED_IN "synthesis implementation simulation" [get_files design_1.bd ]
+  set_property USED_IN_IMPLEMENTATION "1" [get_files design_1.bd ]
+  set_property USED_IN_SIMULATION "1" [get_files design_1.bd ]
+  set_property USED_IN_SYNTHESIS "1" [get_files design_1.bd ]
+
+  # Call make_wrapper to create wrapper files
+  set wrapper_path [make_wrapper -fileset sources_1 -files [ get_files -norecurse design_1.bd] -top]
+  add_files -norecurse -fileset sources_1 $wrapper_path
+
+  update_compile_order -fileset sources_1
+}
+
+# Launch synth_1 (with -jobs when num_jobs is non-zero), wait for it, open the
+# run, then write utilization.rpt and the post_synth.dcp checkpoint.
+proc run_synthesis {} {
+  global num_jobs
+  # Synthesis
+  if {$num_jobs != 0} {
+    launch_runs synth_1 -jobs $num_jobs
+  } else {
+    launch_runs synth_1
+  }
+  wait_on_run synth_1
+  open_run synth_1
+  report_utilization -file utilization.rpt -hierarchical -hierarchical_percentages
+
+  write_checkpoint -force post_synth.dcp
+}
+
+# Source ila_insert.tcl and call insert_ila 8192 (presumably the ILA sample
+# depth -- confirm), then run impl_1 and write place/route reports and post_impl.dcp.
+proc run_implementation {} {
+  global tool_dir num_jobs
+  source "${tool_dir}/ila_insert.tcl"
+  insert_ila 8192
+
+  # Implementation
+  if {$num_jobs != 0} {
+    launch_runs impl_1 -jobs $num_jobs
+  } else {
+    launch_runs impl_1
+  }
+  wait_on_run impl_1
+  open_run impl_1
+  report_place_status -file place.rpt
+  report_route_status -file route.rpt
+  write_checkpoint -force post_impl.dcp
+}
+
+# Write the timing, power and DRC reports for the currently open design.
+proc run_report {} {
+  # Generate reports
+  report_timing_summary -file timing.rpt
+  report_power -file power.rpt
+  report_drc -file drc.rpt
+}
+
+###############################################################################
+
+# Start time
+set start_time [clock seconds]
+
+# Check if the post-implementation checkpoint exists
+# NOTE(review): the post_synth resume branch calls run_implementation (launch_runs
+# impl_1) after open_checkpoint without run_setup -- confirm this works with no project open.
+if { [file exists post_impl.dcp] } {
+  puts "Resuming from post-implementation checkpoint: post_impl.dcp"
+  open_checkpoint post_impl.dcp
+  run_report
+} elseif { [file exists post_synth.dcp] } {
+  puts "Resuming from post-synthesis checkpoint: post_synth.dcp"
+  open_checkpoint post_synth.dcp
+  run_implementation
+  run_report
+} else {
+  # execute full pipeline
+  run_setup
+  run_synthesis
+  run_implementation
+  run_report
+}
+
+# End time and calculation
+set elapsed_time [expr {[clock seconds] - $start_time}]
+
+# Display elapsed time
+set hours [format "%02d" [expr {$elapsed_time / 3600}]]
+set minutes [format "%02d" [expr {($elapsed_time % 3600) / 60}]]
+set seconds [format "%02d" [expr {$elapsed_time % 60}]]
+puts "Total elapsed time: ${hours}h ${minutes}m ${seconds}s"
\ No newline at end of file
diff --git a/hw/syn/xilinx/test/project_1_files/testbench.v b/hw/syn/xilinx/sandbox/testbench.v
similarity index 100%
rename from hw/syn/xilinx/test/project_1_files/testbench.v
rename to hw/syn/xilinx/sandbox/testbench.v
diff --git a/hw/syn/xilinx/test/Makefile b/hw/syn/xilinx/test/Makefile
deleted file mode 100644
index e15789516..000000000
--- a/hw/syn/xilinx/test/Makefile
+++ /dev/null
@@ -1,54 +0,0 @@
-ROOT_DIR := $(realpath ../../../..)
-include $(ROOT_DIR)/config.mk
-
-VIVADO := $(XILINX_VIVADO)/bin/vivado
-
-SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/test
-
-RTL_DIR := $(VORTEX_HOME)/hw/rtl
-DPI_DIR := $(VORTEX_HOME)/hw/dpi
-AFU_DIR := $(RTL_DIR)/afu/opae
-SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts
-
-# include paths
-FPU_INCLUDE = -I$(RTL_DIR)/fpu
-ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
-  FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
-endif
-RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
-RTL_INCLUDE += $(FPU_INCLUDE)
-RTL_INCLUDE += -I$(SRC_DIR)/project_1_files
-
-# compilation flags
-CFLAGS += -DNDEBUG -DSYNTHESIS -DVIVADO
-CFLAGS += $(CONFIGS)
-CFLAGS += $(RTL_INCLUDE)
-CFLAGS += -DEXT_F_DISABLE
-#CFLAGS += -DNUM_CORES 4
-
-# update memory layout for 2MB RAM
-CFLAGS += -DSTARTUP_ADDR=32\'h80000
-CFLAGS += -DIO_BASE_ADDR=32\'hFF000
-
-COE_FILE := $(SRC_DIR)/project_1_files/kernel.bin.coe
-ESCAPED_COE_FILE := $(shell echo "$(COE_FILE)" | sed -e 's/[\/&]/\\&/g')
-
-all: build
-
-gen-sources: project_1/sources.txt
-project_1/sources.txt:
-	mkdir -p project_1
-	$(SCRIPT_DIR)/gen_sources.sh $(CFLAGS) -P -Cproject_1/src -Oproject_1/sources.txt
-
-project.tcl: project.tcl.in
-	sed -e 's/%COE_FILE%/$(ESCAPED_COE_FILE)/g' < $< > $@
-
-build: project_1/vortex.xpr
-project_1/vortex.xpr: project_1/sources.txt project.tcl
-	$(VIVADO)
-mode batch -source project.tcl -tclargs project_1/sources.txt project_1/src $(SCRIPT_DIR) - -run: project_1/vortex.xpr - $(VIVADO) project_1/vortex.xpr & - -clean: - rm -rf project_1 project.tcl diff --git a/hw/syn/xilinx/test/kernel/Makefile b/hw/syn/xilinx/test/kernel/Makefile deleted file mode 100644 index 515533689..000000000 --- a/hw/syn/xilinx/test/kernel/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -ROOT_DIR := $(realpath ../../../../..) -include $(ROOT_DIR)/config.mk - -ifeq ($(XLEN),64) -CFLAGS += -march=rv64imafd -mabi=lp64d -else -CFLAGS += -march=rv32imaf -mabi=ilp32f -endif - -SRC_DIR := $(VORTEX_HOME)/hw/syn/xilinx/test/kernel - -SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts - -BIN2COE_PATH ?= $(SCRIPT_DIR)/bin2coe - -CC = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc -AR = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc-ar -DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump -CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy - -CFLAGS += -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections -CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_HOME)/hw - -LDFLAGS += -lm -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 - -PROJECT = kernel - -SRCS = $(SRC_DIR)/main.c $(SRC_DIR)/start.S - -all: $(PROJECT).elf $(PROJECT).hex $(PROJECT).bin $(PROJECT).dump $(PROJECT).bin.coe - -$(PROJECT).dump: $(PROJECT).elf - $(DP) -D $< > $@ - -$(PROJECT).hex: $(PROJECT).elf - $(CP) -O ihex $< $@ - -$(PROJECT).bin: $(PROJECT).elf - $(CP) -O binary $< $@ - -$(PROJECT).bin.coe: $(PROJECT).bin - $(BIN2COE_PATH)/bin2coe $< --out=$@ --binary=$(PROJECT).bin --data=$(PROJECT).dat --binaddr=8192 --depth=16384 --wordsize=64 - -$(PROJECT).elf: $(SRCS) - $(CC) $(CFLAGS) $^ $(LDFLAGS) -o $@ - -.depend: $(SRCS) - $(CC) $(CFLAGS) -MM $^ > .depend; - -clean: - rm -rf *.bin *.elf *.hex *.dump *.coe .depend diff --git a/hw/syn/xilinx/test/kernel/kernel.dat b/hw/syn/xilinx/test/kernel/kernel.dat deleted file mode 
100644 index 6e197b719..000000000 --- a/hw/syn/xilinx/test/kernel/kernel.dat +++ /dev/null @@ -1,3 +0,0 @@ -@1 -000000C00000008000000002, -00000003000000020000000100000000, \ No newline at end of file diff --git a/hw/syn/xilinx/test/project.tcl.in b/hw/syn/xilinx/test/project.tcl.in deleted file mode 100644 index a2692f637..000000000 --- a/hw/syn/xilinx/test/project.tcl.in +++ /dev/null @@ -1,2228 +0,0 @@ -if { $::argc != 3 } { - puts "ERROR: Program \"$::argv0\" requires 3 arguments!\n" - puts "Usage: $::argv0 \n" - exit -} - -set vcs_file [lindex $::argv 0] -set files_dir [lindex $::argv 1] -set tool_dir [lindex $::argv 2] - -#puts $vcs_file -#puts $files_dir -#puts $tool_dir - -set origin_dir [file normalize "."] - -# Use origin directory path location variable, if specified in the tcl shell -if { [info exists ::origin_dir_loc] } { - set origin_dir $::origin_dir_loc -} - -# Set the project name -set project_name "project_1" - -# Use project name variable, if specified in the tcl shell -if { [info exists ::user_project_name] } { - set project_name $::user_project_name -} - -source "${tool_dir}/parse_vcs_list.tcl" -set vlist [parse_vcs_list "${vcs_file}"] - -set vsources_list [lindex $vlist 0] -set vincludes_list [lindex $vlist 1] -set vdefines_list [lindex $vlist 2] - -#puts ${vsources_list} -#puts ${vincludes_list} -#puts ${vdefines_list} - -# Create project -create_project ${project_name} ./${project_name} -force -part xcu280-fsvh2892-2L-e - -# Set the directory path for the new project -set proj_dir [get_property directory [current_project]] - -# Set project properties -set obj [current_project] -set_property -name "board_part" -value "xilinx.com:au280:part0:1.1" -objects $obj -set_property -name "compxlib.activehdl_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/activehdl" -objects $obj -set_property -name "compxlib.funcsim" -value "1" -objects $obj -set_property -name "compxlib.ies_compiled_library_dir" -value 
"$proj_dir/${project_name}.cache/compile_simlib/ies" -objects $obj -set_property -name "compxlib.modelsim_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/modelsim" -objects $obj -set_property -name "compxlib.overwrite_libs" -value "0" -objects $obj -set_property -name "compxlib.questa_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/questa" -objects $obj -set_property -name "compxlib.riviera_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/riviera" -objects $obj -set_property -name "compxlib.timesim" -value "1" -objects $obj -set_property -name "compxlib.vcs_compiled_library_dir" -value "$proj_dir/${project_name}.cache/compile_simlib/vcs" -objects $obj -set_property -name "compxlib.xsim_compiled_library_dir" -value "" -objects $obj -set_property -name "corecontainer.enable" -value "0" -objects $obj -set_property -name "default_lib" -value "xil_defaultlib" -objects $obj -set_property -name "enable_optional_runs_sta" -value "0" -objects $obj -set_property -name "enable_vhdl_2008" -value "1" -objects $obj -set_property -name "generate_ip_upgrade_log" -value "1" -objects $obj -set_property -name "ip_cache_permissions" -value "read write" -objects $obj -set_property -name "ip_interface_inference_priority" -value "" -objects $obj -set_property -name "ip_output_repo" -value "$proj_dir/${project_name}.cache/ip" -objects $obj -set_property -name "legacy_ip_repo_paths" -value "" -objects $obj -set_property -name "mem.enable_memory_map_generation" -value "1" -objects $obj -set_property -name "platform.board_id" -value "au280" -objects $obj -set_property -name "platform.default_output_type" -value "undefined" -objects $obj -set_property -name "platform.design_intent.datacenter" -value "undefined" -objects $obj -set_property -name "platform.design_intent.embedded" -value "undefined" -objects $obj -set_property -name "platform.design_intent.external_host" -value "undefined" -objects $obj 
-set_property -name "platform.design_intent.server_managed" -value "undefined" -objects $obj -set_property -name "platform.rom.debug_type" -value "0" -objects $obj -set_property -name "platform.rom.prom_type" -value "0" -objects $obj -set_property -name "platform.slrconstraintmode" -value "0" -objects $obj -set_property -name "preferred_sim_model" -value "rtl" -objects $obj -set_property -name "project_type" -value "Default" -objects $obj -set_property -name "pr_flow" -value "0" -objects $obj -set_property -name "sim.central_dir" -value "$proj_dir/${project_name}.ip_user_files" -objects $obj -set_property -name "sim.ip.auto_export_scripts" -value "1" -objects $obj -set_property -name "sim.use_ip_compiled_libs" -value "1" -objects $obj -set_property -name "simulator.activehdl_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.activehdl_install_dir" -value "" -objects $obj -set_property -name "simulator.ies_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.ies_install_dir" -value "" -objects $obj -set_property -name "simulator.modelsim_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.modelsim_install_dir" -value "" -objects $obj -set_property -name "simulator.questa_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.riviera_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.riviera_install_dir" -value "" -objects $obj -set_property -name "simulator.vcs_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.vcs_install_dir" -value "" -objects $obj -set_property -name "simulator.xcelium_gcc_install_dir" -value "" -objects $obj -set_property -name "simulator.xcelium_install_dir" -value "" -objects $obj -set_property -name "simulator_language" -value "Verilog" -objects $obj -set_property -name "source_mgmt_mode" -value "All" -objects $obj -set_property -name "target_language" -value "Verilog" -objects $obj -set_property -name "target_simulator" 
-value "XSim" -objects $obj -set_property -name "tool_flow" -value "Vivado" -objects $obj -set_property -name "webtalk.activehdl_export_sim" -value "27" -objects $obj -set_property -name "webtalk.ies_export_sim" -value "27" -objects $obj -set_property -name "webtalk.modelsim_export_sim" -value "27" -objects $obj -set_property -name "webtalk.questa_export_sim" -value "27" -objects $obj -set_property -name "webtalk.riviera_export_sim" -value "27" -objects $obj -set_property -name "webtalk.vcs_export_sim" -value "27" -objects $obj -set_property -name "webtalk.xcelium_export_sim" -value "5" -objects $obj -set_property -name "webtalk.xsim_export_sim" -value "27" -objects $obj -set_property -name "webtalk.xsim_launch_sim" -value "91" -objects $obj -set_property -name "xpm_libraries" -value "XPM_CDC XPM_MEMORY" -objects $obj -set_property -name "xsim.array_display_limit" -value "1024" -objects $obj -set_property -name "xsim.radix" -value "hex" -objects $obj -set_property -name "xsim.time_unit" -value "ns" -objects $obj -set_property -name "xsim.trace_limit" -value "65536" -objects $obj - -# Create 'sources_1' fileset (if not found) -if {[string equal [get_filesets -quiet sources_1] ""]} { - create_fileset -srcset sources_1 -} - -# add source files -set obj [get_filesets sources_1] -add_files -norecurse -verbose -fileset $obj ${vsources_list} - -# process defines -set obj [get_filesets sources_1] -foreach def $vdefines_list { - set_property -name "verilog_define" -value $def -objects $obj -} - -# Set 'sources_1' fileset properties -set obj [get_filesets sources_1] -set_property -name "design_mode" -value "RTL" -objects $obj -set_property -name "edif_extra_search_paths" -value "" -objects $obj -set_property -name "elab_link_dcps" -value "1" -objects $obj -set_property -name "elab_load_timing_constraints" -value "1" -objects $obj -set_property -name "generic" -value "" -objects $obj -set_property -name "include_dirs" -value "" -objects $obj -set_property -name "lib_map_file" 
-value "" -objects $obj -set_property -name "loop_count" -value "1000" -objects $obj -set_property -name "name" -value "sources_1" -objects $obj -set_property -name "top" -value "design_1_wrapper" -objects $obj -set_property -name "top_auto_set" -value "0" -objects $obj -set_property -name "verilog_define" -value "" -objects $obj -set_property -name "verilog_uppercase" -value "1" -objects $obj -set_property -name "verilog_version" -value "verilog_2001" -objects $obj -set_property -name "vhdl_version" -value "vhdl_2k" -objects $obj - -# Create 'constrs_1' fileset (if not found) -if {[string equal [get_filesets -quiet constrs_1] ""]} { - create_fileset -constrset constrs_1 -} - -# Set 'constrs_1' fileset object -set obj [get_filesets constrs_1] - -# Empty (no sources present) - -# Set 'constrs_1' fileset properties -set obj [get_filesets constrs_1] -set_property -name "constrs_type" -value "XDC" -objects $obj -set_property -name "name" -value "constrs_1" -objects $obj -set_property -name "target_constrs_file" -value "" -objects $obj - -# Create 'sim_1' fileset (if not found) -if {[string equal [get_filesets -quiet sim_1] ""]} { - create_fileset -simset sim_1 -} - -# Set 'sim_1' fileset object -set obj [get_filesets sim_1] -# Import local files from the original project -set files [list \ - [file normalize "$files_dir/testbench.v" ]\ -] -set imported_files [import_files -fileset sim_1 $files] - -# Set 'sim_1' fileset file properties for remote files -# None - -# Set 'sim_1' fileset file properties for local files -set file "testbench.v" -set file_obj [get_files -of_objects [get_filesets sim_1] [list "*$file"]] -set_property -name "file_type" -value "Verilog" -objects $file_obj -set_property -name "is_enabled" -value "1" -objects $file_obj -set_property -name "is_global_include" -value "0" -objects $file_obj -set_property -name "library" -value "xil_defaultlib" -objects $file_obj -set_property -name "path_mode" -value "RelativeFirst" -objects $file_obj -set_property 
-name "used_in" -value "synthesis implementation simulation" -objects $file_obj -set_property -name "used_in_implementation" -value "1" -objects $file_obj -set_property -name "used_in_simulation" -value "1" -objects $file_obj -set_property -name "used_in_synthesis" -value "1" -objects $file_obj - -# Set 'sim_1' fileset properties -set obj [get_filesets sim_1] -set_property -name "32bit" -value "0" -objects $obj -set_property -name "force_compile_glbl" -value "0" -objects $obj -set_property -name "generate_scripts_only" -value "0" -objects $obj -set_property -name "generic" -value "" -objects $obj -set_property -name "hbs.configure_design_for_hier_access" -value "1" -objects $obj -set_property -name "include_dirs" -value "" -objects $obj -set_property -name "incremental" -value "1" -objects $obj -set_property -name "name" -value "sim_1" -objects $obj -set_property -name "nl.cell" -value "" -objects $obj -set_property -name "nl.incl_unisim_models" -value "0" -objects $obj -set_property -name "nl.mode" -value "funcsim" -objects $obj -set_property -name "nl.process_corner" -value "slow" -objects $obj -set_property -name "nl.rename_top" -value "" -objects $obj -set_property -name "nl.sdf_anno" -value "1" -objects $obj -set_property -name "nl.write_all_overrides" -value "0" -objects $obj -set_property -name "source_set" -value "sources_1" -objects $obj -set_property -name "systemc_include_dirs" -value "" -objects $obj -set_property -name "top" -value "testbench" -objects $obj -set_property -name "top_auto_set" -value "0" -objects $obj -set_property -name "top_lib" -value "xil_defaultlib" -objects $obj -set_property -name "transport_int_delay" -value "0" -objects $obj -set_property -name "transport_path_delay" -value "0" -objects $obj -set_property -name "unifast" -value "0" -objects $obj -set_property -name "verilog_define" -value "" -objects $obj -set_property -name "verilog_uppercase" -value "0" -objects $obj -set_property -name "xelab.dll" -value "0" -objects $obj 
-set_property -name "xsim.compile.tcl.pre" -value "" -objects $obj -set_property -name "xsim.compile.xsc.more_options" -value "" -objects $obj -set_property -name "xsim.compile.xvhdl.more_options" -value "" -objects $obj -set_property -name "xsim.compile.xvhdl.nosort" -value "1" -objects $obj -set_property -name "xsim.compile.xvhdl.relax" -value "1" -objects $obj -set_property -name "xsim.compile.xvlog.more_options" -value "" -objects $obj -set_property -name "xsim.compile.xvlog.nosort" -value "1" -objects $obj -set_property -name "xsim.compile.xvlog.relax" -value "1" -objects $obj -set_property -name "xsim.elaborate.debug_level" -value "typical" -objects $obj -set_property -name "xsim.elaborate.load_glbl" -value "1" -objects $obj -set_property -name "xsim.elaborate.mt_level" -value "auto" -objects $obj -set_property -name "xsim.elaborate.rangecheck" -value "0" -objects $obj -set_property -name "xsim.elaborate.relax" -value "1" -objects $obj -set_property -name "xsim.elaborate.sdf_delay" -value "sdfmax" -objects $obj -set_property -name "xsim.elaborate.snapshot" -value "" -objects $obj -set_property -name "xsim.elaborate.xelab.more_options" -value "" -objects $obj -set_property -name "xsim.elaborate.xsc.more_options" -value "" -objects $obj -set_property -name "xsim.simulate.add_positional" -value "0" -objects $obj -set_property -name "xsim.simulate.custom_tcl" -value "" -objects $obj -set_property -name "xsim.simulate.log_all_signals" -value "0" -objects $obj -set_property -name "xsim.simulate.no_quit" -value "0" -objects $obj -set_property -name "xsim.simulate.runtime" -value "4000ns" -objects $obj -set_property -name "xsim.simulate.saif" -value "" -objects $obj -set_property -name "xsim.simulate.saif_all_signals" -value "0" -objects $obj -set_property -name "xsim.simulate.saif_scope" -value "" -objects $obj -set_property -name "xsim.simulate.tcl.post" -value "" -objects $obj -set_property -name "xsim.simulate.wdb" -value "" -objects $obj -set_property -name 
"xsim.simulate.xsim.more_options" -value "" -objects $obj - -# Set 'utils_1' fileset object -set obj [get_filesets utils_1] -# Empty (no sources present) - -# Set 'utils_1' fileset properties -set obj [get_filesets utils_1] -set_property -name "name" -value "utils_1" -objects $obj - -# Proc to create BD design_1 -proc cr_bd_design_1 { parentCell } { -# The design that will be created by this Tcl proc contains the following -# module references: -# Vortex_top - -# CHANGE DESIGN NAME HERE -set design_name design_1 - -common::send_gid_msg -ssname BD::TCL -id 2010 -severity "INFO" "Currently there is no design <$design_name> in project, so creating one..." - -create_bd_design $design_name - -set bCheckIPsPassed 1 -################################################################## -# CHECK IPs -################################################################## -set bCheckIPs 1 -if { $bCheckIPs == 1 } { - set list_check_ips "\ - xilinx.com:ip:axi_bram_ctrl:4.1\ - xilinx.com:ip:blk_mem_gen:8.4\ - " - - set list_ips_missing "" - common::send_gid_msg -ssname BD::TCL -id 2011 -severity "INFO" "Checking if the following IPs exist in the project's IP catalog: $list_check_ips ." - - foreach ip_vlnv $list_check_ips { - set ip_obj [get_ipdefs -all $ip_vlnv] - if { $ip_obj eq "" } { - lappend list_ips_missing $ip_vlnv - } - } - - if { $list_ips_missing ne "" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2012 -severity "ERROR" "The following IPs are not found in the IP Catalog:\n $list_ips_missing\n\nResolution: Please add the repository containing the IP(s) to the project." 
} - set bCheckIPsPassed 0 - } - - } - - ################################################################## - # CHECK Modules - ################################################################## - set bCheckModules 1 - if { $bCheckModules == 1 } { - set list_check_mods "\ - Vortex_top\ - " - - set list_mods_missing "" - common::send_gid_msg -ssname BD::TCL -id 2020 -severity "INFO" "Checking if the following modules exist in the project's sources: $list_check_mods ." - - foreach mod_vlnv $list_check_mods { - if { [can_resolve_reference $mod_vlnv] == 0 } { - lappend list_mods_missing $mod_vlnv - } - } - - if { $list_mods_missing ne "" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2021 -severity "ERROR" "The following module(s) are not found in the project: $list_mods_missing" } - common::send_gid_msg -ssname BD::TCL -id 2022 -severity "INFO" "Please add source files for the missing module(s) above." - set bCheckIPsPassed 0 - } -} - -if { $bCheckIPsPassed != 1 } { - common::send_gid_msg -ssname BD::TCL -id 2023 -severity "WARNING" "Will not continue with creation of design due to the error(s) above." - return 3 -} - -variable script_folder - -if { $parentCell eq "" } { - set parentCell [get_bd_cells /] -} - -# Get object for parentCell -set parentObj [get_bd_cells $parentCell] -if { $parentObj == "" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} - return -} - -# Make sure parentObj is hier blk -set parentType [get_property TYPE $parentObj] -if { $parentType ne "hier" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} - return -} - -# Save current instance; Restore later -set oldCurInst [current_bd_instance .] 
- -# Set parent object as current -current_bd_instance $parentObj - - -# Create interface ports - -# Create ports -set clk_100MHz [ create_bd_port -dir I -type clk -freq_hz 100000000 clk_100MHz ] -set resetn [ create_bd_port -dir I -type rst resetn ] -set_property -dict [ list \ - CONFIG.POLARITY {ACTIVE_LOW} \ -] $resetn -set vx_busy [ create_bd_port -dir O vx_busy ] -set vx_reset [ create_bd_port -dir I -type rst vx_reset ] -set_property -dict [ list \ - CONFIG.POLARITY {ACTIVE_HIGH} \ -] $vx_reset - -set dcr_wr_valid [ create_bd_port -dir I dcr_wr_valid ] -set dcr_wr_addr [ create_bd_port -dir I -from 11 -to 0 dcr_wr_addr ] -set dcr_wr_data [ create_bd_port -dir I -from 31 -to 0 dcr_wr_data ] - -# Create instance: Vortex_top_0, and set properties -set block_name Vortex_top -set block_cell_name Vortex_top_0 -if { [catch {set Vortex_top_0 [create_bd_cell -type module -reference $block_name $block_cell_name] } errmsg] } { - catch {common::send_gid_msg -ssname BD::TCL -id 2095 -severity "ERROR" "Unable to add referenced block <$block_name>. Please add the files for ${block_name}'s definition into the project."} - return 1 - } elseif { $Vortex_top_0 eq "" } { - catch {common::send_gid_msg -ssname BD::TCL -id 2096 -severity "ERROR" "Unable to referenced block <$block_name>. 
Please add the files for ${block_name}'s definition into the project."} - return 1 - } - -# Create instance: axi_bram_ctrl_0, and set properties -set axi_bram_ctrl_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_bram_ctrl:4.1 axi_bram_ctrl_0 ] -set_property -dict [ list \ - CONFIG.DATA_WIDTH {512} \ - CONFIG.ECC_TYPE {0} \ -] $axi_bram_ctrl_0 - -# Create instance: axi_bram_ctrl_0_bram, and set properties -set axi_bram_ctrl_0_bram [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.4 axi_bram_ctrl_0_bram ] - -set_property -dict [ list \ - CONFIG.Assume_Synchronous_Clk {true} \ - CONFIG.Byte_Size {8} \ - CONFIG.Load_Init_File {true} \ - CONFIG.Coe_File {%COE_FILE%} \ - CONFIG.EN_SAFETY_CKT {true} \ - CONFIG.Enable_32bit_Address {true} \ - CONFIG.Fill_Remaining_Memory_Locations {false} \ - CONFIG.Memory_Type {Simple_Dual_Port_RAM} \ - CONFIG.Operating_Mode_A {NO_CHANGE} \ - CONFIG.Operating_Mode_B {READ_FIRST} \ - CONFIG.Port_B_Write_Rate {0} \ - CONFIG.Read_Width_A {512} \ - CONFIG.Read_Width_B {512} \ - CONFIG.Register_PortA_Output_of_Memory_Primitives {false} \ - CONFIG.Register_PortB_Output_of_Memory_Primitives {false} \ - CONFIG.Remaining_Memory_Locations {0} \ - CONFIG.Use_Byte_Write_Enable {true} \ - CONFIG.Use_RSTA_Pin {false} \ - CONFIG.Use_RSTB_Pin {true} \ - CONFIG.Write_Width_A {512} \ - CONFIG.Write_Depth_A {16384} \ - CONFIG.use_bram_block {Stand_Alone} \ -] $axi_bram_ctrl_0_bram - -# Create interface connections -connect_bd_intf_net -intf_net Vortex_top_0_m_axi_mem [get_bd_intf_pins Vortex_top_0/m_axi_mem] [get_bd_intf_pins axi_bram_ctrl_0/S_AXI] -connect_bd_intf_net -intf_net axi_bram_ctrl_0_BRAM_PORTA [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTA] [get_bd_intf_pins axi_bram_ctrl_0_bram/BRAM_PORTA] -connect_bd_intf_net -intf_net axi_bram_ctrl_0_BRAM_PORTB [get_bd_intf_pins axi_bram_ctrl_0/BRAM_PORTB] [get_bd_intf_pins axi_bram_ctrl_0_bram/BRAM_PORTB] - -# Create port connections -connect_bd_net -net Vortex_top_0_busy [get_bd_ports vx_busy] 
[get_bd_pins Vortex_top_0/busy] -connect_bd_net -net clk_wiz_clk_out1 [get_bd_ports clk_100MHz] [get_bd_pins Vortex_top_0/clk] [get_bd_pins axi_bram_ctrl_0/s_axi_aclk] -connect_bd_net -net resetn_1 [get_bd_ports resetn] [get_bd_pins axi_bram_ctrl_0/s_axi_aresetn] -connect_bd_net -net vx_reset_1 [get_bd_ports vx_reset] [get_bd_pins Vortex_top_0/reset] -connect_bd_net -net dcr_wr_valid_1 [get_bd_ports dcr_wr_valid] [get_bd_pins Vortex_top_0/dcr_wr_valid] -connect_bd_net -net dcr_wr_addr_1 [get_bd_ports dcr_wr_addr] [get_bd_pins Vortex_top_0/dcr_wr_addr] -connect_bd_net -net dcr_wr_data_1 [get_bd_ports dcr_wr_data] [get_bd_pins Vortex_top_0/dcr_wr_data] - -# Create address segments -assign_bd_address -offset 0x00000000 -range 0x00100000 -target_address_space [get_bd_addr_spaces Vortex_top_0/m_axi_mem] [get_bd_addr_segs axi_bram_ctrl_0/S_AXI/Mem0] -force - -# Perform GUI Layout -regenerate_bd_layout -layout_string { - "ActiveEmotionalView":"Default View", - "Default View_ScaleFactor":"1.0", - "Default View_TopLeft":"-195,-165", - "ExpandedHierarchyInLayout":"", - "guistr":"# # String gsaved with Nlview 7.0r4 2019-12-20 bk=1.5203 VDI=41 GEI=36 GUI=JA:10.0 TLS -# -string -flagsOSRD -preplace port clk_100MHz -pg 1 -lvl 0 -x 0 -y 40 -defaultsOSRD -preplace port resetn -pg 1 -lvl 0 -x 0 -y 20 -defaultsOSRD -preplace port vx_busy -pg 1 -lvl 4 -x 950 -y 220 -defaultsOSRD -preplace port vx_reset -pg 1 -lvl 0 -x 0 -y 110 -defaultsOSRD -preplace port dcr_wr_valid -pg 1 -lvl 0 -x 0 -y 130 -defaultsOSRD -preplace portBus dcr_wr_addr -pg 1 -lvl 0 -x 0 -y 150 -defaultsOSRD -preplace portBus dcr_wr_data -pg 1 -lvl 0 -x 0 -y 170 -defaultsOSRD -preplace inst Vortex_top_0 -pg 1 -lvl 1 -x 190 -y 130 -defaultsOSRD -preplace inst axi_bram_ctrl_0 -pg 1 -lvl 2 -x 520 -y 140 -defaultsOSRD -preplace inst axi_bram_ctrl_0_bram -pg 1 -lvl 3 -x 800 -y 140 -defaultsOSRD -preplace netloc Vortex_top_0_busy 1 1 3 360J 220 NJ 220 NJ -preplace netloc clk_wiz_clk_out1 1 0 2 20 30 370 -preplace netloc 
resetn_1 1 0 2 NJ 20 380J -preplace netloc vx_reset_1 1 0 1 NJ 110 -preplace netloc dcr_wr_valid_1 1 0 1 NJ 130 -preplace netloc dcr_wr_addr_1 1 0 1 NJ 150 -preplace netloc dcr_wr_data_1 1 0 1 NJ 170 -preplace netloc axi_bram_ctrl_0_BRAM_PORTB 1 2 1 N 150 -preplace netloc axi_bram_ctrl_0_BRAM_PORTA 1 2 1 N 130 -preplace netloc Vortex_top_0_m_axi_mem 1 1 1 N 120 -levelinfo -pg 1 0 190 520 800 950 -pagesize -pg 1 -db -bbox -sgen -180 0 1060 240 -" -} - - # Restore current instance - current_bd_instance $oldCurInst - - validate_bd_design - save_bd_design - close_bd_design $design_name -} -# End of cr_bd_design_1() -cr_bd_design_1 "" -set_property EXCLUDE_DEBUG_LOGIC "0" [get_files design_1.bd ] -set_property GENERATE_SYNTH_CHECKPOINT "1" [get_files design_1.bd ] -set_property IS_ENABLED "1" [get_files design_1.bd ] -set_property IS_GLOBAL_INCLUDE "0" [get_files design_1.bd ] -#set_property IS_LOCKED "0" [get_files design_1.bd ] -set_property LIBRARY "xil_defaultlib" [get_files design_1.bd ] -set_property PATH_MODE "RelativeFirst" [get_files design_1.bd ] -set_property PFM_NAME "" [get_files design_1.bd ] -set_property REGISTERED_WITH_MANAGER "1" [get_files design_1.bd ] -set_property SYNTH_CHECKPOINT_MODE "Hierarchical" [get_files design_1.bd ] -set_property USED_IN "synthesis implementation simulation" [get_files design_1.bd ] -set_property USED_IN_IMPLEMENTATION "1" [get_files design_1.bd ] -set_property USED_IN_SIMULATION "1" [get_files design_1.bd ] -set_property USED_IN_SYNTHESIS "1" [get_files design_1.bd ] - -#call make_wrapper to create wrapper files -set wrapper_path [make_wrapper -fileset sources_1 -files [ get_files -norecurse design_1.bd] -top] -add_files -norecurse -fileset sources_1 $wrapper_path - -# Create 'synth_1' run (if not found) -if {[string equal [get_runs -quiet synth_1] ""]} { - create_run -name synth_1 -part xcu280-fsvh2892-2L-e -flow {Vivado Synthesis 2020} -strategy "Vivado Synthesis Defaults" -report_strategy {No Reports} -constrset 
constrs_1 -} else { - set_property strategy "Vivado Synthesis Defaults" [get_runs synth_1] - set_property flow "Vivado Synthesis 2020" [get_runs synth_1] -} -set obj [get_runs synth_1] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Synthesis Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'synth_1_synth_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs synth_1] synth_1_synth_report_utilization_0] "" ] } { - create_report_config -report_name synth_1_synth_report_utilization_0 -report_type report_utilization:1.0 -steps synth_design -runs synth_1 -} -set obj [get_report_configs -of_objects [get_runs synth_1] synth_1_synth_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Synth Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property -name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs synth_1] -set_property -name "constrset" -value "constrs_1" -objects $obj -set_property -name "description" -value "Vivado Synthesis Defaults" -objects $obj -set_property -name "flow" -value "Vivado Synthesis 2020" -objects $obj -set_property -name "name" -value "synth_1" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name 
"auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "write_incremental_synth_checkpoint" -value "0" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/synth_1" -objects $obj -set_property -name "strategy" -value "Vivado Synthesis Defaults" -objects $obj -set_property -name "steps.synth_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.synth_design.tcl.post" -value "" -objects $obj -set_property -name "steps.synth_design.args.flatten_hierarchy" -value "rebuilt" -objects $obj -set_property -name "steps.synth_design.args.gated_clock_conversion" -value "off" -objects $obj -set_property -name "steps.synth_design.args.bufg" -value "12" -objects $obj -set_property -name "steps.synth_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.synth_design.args.retiming" -value "0" -objects $obj -set_property -name "steps.synth_design.args.fsm_extraction" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.keep_equivalent_registers" -value "0" -objects $obj -set_property -name "steps.synth_design.args.resource_sharing" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.control_set_opt_threshold" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.no_lc" -value "0" -objects $obj -set_property -name "steps.synth_design.args.no_srlextract" -value "0" -objects $obj -set_property -name "steps.synth_design.args.shreg_min_size" -value "3" -objects $obj -set_property -name "steps.synth_design.args.max_bram" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_uram" -value "-1" -objects $obj 
-set_property -name "steps.synth_design.args.max_dsp" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_bram_cascade_height" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_uram_cascade_height" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.cascade_dsp" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.assert" -value "0" -objects $obj -set_property -name "steps.synth_design.args.more options" -value "" -objects $obj - -# Create 'synth_1_copy_1' run (if not found) -if {[string equal [get_runs -quiet synth_1_copy_1] ""]} { - create_run -name synth_1_copy_1 -part xcu280-fsvh2892-2L-e -flow {Vivado Synthesis 2020} -strategy "Vivado Synthesis Defaults" -report_strategy {No Reports} -constrset constrs_1 -} else { - set_property strategy "Vivado Synthesis Defaults" [get_runs synth_1_copy_1] - set_property flow "Vivado Synthesis 2020" [get_runs synth_1_copy_1] -} -set obj [get_runs synth_1_copy_1] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Synthesis Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'synth_1_copy_1_synth_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs synth_1_copy_1] synth_1_copy_1_synth_report_utilization_0] "" ] } { - create_report_config -report_name synth_1_copy_1_synth_report_utilization_0 -report_type report_utilization:1.0 -steps synth_design -runs synth_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs synth_1_copy_1] synth_1_copy_1_synth_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Synth Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property 
-name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs synth_1_copy_1] -set_property -name "constrset" -value "constrs_1" -objects $obj -set_property -name "description" -value "Vivado Synthesis Defaults" -objects $obj -set_property -name "flow" -value "Vivado Synthesis 2020" -objects $obj -set_property -name "name" -value "synth_1_copy_1" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name "auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "write_incremental_synth_checkpoint" -value "0" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/synth_1" -objects $obj -set_property -name "strategy" -value "Vivado Synthesis Defaults" -objects $obj -set_property -name "steps.synth_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.synth_design.tcl.post" -value "" -objects $obj -set_property -name "steps.synth_design.args.flatten_hierarchy" -value "rebuilt" -objects $obj -set_property -name "steps.synth_design.args.gated_clock_conversion" -value "off" -objects $obj -set_property -name "steps.synth_design.args.bufg" -value "12" -objects $obj -set_property -name "steps.synth_design.args.directive" -value "Default" -objects $obj 
-set_property -name "steps.synth_design.args.retiming" -value "0" -objects $obj -set_property -name "steps.synth_design.args.fsm_extraction" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.keep_equivalent_registers" -value "0" -objects $obj -set_property -name "steps.synth_design.args.resource_sharing" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.control_set_opt_threshold" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.no_lc" -value "0" -objects $obj -set_property -name "steps.synth_design.args.no_srlextract" -value "0" -objects $obj -set_property -name "steps.synth_design.args.shreg_min_size" -value "3" -objects $obj -set_property -name "steps.synth_design.args.max_bram" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_uram" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_dsp" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_bram_cascade_height" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.max_uram_cascade_height" -value "-1" -objects $obj -set_property -name "steps.synth_design.args.cascade_dsp" -value "auto" -objects $obj -set_property -name "steps.synth_design.args.assert" -value "0" -objects $obj -set_property -name "steps.synth_design.args.more options" -value "" -objects $obj - -# set the current synth run -current_run -synthesis [get_runs synth_1] - -# preserve signal names -set_property STEPS.SYNTH_DESIGN.ARGS.FLATTEN_HIERARCHY none [get_runs synth_1] - -# Create 'impl_1' run (if not found) -if {[string equal [get_runs -quiet impl_1] ""]} { - create_run -name impl_1 -part xcu280-fsvh2892-2L-e -flow {Vivado Implementation 2020} -strategy "Vivado Implementation Defaults" -report_strategy {No Reports} -constrset constrs_1 -parent_run synth_1 -} else { - set_property strategy "Vivado Implementation Defaults" [get_runs impl_1] - set_property flow "Vivado Implementation 2020" 
[get_runs impl_1] -} -set obj [get_runs impl_1] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Implementation Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'impl_1_init_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_init_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_init_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps init_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_init_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Design Initialization" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_opt_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_opt_report_drc_0] "" ] } { - create_report_config -report_name impl_1_opt_report_drc_0 
-report_type report_drc:1.0 -steps opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_opt_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Opt Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name 
"options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps power_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_io_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_io_0] "" ] } { - create_report_config -report_name impl_1_place_report_io_0 -report_type report_io:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs 
-of_objects [get_runs impl_1] impl_1_place_report_io_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "IO - Place Design" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_utilization_0] "" ] } { - create_report_config -report_name impl_1_place_report_utilization_0 -report_type report_utilization:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Place Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property -name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_control_sets_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_control_sets_0] "" ] } { - create_report_config -report_name impl_1_place_report_control_sets_0 -report_type report_control_sets:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_control_sets_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Control Sets - Place Design" -objects $obj 
-set_property -name "options.verbose" -value "1" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_place_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_place_report_incremental_reuse_1' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_incremental_reuse_1] "" ] } { - create_report_config -report_name impl_1_place_report_incremental_reuse_1 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_incremental_reuse_1] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 
'impl_1_place_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_place_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps place_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_place_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Place Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_post_place_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_post_place_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_post_place_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_place_power_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] 
impl_1_post_place_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps phys_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" 
-objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_drc_0] "" ] } { - create_report_config -report_name impl_1_route_report_drc_0 -report_type report_drc:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Route Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_methodology_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_methodology_0] "" ] } { - create_report_config -report_name impl_1_route_report_methodology_0 -report_type report_methodology:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_methodology_0] -if { $obj != "" } { 
-set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Methodology - Route Design" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_power_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_power_0] "" ] } { - create_report_config -report_name impl_1_route_report_power_0 -report_type report_power:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_power_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Power - Route Design" -objects $obj -set_property -name "options.advisory" -value "0" -objects $obj -set_property -name "options.xpe" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_route_status_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_route_status_0] "" ] } { - create_report_config -report_name impl_1_route_report_route_status_0 -report_type report_route_status:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_route_status_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Route Status - Route Design" -objects $obj -set_property -name "options.of_objects" -value "" -objects $obj -set_property -name "options.route_type" -value "" -objects $obj -set_property -name "options.list_all_nets" -value "0" -objects $obj -set_property -name "options.show_all" -value "0" -objects $obj -set_property -name "options.has_routing" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects 
$obj - -} -# Create 'impl_1_route_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_route_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Route Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_route_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] 
impl_1_route_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Route Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_clock_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_clock_utilization_0] "" ] } { - create_report_config -report_name impl_1_route_report_clock_utilization_0 -report_type report_clock_utilization:1.0 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_clock_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Clock Utilization - Route Design" -objects $obj -set_property -name "options.write_xdc" -value "0" -objects $obj -set_property -name "options.clock_roots_only" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_route_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_route_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps route_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_route_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Route Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects 
$obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_post_route_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_post_route_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_post_route_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_route_phys_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_post_route_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name 
"options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_post_route_phys_opt_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1] impl_1_post_route_phys_opt_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_post_route_phys_opt_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps post_route_phys_opt_design -runs impl_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1] impl_1_post_route_phys_opt_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs impl_1] -set_property -name "constrset" -value "constrs_1" -objects $obj -set_property -name "description" -value "Default settings for Implementation." 
-objects $obj -set_property -name "flow" -value "Vivado Implementation 2020" -objects $obj -set_property -name "name" -value "impl_1" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "pr_configuration" -value "" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name "auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/impl_1" -objects $obj -set_property -name "strategy" -value "Vivado Implementation Defaults" -objects $obj -set_property -name "steps.init_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.init_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.args.verbose" -value "0" -objects $obj -set_property -name "steps.opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.place_design.tcl.pre" -value "" -objects $obj -set_property -name 
"steps.place_design.tcl.post" -value "" -objects $obj -set_property -name "steps.place_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.place_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.phys_opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.route_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.route_design.tcl.post" -value "" -objects $obj -set_property -name "steps.route_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.route_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.pre" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.post" -value "" -objects $obj -set_property -name "steps.write_bitstream.args.raw_bitfile" 
-value "0" -objects $obj -set_property -name "steps.write_bitstream.args.mask_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.no_binary_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.bin_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.readback_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.logic_location_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.verbose" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.more options" -value "" -objects $obj - -# Create 'impl_1_copy_1' run (if not found) -if {[string equal [get_runs -quiet impl_1_copy_1] ""]} { - create_run -name impl_1_copy_1 -part xcu280-fsvh2892-2L-e -flow {Vivado Implementation 2020} -strategy "Vivado Implementation Defaults" -report_strategy {No Reports} -constrset constrs_1 -parent_run synth_1 -} else { - set_property strategy "Vivado Implementation Defaults" [get_runs impl_1_copy_1] - set_property flow "Vivado Implementation 2020" [get_runs impl_1_copy_1] -} -set obj [get_runs impl_1_copy_1] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Implementation Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'impl_1_copy_1_init_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_init_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_init_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps init_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_init_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Design Initialization" -objects $obj -set_property -name 
"options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_opt_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_opt_report_drc_0] "" ] } { - create_report_config -report_name impl_1_copy_1_opt_report_drc_0 -report_type report_drc:1.0 -steps opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_opt_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Opt Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_opt_report_timing_summary_0] "" ] } { - create_report_config 
-report_name impl_1_copy_1_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps power_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - 
Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_io_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_io_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_io_0 -report_type report_io:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_io_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "IO - Place Design" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_utilization_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_utilization_0 -report_type report_utilization:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj 
[get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Place Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property -name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_control_sets_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_control_sets_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_control_sets_0 -report_type report_control_sets:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_control_sets_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Control Sets - Place Design" -objects $obj -set_property -name "options.verbose" -value "1" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps place_design 
-runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_incremental_reuse_1' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_incremental_reuse_1] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_incremental_reuse_1 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_incremental_reuse_1] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_place_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_place_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_place_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps place_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] 
impl_1_copy_1_place_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Place Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_post_place_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_place_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_post_place_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_place_power_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_place_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name 
"options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps phys_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property 
-name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_drc_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_drc_0 -report_type report_drc:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Route Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_methodology_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_methodology_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_methodology_0 -report_type report_methodology:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_methodology_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Methodology - Route Design" -objects $obj -set_property -name "options.checks" -value 
"" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_power_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_power_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_power_0 -report_type report_power:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_power_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Power - Route Design" -objects $obj -set_property -name "options.advisory" -value "0" -objects $obj -set_property -name "options.xpe" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_route_status_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_route_status_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_route_status_0 -report_type report_route_status:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_route_status_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Route Status - Route Design" -objects $obj -set_property -name "options.of_objects" -value "" -objects $obj -set_property -name "options.route_type" -value "" -objects $obj -set_property -name "options.list_all_nets" -value "0" -objects $obj -set_property -name "options.show_all" -value "0" -objects $obj -set_property -name "options.has_routing" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_timing_summary_0' report (if not found) 
-if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Route Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] 
impl_1_copy_1_route_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Route Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_clock_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_clock_utilization_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_clock_utilization_0 -report_type report_clock_utilization:1.0 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_clock_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Clock Utilization - Route Design" -objects $obj -set_property -name "options.write_xdc" -value "0" -objects $obj -set_property -name "options.clock_roots_only" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_route_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_copy_1_route_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps route_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_route_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Route Design" -objects $obj -set_property 
-name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_post_route_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_route_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_1_post_route_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_route_phys_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_route_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property 
-name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_1_post_route_phys_opt_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_route_phys_opt_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_copy_1_post_route_phys_opt_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps post_route_phys_opt_design -runs impl_1_copy_1 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_1] impl_1_copy_1_post_route_phys_opt_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs 
impl_1_copy_1] -set_property -name "constrset" -value "constrs_1" -objects $obj -set_property -name "description" -value "Default settings for Implementation." -objects $obj -set_property -name "flow" -value "Vivado Implementation 2020" -objects $obj -set_property -name "name" -value "impl_1_copy_1" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "pr_configuration" -value "" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name "auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/impl_1" -objects $obj -set_property -name "strategy" -value "Vivado Implementation Defaults" -objects $obj -set_property -name "steps.init_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.init_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.args.verbose" -value "0" -objects $obj -set_property -name "steps.opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.power_opt_design.tcl.post" -value "" -objects $obj -set_property -name 
"steps.power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.place_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.place_design.tcl.post" -value "" -objects $obj -set_property -name "steps.place_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.place_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.phys_opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.route_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.route_design.tcl.post" -value "" -objects $obj -set_property -name "steps.route_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.route_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.pre" -value 
"" -objects $obj -set_property -name "steps.write_bitstream.tcl.post" -value "" -objects $obj -set_property -name "steps.write_bitstream.args.raw_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.mask_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.no_binary_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.bin_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.readback_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.logic_location_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.verbose" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.more options" -value "" -objects $obj - -# Create 'impl_1_copy_2' run (if not found) -if {[string equal [get_runs -quiet impl_1_copy_2] ""]} { - create_run -name impl_1_copy_2 -part xcu280-fsvh2892-2L-e -flow {Vivado Implementation 2020} -strategy "Vivado Implementation Defaults" -report_strategy {No Reports} -constrset constrs_1 -parent_run synth_1 -} else { - set_property strategy "Vivado Implementation Defaults" [get_runs impl_1_copy_2] - set_property flow "Vivado Implementation 2020" [get_runs impl_1_copy_2] -} -set obj [get_runs impl_1_copy_2] -set_property set_report_strategy_name 1 $obj -set_property report_strategy {Vivado Implementation Default Reports} $obj -set_property set_report_strategy_name 0 $obj -# Create 'impl_1_copy_2_init_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_init_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_init_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps init_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_init_report_timing_summary_0] -if { $obj != "" } { -set_property -name 
"is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Design Initialization" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_opt_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_opt_report_drc_0] "" ] } { - create_report_config -report_name impl_1_copy_2_opt_report_drc_0 -report_type report_drc:1.0 -steps opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_opt_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Opt Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_opt_report_timing_summary_0' report (if not found) -if { [ 
string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps power_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] 
impl_1_copy_2_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_io_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_io_0] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_io_0 -report_type report_io:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_io_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "IO - Place Design" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] 
impl_1_copy_2_place_report_utilization_0] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_utilization_0 -report_type report_utilization:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Utilization - Place Design" -objects $obj -set_property -name "options.pblocks" -value "" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.slr" -value "0" -objects $obj -set_property -name "options.packthru" -value "0" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.hierarchical_percentages" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_control_sets_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_control_sets_0] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_control_sets_0 -report_type report_control_sets:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_control_sets_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Control Sets - Place Design" -objects $obj -set_property -name "options.verbose" -value "1" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs 
impl_1_copy_2] impl_1_copy_2_place_report_incremental_reuse_0] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_incremental_reuse_1' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_incremental_reuse_1] "" ] } { - create_report_config -report_name impl_1_copy_2_place_report_incremental_reuse_1 -report_type report_incremental_reuse:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_incremental_reuse_1] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Place Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_place_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_timing_summary_0] "" ] } { - create_report_config -report_name 
impl_1_copy_2_place_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps place_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_place_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Place Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_post_place_power_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_place_power_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_post_place_power_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_place_power_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_place_power_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property 
-name "display_name" -value "Timing Summary - Post-Place Power Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps phys_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "0" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Place Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name 
"options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_drc_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_drc_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_drc_0 -report_type report_drc:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_drc_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "DRC - Route Design" -objects $obj -set_property -name "options.upgrade_cw" -value "0" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.ruledecks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_methodology_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_methodology_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_methodology_0 -report_type report_methodology:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_methodology_0] -if { $obj != "" } { 
-set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Methodology - Route Design" -objects $obj -set_property -name "options.checks" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_power_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_power_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_power_0 -report_type report_power:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_power_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Power - Route Design" -objects $obj -set_property -name "options.advisory" -value "0" -objects $obj -set_property -name "options.xpe" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_route_status_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_route_status_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_route_status_0 -report_type report_route_status:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_route_status_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Route Status - Route Design" -objects $obj -set_property -name "options.of_objects" -value "" -objects $obj -set_property -name "options.route_type" -value "" -objects $obj -set_property -name "options.list_all_nets" -value "0" -objects $obj -set_property -name "options.show_all" -value "0" -objects $obj -set_property -name 
"options.has_routing" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Route Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "0" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_incremental_reuse_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_incremental_reuse_0] "" ] } { - create_report_config -report_name 
impl_1_copy_2_route_report_incremental_reuse_0 -report_type report_incremental_reuse:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_incremental_reuse_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Incremental Reuse - Route Design" -objects $obj -set_property -name "options.cells" -value "" -objects $obj -set_property -name "options.hierarchical" -value "0" -objects $obj -set_property -name "options.hierarchical_depth" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_clock_utilization_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_clock_utilization_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_clock_utilization_0 -report_type report_clock_utilization:1.0 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_clock_utilization_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Clock Utilization - Route Design" -objects $obj -set_property -name "options.write_xdc" -value "0" -objects $obj -set_property -name "options.clock_roots_only" -value "0" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_route_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_route_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_copy_2_route_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps route_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] 
impl_1_copy_2_route_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Route Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_post_route_phys_opt_report_timing_summary_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_route_phys_opt_report_timing_summary_0] "" ] } { - create_report_config -report_name impl_1_copy_2_post_route_phys_opt_report_timing_summary_0 -report_type report_timing_summary:1.0 -steps post_route_phys_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_route_phys_opt_report_timing_summary_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Timing Summary - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.check_timing_verbose" -value "0" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects 
$obj -set_property -name "options.max_paths" -value "10" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.report_unconstrained" -value "0" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.significant_digits" -value "" -objects $obj -set_property -name "options.cell" -value "" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -# Create 'impl_1_copy_2_post_route_phys_opt_report_bus_skew_0' report (if not found) -if { [ string equal [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_route_phys_opt_report_bus_skew_0] "" ] } { - create_report_config -report_name impl_1_copy_2_post_route_phys_opt_report_bus_skew_0 -report_type report_bus_skew:1.1 -steps post_route_phys_opt_design -runs impl_1_copy_2 -} -set obj [get_report_configs -of_objects [get_runs impl_1_copy_2] impl_1_copy_2_post_route_phys_opt_report_bus_skew_0] -if { $obj != "" } { -set_property -name "is_enabled" -value "1" -objects $obj -set_property -name "display_name" -value "Bus Skew - Post-Route Phys Opt Design" -objects $obj -set_property -name "options.delay_type" -value "" -objects $obj -set_property -name "options.setup" -value "0" -objects $obj -set_property -name "options.hold" -value "0" -objects $obj -set_property -name "options.max_paths" -value "" -objects $obj -set_property -name "options.nworst" -value "" -objects $obj -set_property -name "options.unique_pins" -value "0" -objects $obj -set_property -name "options.path_type" -value "" -objects $obj -set_property -name "options.slack_lesser_than" -value "" -objects $obj -set_property -name "options.slack_greater_than" -value "" -objects $obj -set_property -name 
"options.significant_digits" -value "" -objects $obj -set_property -name "options.warn_on_violation" -value "1" -objects $obj -set_property -name "options.more_options" -value "" -objects $obj - -} -set obj [get_runs impl_1_copy_2] -set_property -name "constrset" -value "constrs_1" -objects $obj -set_property -name "description" -value "Default settings for Implementation." -objects $obj -set_property -name "flow" -value "Vivado Implementation 2020" -objects $obj -set_property -name "name" -value "impl_1_copy_2" -objects $obj -set_property -name "needs_refresh" -value "0" -objects $obj -set_property -name "pr_configuration" -value "" -objects $obj -set_property -name "srcset" -value "sources_1" -objects $obj -set_property -name "incremental_checkpoint" -value "" -objects $obj -set_property -name "auto_incremental_checkpoint" -value "0" -objects $obj -set_property -name "rqs_files" -value "" -objects $obj -set_property -name "incremental_checkpoint.more_options" -value "" -objects $obj -set_property -name "include_in_archive" -value "1" -objects $obj -set_property -name "gen_full_bitstream" -value "1" -objects $obj -set_property -name "auto_incremental_checkpoint.directory" -value "$proj_dir/project_1.srcs/utils_1/imports/impl_1" -objects $obj -set_property -name "strategy" -value "Vivado Implementation Defaults" -objects $obj -set_property -name "steps.init_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.init_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.opt_design.args.verbose" -value "0" -objects $obj -set_property -name "steps.opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.opt_design.args.more options" -value "" -objects $obj -set_property -name 
"steps.power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.place_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.place_design.tcl.post" -value "" -objects $obj -set_property -name "steps.place_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.place_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.post_place_power_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.phys_opt_design.is_enabled" -value "1" -objects $obj -set_property -name "steps.phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name "steps.phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.route_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.route_design.tcl.post" -value "" -objects $obj -set_property -name "steps.route_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.route_design.args.more options" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.is_enabled" -value "0" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.pre" -value "" -objects $obj -set_property -name "steps.post_route_phys_opt_design.tcl.post" -value "" -objects $obj -set_property -name 
"steps.post_route_phys_opt_design.args.directive" -value "Default" -objects $obj -set_property -name "steps.post_route_phys_opt_design.args.more options" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.pre" -value "" -objects $obj -set_property -name "steps.write_bitstream.tcl.post" -value "" -objects $obj -set_property -name "steps.write_bitstream.args.raw_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.mask_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.no_binary_bitfile" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.bin_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.readback_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.logic_location_file" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.verbose" -value "0" -objects $obj -set_property -name "steps.write_bitstream.args.more options" -value "" -objects $obj - -# set the current impl run -current_run -implementation [get_runs impl_1] - -puts "INFO: Project created:${project_name}" -# Create 'drc_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "drc_1" ] ] ""]} { -create_dashboard_gadget -name {drc_1} -type drc -} -set obj [get_dashboard_gadgets [ list "drc_1" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "impl_1#impl_1_route_report_drc_0" -objects $obj -set_property -name "run.step" -value "route_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.critical_warning" -value "1" -objects $obj -set_property -name 
"statistics.error" -value "1" -objects $obj -set_property -name "statistics.info" -value "1" -objects $obj -set_property -name "statistics.warning" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Graph" -objects $obj - -# Create 'methodology_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "methodology_1" ] ] ""]} { -create_dashboard_gadget -name {methodology_1} -type methodology -} -set obj [get_dashboard_gadgets [ list "methodology_1" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "impl_1#impl_1_route_report_methodology_0" -objects $obj -set_property -name "run.step" -value "route_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.critical_warning" -value "1" -objects $obj -set_property -name "statistics.error" -value "1" -objects $obj -set_property -name "statistics.info" -value "1" -objects $obj -set_property -name "statistics.warning" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Graph" -objects $obj - -# Create 'power_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "power_1" ] ] ""]} { -create_dashboard_gadget -name {power_1} -type power -} -set obj [get_dashboard_gadgets [ list "power_1" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property 
-name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "impl_1#impl_1_route_report_power_0" -objects $obj -set_property -name "run.step" -value "route_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.bram" -value "1" -objects $obj -set_property -name "statistics.clocks" -value "1" -objects $obj -set_property -name "statistics.dsp" -value "1" -objects $obj -set_property -name "statistics.gth" -value "1" -objects $obj -set_property -name "statistics.gtp" -value "1" -objects $obj -set_property -name "statistics.gtx" -value "1" -objects $obj -set_property -name "statistics.gtz" -value "1" -objects $obj -set_property -name "statistics.io" -value "1" -objects $obj -set_property -name "statistics.logic" -value "1" -objects $obj -set_property -name "statistics.mmcm" -value "1" -objects $obj -set_property -name "statistics.pcie" -value "1" -objects $obj -set_property -name "statistics.phaser" -value "1" -objects $obj -set_property -name "statistics.pll" -value "1" -objects $obj -set_property -name "statistics.pl_static" -value "1" -objects $obj -set_property -name "statistics.ps7" -value "1" -objects $obj -set_property -name "statistics.ps" -value "1" -objects $obj -set_property -name "statistics.ps_static" -value "1" -objects $obj -set_property -name "statistics.signals" -value "1" -objects $obj -set_property -name "statistics.total_power" -value "1" -objects $obj -set_property -name "statistics.transceiver" -value "1" -objects $obj -set_property -name "statistics.xadc" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Graph" -objects $obj - -# Create 'timing_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "timing_1" ] ] ""]} { -create_dashboard_gadget -name {timing_1} -type timing -} -set obj [get_dashboard_gadgets [ list "timing_1" ] ] -set_property -name 
"active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "impl_1#impl_1_route_report_timing_summary_0" -objects $obj -set_property -name "run.step" -value "route_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.ths" -value "1" -objects $obj -set_property -name "statistics.tns" -value "1" -objects $obj -set_property -name "statistics.tpws" -value "1" -objects $obj -set_property -name "statistics.whs" -value "1" -objects $obj -set_property -name "statistics.wns" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Table" -objects $obj - -# Create 'utilization_1' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "utilization_1" ] ] ""]} { -create_dashboard_gadget -name {utilization_1} -type utilization -} -set obj [get_dashboard_gadgets [ list "utilization_1" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "synth_1#synth_1_synth_report_utilization_0" -objects $obj -set_property -name "run.step" -value "synth_design" -objects $obj -set_property -name "run.type" -value "synthesis" -objects $obj -set_property -name "statistics.bram" -value "1" -objects $obj -set_property -name "statistics.bufg" -value "1" -objects $obj -set_property -name "statistics.dsp" -value "1" -objects $obj -set_property -name "statistics.ff" 
-value "1" -objects $obj -set_property -name "statistics.gt" -value "1" -objects $obj -set_property -name "statistics.io" -value "1" -objects $obj -set_property -name "statistics.lut" -value "1" -objects $obj -set_property -name "statistics.lutram" -value "1" -objects $obj -set_property -name "statistics.mmcm" -value "1" -objects $obj -set_property -name "statistics.pcie" -value "1" -objects $obj -set_property -name "statistics.pll" -value "1" -objects $obj -set_property -name "statistics.uram" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Graph" -objects $obj - -# Create 'utilization_2' gadget (if not found) -if {[string equal [get_dashboard_gadgets [ list "utilization_2" ] ] ""]} { -create_dashboard_gadget -name {utilization_2} -type utilization -} -set obj [get_dashboard_gadgets [ list "utilization_2" ] ] -set_property -name "active_reports" -value "" -objects $obj -set_property -name "active_reports_invalid" -value "" -objects $obj -set_property -name "active_run" -value "0" -objects $obj -set_property -name "hide_unused_data" -value "1" -objects $obj -set_property -name "incl_new_reports" -value "0" -objects $obj -set_property -name "reports" -value "impl_1#impl_1_place_report_utilization_0" -objects $obj -set_property -name "run.step" -value "place_design" -objects $obj -set_property -name "run.type" -value "implementation" -objects $obj -set_property -name "statistics.bram" -value "1" -objects $obj -set_property -name "statistics.bufg" -value "1" -objects $obj -set_property -name "statistics.dsp" -value "1" -objects $obj -set_property -name "statistics.ff" -value "1" -objects $obj -set_property -name "statistics.gt" -value "1" -objects $obj -set_property -name "statistics.io" -value "1" -objects $obj -set_property -name "statistics.lut" -value "1" -objects $obj -set_property -name "statistics.lutram" -value "1" -objects $obj -set_property -name "statistics.mmcm" 
-value "1" -objects $obj -set_property -name "statistics.pcie" -value "1" -objects $obj -set_property -name "statistics.pll" -value "1" -objects $obj -set_property -name "statistics.uram" -value "1" -objects $obj -set_property -name "view.orientation" -value "Horizontal" -objects $obj -set_property -name "view.type" -value "Graph" -objects $obj - -move_dashboard_gadget -name {utilization_1} -row 0 -col 0 -move_dashboard_gadget -name {power_1} -row 1 -col 0 -move_dashboard_gadget -name {drc_1} -row 2 -col 0 -move_dashboard_gadget -name {timing_1} -row 0 -col 1 -move_dashboard_gadget -name {utilization_2} -row 1 -col 1 -move_dashboard_gadget -name {methodology_1} -row 2 -col 1 diff --git a/hw/syn/xilinx/test/project_1_files/kernel.bin.coe b/hw/syn/xilinx/test/project_1_files/kernel.bin.coe deleted file mode 100644 index a316d82b5..000000000 --- a/hw/syn/xilinx/test/project_1_files/kernel.bin.coe +++ /dev/null @@ -1,16386 +0,0 @@ -MEMORY_INITIALIZATION_RADIX=16; -MEMORY_INITIALIZATION_VECTOR= -0, -000000C00000008000000002, -00000003000000020000000100000000, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -00f586b30007a60340d585b300d7073300d787b3002797930027171300f707330207086302e787b3cc5027f30480258304402683040027030000000b008000ef, 
-00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008067fef718e300c6a02300478793, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, 
-0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0; diff --git a/hw/syn/xilinx/xrt/Makefile b/hw/syn/xilinx/xrt/Makefile index 38ae29f36..f5997352c 100644 --- a/hw/syn/xilinx/xrt/Makefile +++ b/hw/syn/xilinx/xrt/Makefile @@ -4,7 +4,7 @@ include $(ROOT_DIR)/config.mk ifneq ($(findstring Makefile, $(MAKEFILE_LIST)), Makefile) help: $(ECHO) "Makefile Usage:" - $(ECHO) " make all TARGET= PLATFORM=" + $(ECHO) " make all TARGET= PLATFORM=" $(ECHO) " Command to generate the design for specified Target and Device." 
$(ECHO) "" $(ECHO) " make clean" @@ -53,6 +53,9 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE DBG_TRACE_FLAGS += -DDBG_TRACE_MEM DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE DBG_TRACE_FLAGS += -DDBG_TRACE_AFU +DBG_TRACE_FLAGS += -DDBG_TRACE_TEX +DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER +DBG_TRACE_FLAGS += -DDBG_TRACE_OM DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR # Control logic analyzer monitors @@ -60,7 +63,6 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED # cluster configuration CONFIGS_1c := -DNUM_CLUSTERS=1 -DNUM_CORES=1 @@ -72,26 +74,26 @@ CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 CONFIGS += $(CONFIGS_$(NUM_CORES)c) -# include paths +# include sources +RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv +RTL_PKGS += $(RTL_DIR)/tex/VX_tex_pkg.sv $(RTL_DIR)/raster/VX_raster_pkg.sv $(RTL_DIR)/om/VX_om_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src + RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif +TEX_INCLUDE = -I$(RTL_DIR)/tex +RASTER_INCLUDE = -I$(RTL_DIR)/raster +OM_INCLUDE = -I$(RTL_DIR)/om RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(AFU_DIR) -RTL_INCLUDE += $(FPU_INCLUDE) +RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) 
$(RASTER_INCLUDE) $(OM_INCLUDE) # Kernel compiler global settings VPP_FLAGS += --link --target $(TARGET) --platform $(PLATFORM) --save-temps --no_ip_cache VPP_FLAGS += --vivado.synth.jobs $(JOBS) --vivado.impl.jobs $(JOBS) -ifeq ($(DEV_ARCH), zynquplus) -# ztnq -else ifeq ($(DEV_ARCH), versal) -# versal -else -# alveo -VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:15] -endif +# load platform settings +include $(SRC_DIR)/platforms.mk VPP_FLAGS += --report_level 2 VPP_FLAGS += --config $(SRC_DIR)/vitis.ini @@ -113,12 +115,13 @@ endif # Debugging ifdef DEBUG - VPP_FLAGS += -g --debug.protocol all + VPP_FLAGS += -g --optimize 0 --debug.protocol all ifneq ($(TARGET), hw) VPP_FLAGS += --vivado.prop fileset.sim_1.xsim.elaborate.debug_level=all CFLAGS += -DDEBUG_LEVEL=$(DEBUG) $(DBG_TRACE_FLAGS) else - CFLAGS += -DNDEBUG + VPP_FLAGS += --debug.chipscope vortex_afu_1 + CFLAGS += -DNDEBUG -DCHIPSCOPE $(DBG_SCOPE_FLAGS) endif else VPP_FLAGS += --optimize 3 @@ -128,7 +131,7 @@ endif # Enable scope analyzer ifdef SCOPE CFLAGS += -DSCOPE $(DBG_SCOPE_FLAGS) - SCOPE_JSON += $(BUILD_DIR)/scope.json + SCOPE_JSON += $(BIN_DIR)/scope.json endif # compilation flags @@ -138,7 +141,7 @@ CFLAGS += $(CONFIGS) CFLAGS += $(RTL_INCLUDE) # ast dump flags -XML_CFLAGS = $(filter-out -DSYNTHESIS -DVIVADO, $(CFLAGS)) -I$(DPI_DIR) +XML_CFLAGS = $(filter-out -DSYNTHESIS -DVIVADO, $(CFLAGS)) $(RTL_PKGS) -I$(DPI_DIR) -DSV_DPI # RTL Kernel only supports Hardware and Hardware Emulation. 
ifneq ($(TARGET),$(findstring $(TARGET), hw hw_emu)) @@ -157,13 +160,13 @@ gen-ast: $(BUILD_DIR)/vortex.xml $(BUILD_DIR)/vortex.xml: mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); verilator --xml-only -O0 $(XML_CFLAGS) vortex_afu.v --xml-output vortex.xml -scope-json: $(BUILD_DIR)/scope.json -$(BUILD_DIR)/scope.json: $(BUILD_DIR)/vortex.xml - mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(SCRIPT_DIR)/scope.py vortex.xml -o scope.json +scope-json: $(BIN_DIR)/scope.json +$(BIN_DIR)/scope.json: $(BUILD_DIR)/vortex.xml + mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(SCRIPT_DIR)/scope.py vortex.xml -o bin/scope.json gen-xo: $(XO_CONTAINER) $(XO_CONTAINER): $(BUILD_DIR)/sources.txt - mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/scripts/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) + mkdir -p $(BUILD_DIR); cd $(BUILD_DIR); $(VIVADO) -mode batch -source $(SRC_DIR)/gen_xo.tcl -tclargs ../$(XO_CONTAINER) vortex_afu sources.txt $(SCRIPT_DIR) ../$(BUILD_DIR) gen-bin: $(XCLBIN_CONTAINER) $(XCLBIN_CONTAINER): $(XO_CONTAINER) $(SCOPE_JSON) @@ -174,17 +177,14 @@ $(BIN_DIR)/emconfig.json: mkdir -p $(BIN_DIR); cd $(BUILD_DIR); emconfigutil --platform $(PLATFORM) --od ../$(BIN_DIR) report: $(XCLBIN_CONTAINER) -ifeq ($(TARGET),$(findstring $(TARGET), hw)) - cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin/runme.log - cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_full_util_routed.rpt $(BUILD_DIR)/bin/synthesis.log - cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin/timing.log +ifeq ($(TARGET), hw) + cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin + cp $(BUILD_DIR)/_x/reports/link/syn/ulp_vortex_afu_1_0_synth_1_ulp_vortex_afu_1_0_utilization_synth.rpt $(BUILD_DIR)/bin + cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin endif -hwserver: - debug_hw 
--xvc_pcie /dev/xfpga/xvc_pub.u2305.0 --hw_server & - chipscope: - debug_hw --vivado --host localhost --ltx_file $(BUILD_DIR)/_x/link/vivado/vpl/prj/prj.runs/impl_1/debug_nets.ltx & + debug_hw --vivado --host localhost --ltx_file $(BUILD_DIR)/bin/vortex_afu.ltx & clean: $(RMDIR) $(BUILD_DIR) diff --git a/hw/syn/xilinx/xrt/scripts/gen_xo.tcl b/hw/syn/xilinx/xrt/gen_xo.tcl similarity index 89% rename from hw/syn/xilinx/xrt/scripts/gen_xo.tcl rename to hw/syn/xilinx/xrt/gen_xo.tcl index 0f95f09be..d5b1e41a2 100644 --- a/hw/syn/xilinx/xrt/scripts/gen_xo.tcl +++ b/hw/syn/xilinx/xrt/gen_xo.tcl @@ -1,10 +1,10 @@ # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -12,7 +12,7 @@ # limitations under the License. 
if { $::argc != 5 } { - puts "ERROR: Program \"$::argv0\" requires 4 arguments!\n" + puts "ERROR: Program \"$::argv0\" requires 5 arguments!\n" puts "Usage: $::argv0 \n" exit } @@ -31,10 +31,10 @@ if {[file exists "${xoname}"]} { set argv [list ${build_dir}/ip] set argc 1 -source ${script_path}/gen_ip.tcl +source ${tool_dir}/xilinx_ip_gen.tcl set argv [list ${krnl_name} ${vcs_file} ${tool_dir} ${build_dir}] set argc 4 source ${script_path}/package_kernel.tcl -package_xo -xo_path ${xoname} -kernel_name ${krnl_name} -ip_directory "${build_dir}/xo/packaged_kernel" +package_xo -xo_path ${xoname} -kernel_name ${krnl_name} -ip_directory "${build_dir}/xo/packaged_kernel" \ No newline at end of file diff --git a/hw/syn/xilinx/xrt/scripts/package_kernel.tcl b/hw/syn/xilinx/xrt/package_kernel.tcl similarity index 50% rename from hw/syn/xilinx/xrt/scripts/package_kernel.tcl rename to hw/syn/xilinx/xrt/package_kernel.tcl index 607e7955d..ebe767c69 100644 --- a/hw/syn/xilinx/xrt/scripts/package_kernel.tcl +++ b/hw/syn/xilinx/xrt/package_kernel.tcl @@ -1,10 +1,10 @@ # Copyright © 2019-2023 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -22,6 +22,11 @@ set vcs_file [lindex $::argv 1] set tool_dir [lindex $::argv 2] set build_dir [lindex $::argv 3] +puts "Using krnl_name=$krnl_name" +puts "Using vcs_file=$vcs_file" +puts "Using tool_dir=$tool_dir" +puts "Using build_dir=$build_dir" + set path_to_packaged "${build_dir}/xo/packaged_kernel" set path_to_tmp_project "${build_dir}/xo/project" @@ -36,14 +41,27 @@ set vdefines_list [lindex $vlist 2] #puts ${vincludes_list} #puts ${vdefines_list} -# find if chipscope is enabled set chipscope 0 +set num_banks 1 +set merged_mem_if 0 + +# parse vdefines_list for configuration parameters foreach def $vdefines_list { set fields [split $def "="] set name [lindex $fields 0] if { $name == "CHIPSCOPE" } { set chipscope 1 } + if { $name == "PLATFORM_MEMORY_BANKS" } { + set num_banks [lindex $fields 1] + } + if { $name == "PLATFORM_MERGED_MEMORY_INTERFACE" } { + set merged_mem_if 1 + } +} + +if { $merged_mem_if == 1 } { + set num_banks 1 } create_project -force kernel_pack $path_to_tmp_project @@ -51,80 +69,76 @@ create_project -force kernel_pack $path_to_tmp_project add_files -norecurse ${vsources_list} set obj [get_filesets sources_1] -set files [list \ +set ip_files [list \ [file normalize "${build_dir}/ip/xil_fdiv/xil_fdiv.xci"] \ [file normalize "${build_dir}/ip/xil_fma/xil_fma.xci"] \ [file normalize "${build_dir}/ip/xil_fsqrt/xil_fsqrt.xci"] \ ] -add_files -verbose -norecurse -fileset $obj $files +add_files -verbose -norecurse -fileset $obj $ip_files set_property include_dirs ${vincludes_list} [current_fileset] -#set_property verilog_define ${vdefines_list} [current_fileset] +set_property verilog_define ${vdefines_list} [current_fileset] set obj [get_filesets sources_1] set_property -verbose -name "top" -value ${krnl_name} -objects $obj if { $chipscope == 1 } { # hw debugging - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name ila_afu + create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_afu 
set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ + CONFIG.C_DATA_DEPTH {8192} \ CONFIG.C_NUM_OF_PROBES {2} \ CONFIG.C_PROBE0_WIDTH {8} \ - CONFIG.C_PROBE1_WIDTH {24} \ + CONFIG.C_PROBE1_WIDTH {64} \ + CONFIG.ALL_PROBE_SAME_MU {false} \ + CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_afu] generate_target {instantiation_template} [get_files ila_afu.xci] set_property generate_synth_checkpoint false [get_files ila_afu.xci] - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name ila_fetch + create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_fetch set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ + CONFIG.C_DATA_DEPTH {8192} \ CONFIG.C_NUM_OF_PROBES {3} \ - CONFIG.C_PROBE0_WIDTH {128} \ - CONFIG.C_PROBE1_WIDTH {128} \ - CONFIG.C_PROBE2_WIDTH {128} \ + CONFIG.C_PROBE0_WIDTH {40} \ + CONFIG.C_PROBE1_WIDTH {80} \ + CONFIG.C_PROBE2_WIDTH {40} \ + CONFIG.ALL_PROBE_SAME_MU {false} \ + CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_fetch] generate_target {instantiation_template} [get_files ila_fetch.xci] set_property generate_synth_checkpoint false [get_files ila_fetch.xci] - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name ila_issue + create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_issue set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ - CONFIG.C_NUM_OF_PROBES {2} \ - CONFIG.C_PROBE0_WIDTH {256} \ - CONFIG.C_PROBE1_WIDTH {128} \ + CONFIG.C_DATA_DEPTH {8192} \ + CONFIG.C_NUM_OF_PROBES {4} \ + CONFIG.C_PROBE0_WIDTH {112} \ + CONFIG.C_PROBE1_WIDTH {112} \ + CONFIG.C_PROBE2_WIDTH {280} \ + CONFIG.C_PROBE3_WIDTH {112} \ + CONFIG.ALL_PROBE_SAME_MU {false} \ + CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_issue] generate_target {instantiation_template} 
[get_files ila_issue.xci] set_property generate_synth_checkpoint false [get_files ila_issue.xci] - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name ila_lsu + create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_lsu set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ - CONFIG.C_NUM_OF_PROBES {4} \ - CONFIG.C_PROBE0_WIDTH {256} \ - CONFIG.C_PROBE1_WIDTH {128} \ - CONFIG.C_PROBE2_WIDTH {288} \ - CONFIG.C_PROBE3_WIDTH {256} \ + CONFIG.C_DATA_DEPTH {8192} \ + CONFIG.C_NUM_OF_PROBES {3} \ + CONFIG.C_PROBE0_WIDTH {288} \ + CONFIG.C_PROBE1_WIDTH {152} \ + CONFIG.C_PROBE2_WIDTH {72} \ + CONFIG.ALL_PROBE_SAME_MU {false} \ + CONFIG.ALL_PROBE_SAME_MU_CNT {2} \ ] [get_ips ila_lsu] generate_target {instantiation_template} [get_files ila_lsu.xci] set_property generate_synth_checkpoint false [get_files ila_lsu.xci] - - create_ip -name axis_ila -vendor xilinx.com -library ip -version 1.1 -module_name ila_msched - set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \ - CONFIG.C_EN_STRG_QUAL {1} \ - CONFIG.C_DATA_DEPTH {4096} \ - CONFIG.C_NUM_OF_PROBES {4} \ - CONFIG.C_PROBE0_WIDTH {128} \ - CONFIG.C_PROBE1_WIDTH {128} \ - CONFIG.C_PROBE2_WIDTH {128} \ - CONFIG.C_PROBE3_WIDTH {128} \ - ] [get_ips ila_msched] - generate_target {instantiation_template} [get_files ila_msched.xci] - set_property generate_synth_checkpoint false [get_files ila_msched.xci] } update_compile_order -fileset sources_1 @@ -142,7 +156,7 @@ foreach up [ipx::get_user_parameters] { ipx::associate_bus_interfaces -busif s_axi_ctrl -clock ap_clk $core -for {set i 0} {$i < 1} {incr i} { +for {set i 0} {$i < $num_banks} {incr i} { ipx::associate_bus_interfaces -busif m_axi_mem_$i -clock ap_clk $core } @@ -150,96 +164,98 @@ set mem_map [::ipx::add_memory_map -quiet "s_axi_ctrl" $core] set addr_block [::ipx::add_address_block -quiet "reg0" $mem_map] set reg [::ipx::add_register "CTRL" $addr_block] - 
set_property description "Control signals" $reg - set_property address_offset 0x000 $reg - set_property size 32 $reg +set_property description "Control signals" $reg +set_property address_offset 0x000 $reg +set_property size 32 $reg set field [ipx::add_field AP_START $reg] - set_property ACCESS {read-write} $field - set_property BIT_OFFSET {0} $field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'ap_start'.} $field - set_property MODIFIED_WRITE_VALUE {modify} $field +set_property ACCESS {read-write} $field +set_property BIT_OFFSET {0} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'ap_start'.} $field +set_property MODIFIED_WRITE_VALUE {modify} $field set field [ipx::add_field AP_DONE $reg] - set_property ACCESS {read-only} $field - set_property BIT_OFFSET {1} $field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'ap_done'.} $field - set_property READ_ACTION {modify} $field +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {1} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'ap_done'.} $field +set_property READ_ACTION {modify} $field set field [ipx::add_field AP_IDLE $reg] - set_property ACCESS {read-only} $field - set_property BIT_OFFSET {2} $field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'ap_idle'.} $field - set_property READ_ACTION {modify} $field +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {2} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'ap_idle'.} $field +set_property READ_ACTION {modify} $field set field [ipx::add_field AP_READY $reg] - set_property ACCESS {read-only} $field - set_property BIT_OFFSET {3} $field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'ap_ready'.} $field - set_property 
READ_ACTION {modify} $field +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {3} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'ap_ready'.} $field +set_property READ_ACTION {modify} $field set field [ipx::add_field RESERVED_1 $reg] - set_property ACCESS {read-only} $field - set_property BIT_OFFSET {4} $field - set_property BIT_WIDTH {3} $field - set_property DESCRIPTION {Reserved. 0s on read.} $field - set_property READ_ACTION {modify} $field +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {4} $field +set_property BIT_WIDTH {3} $field +set_property DESCRIPTION {Reserved. 0s on read.} $field +set_property READ_ACTION {modify} $field set field [ipx::add_field AUTO_RESTART $reg] - set_property ACCESS {read-write} $field - set_property BIT_OFFSET {7} $field - set_property BIT_WIDTH {1} $field - set_property DESCRIPTION {Control signal Register for 'auto_restart'.} $field - set_property MODIFIED_WRITE_VALUE {modify} $field +set_property ACCESS {read-write} $field +set_property BIT_OFFSET {7} $field +set_property BIT_WIDTH {1} $field +set_property DESCRIPTION {Control signal Register for 'auto_restart'.} $field +set_property MODIFIED_WRITE_VALUE {modify} $field set field [ipx::add_field RESERVED_2 $reg] - set_property ACCESS {read-only} $field - set_property BIT_OFFSET {8} $field - set_property BIT_WIDTH {24} $field - set_property DESCRIPTION {Reserved. 0s on read.} $field - set_property READ_ACTION {modify} $field +set_property ACCESS {read-only} $field +set_property BIT_OFFSET {8} $field +set_property BIT_WIDTH {24} $field +set_property DESCRIPTION {Reserved. 
0s on read.} $field +set_property READ_ACTION {modify} $field set reg [::ipx::add_register "GIER" $addr_block] - set_property description "Global Interrupt Enable Register" $reg - set_property address_offset 0x004 $reg - set_property size 32 $reg +set_property description "Global Interrupt Enable Register" $reg +set_property address_offset 0x004 $reg +set_property size 32 $reg set reg [::ipx::add_register "IP_IER" $addr_block] - set_property description "IP Interrupt Enable Register" $reg - set_property address_offset 0x008 $reg - set_property size 32 $reg +set_property description "IP Interrupt Enable Register" $reg +set_property address_offset 0x008 $reg +set_property size 32 $reg set reg [::ipx::add_register "IP_ISR" $addr_block] - set_property description "IP Interrupt Status Register" $reg - set_property address_offset 0x00C $reg - set_property size 32 $reg +set_property description "IP Interrupt Status Register" $reg +set_property address_offset 0x00C $reg +set_property size 32 $reg set reg [::ipx::add_register -quiet "DEV" $addr_block] - set_property address_offset 0x010 $reg - set_property size [expr {8*8}] $reg +set_property address_offset 0x010 $reg +set_property size [expr {8*8}] $reg set reg [::ipx::add_register -quiet "ISA" $addr_block] - set_property address_offset 0x01C $reg - set_property size [expr {8*8}] $reg +set_property address_offset 0x018 $reg +set_property size [expr {8*8}] $reg set reg [::ipx::add_register -quiet "DCR" $addr_block] - set_property address_offset 0x028 $reg - set_property size [expr {8*8}] $reg +set_property address_offset 0x020 $reg +set_property size [expr {8*8}] $reg set reg [::ipx::add_register -quiet "SCP" $addr_block] - set_property address_offset 0x034 $reg - set_property size [expr {8*8}] $reg - -for {set i 0} {$i < 1} {incr i} { - set reg [::ipx::add_register -quiet "MEM_$i" $addr_block] - set_property address_offset [expr {0x040 + $i * 12}] $reg - set_property size [expr {8*8}] $reg - set regparam 
[::ipx::add_register_parameter -quiet {ASSOCIATED_BUSIF} $reg] - set_property value m_axi_mem_$i $regparam +set_property address_offset 0x028 $reg +set_property size [expr {8*8}] $reg + +for {set i 0} {$i < $num_banks} {incr i} { +# Add register for each memory bank +set reg [::ipx::add_register -quiet "MEM_$i" $addr_block] +set_property address_offset [expr {0x30 + $i * 8}] $reg +set_property size [expr {8*8}] $reg +# Associate the bus interface +set regparam [::ipx::add_register_parameter ASSOCIATED_BUSIF $reg] +set_property value m_axi_mem_$i $regparam } set_property slave_memory_map_ref "s_axi_ctrl" [::ipx::get_bus_interfaces -of $core "s_axi_ctrl"] diff --git a/hw/syn/xilinx/xrt/platforms.mk b/hw/syn/xilinx/xrt/platforms.mk new file mode 100644 index 000000000..5a9a88e4d --- /dev/null +++ b/hw/syn/xilinx/xrt/platforms.mk @@ -0,0 +1,35 @@ +# Platform specific configurations +# Add your platform specific configurations here + +CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512 + +ifeq ($(DEV_ARCH), zynquplus) +# zynquplus +CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 +else ifeq ($(DEV_ARCH), versal) +# versal +CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 +ifneq ($(findstring xilinx_vck5000,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_OFFSET=40'hC000000000 +endif +else +# alveo +ifneq ($(findstring xilinx_u55c,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=29 + CONFIGS += -DPLATFORM_MERGED_MEMORY_INTERFACE + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] + #VPP_FLAGS += $(foreach i,$(shell seq 0 31), --connectivity.sp vortex_afu_1.m_axi_mem_$(i):HBM[$(i)]) +else ifneq ($(findstring xilinx_u50,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=28 + VPP_FLAGS += --connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] +else ifneq ($(findstring xilinx_u280,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=32 -DPLATFORM_MEMORY_ADDR_WIDTH=28 + VPP_FLAGS += 
--connectivity.sp vortex_afu_1.m_axi_mem_0:HBM[0:31] +else ifneq ($(findstring xilinx_u250,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=34 +else ifneq ($(findstring xilinx_u200,$(XSA)),) + CONFIGS += -DPLATFORM_MEMORY_BANKS=4 -DPLATFORM_MEMORY_ADDR_WIDTH=34 +else + CONFIGS += -DPLATFORM_MEMORY_BANKS=1 -DPLATFORM_MEMORY_ADDR_WIDTH=32 +endif +endif diff --git a/hw/syn/yosys/Makefile b/hw/syn/yosys/Makefile index 80bfdae02..a09d9198d 100644 --- a/hw/syn/yosys/Makefile +++ b/hw/syn/yosys/Makefile @@ -29,7 +29,7 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED + # cluster configuration CONFIGS_1c := -DNUM_CLUSTERS=1 -DNUM_CORES=1 @@ -44,7 +44,7 @@ CONFIGS += $(CONFIGS_$(NUM_CORES)c) # include paths FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src + FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src endif RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache RTL_INCLUDE += $(FPU_INCLUDE) diff --git a/hw/syn/yosys/synth.sh b/hw/syn/yosys/synth.sh index 79708b189..b44f16e6b 100755 --- a/hw/syn/yosys/synth.sh +++ b/hw/syn/yosys/synth.sh @@ -20,13 +20,15 @@ # exit when any command fails set -e +library="" +sdc_file="" source="" top_level="" dir_list=() inc_args="" macro_args="" no_warnings=1 -process="elaborate,netlist,techmap,verilog" +process="elaborate,netlist,techmap,verilog,link" declare -a excluded_warnings=("Resizing cell port") @@ -66,8 +68,14 @@ checkErrors() 
usage() { echo "$0 usage:" && grep " .)\ #" $0; exit 0; } [ $# -eq 0 ] && usage -while getopts "s:t:I:D:P:Wh" arg; do +while getopts "c:l:s:t:I:D:P:Wh" arg; do case $arg in + l) # library + library=${OPTARG} + ;; + c) # SDC constraints + sdc_file=${OPTARG} + ;; s) # source source=${OPTARG} ;; @@ -95,6 +103,16 @@ while getopts "s:t:I:D:P:Wh" arg; do done { + # read device library + if [ -n "$library" ]; then + echo "read_liberty $library" + fi + + # read design constraints + if [ -n "$sdc_file" ]; then + echo "read_sdc $sdc_file" + fi + # read design sources for dir in "${dir_list[@]}" do @@ -117,6 +135,11 @@ done echo "synth -top $top_level" fi + # link design + if echo "$process" | grep -q "link"; then + echo "link_design -top $top_level" + fi + # convert to netlist if echo "$process" | grep -q "netlist"; then echo "proc; opt" diff --git a/hw/unittest/Makefile b/hw/unittest/Makefile index 5722ec9bc..f37d6ae1b 100644 --- a/hw/unittest/Makefile +++ b/hw/unittest/Makefile @@ -5,6 +5,8 @@ all: $(MAKE) -C cache_top $(MAKE) -C core_top $(MAKE) -C issue_top + $(MAKE) -C local_mem_top + $(MAKE) -C mem_unit_top run: $(MAKE) -C cache run @@ -13,6 +15,8 @@ run: $(MAKE) -C cache_top run $(MAKE) -C core_top run $(MAKE) -C issue_top run + $(MAKE) -C local_mem_top run + $(MAKE) -C mem_unit_top run clean: $(MAKE) -C cache clean @@ -20,4 +24,6 @@ clean: $(MAKE) -C mem_streamer clean $(MAKE) -C cache_top clean $(MAKE) -C core_top clean - $(MAKE) -C issue_top clean \ No newline at end of file + $(MAKE) -C issue_top clean + $(MAKE) -C local_mem_top clean + $(MAKE) -C mem_unit_top clean \ No newline at end of file diff --git a/hw/unittest/common.mk b/hw/unittest/common.mk index 48aefd415..71f6914bf 100644 --- a/hw/unittest/common.mk +++ b/hw/unittest/common.mk @@ -25,7 +25,7 @@ VL_FLAGS += $(RTL_PKGS) VL_FLAGS += --cc $(TOP) --top-module $(TOP) # Enable Verilator multithreaded simulation -THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +THREADS ?= 
$(shell python3 -c 'import multiprocessing as mp; print(mp.cpu_count())') VL_FLAGS += -j $(THREADS) #VL_FLAGS += --threads $(THREADS) diff --git a/hw/unittest/core_top/Makefile b/hw/unittest/core_top/Makefile index d9fbf40f6..f9d037999 100644 --- a/hw/unittest/core_top/Makefile +++ b/hw/unittest/core_top/Makefile @@ -16,7 +16,7 @@ SRCS += $(SRC_DIR)/main.cpp DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE -RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/fpu -I$(RTL_DIR)/core diff --git a/hw/unittest/issue_top/Makefile b/hw/unittest/issue_top/Makefile index 7e298849c..b6a8b0527 100644 --- a/hw/unittest/issue_top/Makefile +++ b/hw/unittest/issue_top/Makefile @@ -16,7 +16,7 @@ SRCS += $(SRC_DIR)/main.cpp DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE -RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core diff --git a/hw/unittest/local_mem_top/Makefile b/hw/unittest/local_mem_top/Makefile new file mode 100644 index 000000000..22a8adfae --- /dev/null +++ b/hw/unittest/local_mem_top/Makefile @@ -0,0 +1,26 @@ +ROOT_DIR := $(realpath ../../..) 
+include $(ROOT_DIR)/config.mk + +PROJECT := local_mem_top + +RTL_DIR := $(VORTEX_HOME)/hw/rtl +DPI_DIR := $(VORTEX_HOME)/hw/dpi + +SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT) + +CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common +CXXFLAGS += -I$(ROOT_DIR)/hw + +SRCS := $(DPI_DIR)/util_dpi.cpp +SRCS += $(SRC_DIR)/main.cpp + +DBG_TRACE_FLAGS := + +RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv + +RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs +RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem + +TOP := VX_local_mem_top + +include ../common.mk \ No newline at end of file diff --git a/hw/unittest/local_mem_top/main.cpp b/hw/unittest/local_mem_top/main.cpp new file mode 100644 index 000000000..5191b4433 --- /dev/null +++ b/hw/unittest/local_mem_top/main.cpp @@ -0,0 +1,49 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "vl_simulator.h" + +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull +#endif + +static uint64_t timestamp = 0; +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; + +double sc_time_stamp() { + return timestamp; +} + +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; +} + +int main(int argc, char **argv) { + // Initialize Verilators variables + Verilated::commandArgs(argc, argv); + + return 0; +} \ No newline at end of file diff --git a/hw/unittest/mem_unit_top/Makefile b/hw/unittest/mem_unit_top/Makefile new file mode 100644 index 000000000..8809551f4 --- /dev/null +++ b/hw/unittest/mem_unit_top/Makefile @@ -0,0 +1,26 @@ +ROOT_DIR := $(realpath ../../..) +include $(ROOT_DIR)/config.mk + +PROJECT := mem_unit_top + +RTL_DIR := $(VORTEX_HOME)/hw/rtl +DPI_DIR := $(VORTEX_HOME)/hw/dpi + +SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT) + +CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common +CXXFLAGS += -I$(ROOT_DIR)/hw + +SRCS := $(DPI_DIR)/util_dpi.cpp +SRCS += $(SRC_DIR)/main.cpp + +DBG_TRACE_FLAGS := + +RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv + +RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs +RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/core -I$(RTL_DIR)/fpu + +TOP := VX_mem_unit_top + +include ../common.mk \ No newline at end of file diff --git a/hw/unittest/mem_unit_top/main.cpp b/hw/unittest/mem_unit_top/main.cpp new file mode 100644 index 000000000..5191b4433 --- /dev/null +++ b/hw/unittest/mem_unit_top/main.cpp @@ -0,0 +1,49 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "vl_simulator.h" + +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull +#endif + +static uint64_t timestamp = 0; +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; + +double sc_time_stamp() { + return timestamp; +} + +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; +} + +int main(int argc, char **argv) { + // Initialize Verilators variables + Verilated::commandArgs(argc, argv); + + return 0; +} \ No newline at end of file diff --git a/kernel/Makefile b/kernel/Makefile index 16d279fa0..201ebc200 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -32,10 +32,6 @@ CFLAGS += -O3 -mcmodel=medany -fno-exceptions -fdata-sections -ffunction-section CFLAGS += -I$(INC_DIR) -I$(ROOT_DIR)/hw CFLAGS += -DXLEN_$(XLEN) -ifeq ($(VM_ENABLE), 1) -CFLAGS += -DVM_ENABLE -endif - PROJECT := libvortex SRCS = $(SRC_DIR)/vx_start.S $(SRC_DIR)/vx_syscalls.c $(SRC_DIR)/vx_print.S $(SRC_DIR)/tinyprintf.c $(SRC_DIR)/vx_print.c $(SRC_DIR)/vx_spawn.c $(SRC_DIR)/vx_serial.S $(SRC_DIR)/vx_perf.c diff --git a/kernel/include/vx_intrinsics.h b/kernel/include/vx_intrinsics.h index 6000065e9..5d16d44da 100644 --- a/kernel/include/vx_intrinsics.h +++ b/kernel/include/vx_intrinsics.h @@ -221,6 +221,24 @@ inline void vx_fence() { 
__asm__ volatile ("fence iorw, iorw"); } +//Matrix load +inline void vx_matrix_load(unsigned dest, unsigned addr) +{ + asm volatile (".insn i 0x7b, 0, x0, %0(%1)" :: "i"(dest), "r"(addr)); +} + +//Matrix Store +inline void vx_matrix_store(unsigned addr) +{ + asm volatile (".insn i 0x7b, 1, x0, 0(%0)" :: "r"(addr)); +} + +//Matrix Mul +inline void vx_matrix_mul() +{ + asm volatile (".insn i 0x7b, 2, x0, 0(x0)"); +} + #ifdef __cplusplus } #endif diff --git a/kernel/scripts/vxbin.py b/kernel/scripts/vxbin.py index 501d8949a..1dcd6a099 100755 --- a/kernel/scripts/vxbin.py +++ b/kernel/scripts/vxbin.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright 2019-2023 # diff --git a/miscs/docker/Dockerfile.ubuntu b/miscs/docker/Dockerfile.ubuntu index c3e72a0f4..64bb5813d 100644 --- a/miscs/docker/Dockerfile.ubuntu +++ b/miscs/docker/Dockerfile.ubuntu @@ -21,7 +21,6 @@ ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y \ software-properties-common \ build-essential \ - python \ python3 \ git \ wget \ @@ -39,7 +38,7 @@ RUN git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git /v WORKDIR /vortex # install system dependencies -RUN ./ci/system_updates.sh +RUN ./ci/install_dependencies.sh # Configure the build folder RUN mkdir build && cd build && ../configure diff --git a/runtime/common/common.h b/runtime/common/common.h index 62a807904..b52d41058 100644 --- a/runtime/common/common.h +++ b/runtime/common/common.h @@ -13,11 +13,12 @@ #pragma once +#include #include #include #include #include -#include +#include #include #include diff --git a/runtime/common/scope.cpp b/runtime/common/scope.cpp index 33b13cab4..8f8670944 100644 --- a/runtime/common/scope.cpp +++ b/runtime/common/scope.cpp @@ -28,7 +28,11 @@ #include #include -#define FRAME_FLUSH_SIZE 100 +#define SAMPLE_FLUSH_SIZE 100 + +#define TIMEOUT_TIME (60*60) + +#define MAX_DELAY_CYCLES 10000 #define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4) #define 
MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4) @@ -39,6 +43,7 @@ #define CMD_GET_DATA 3 #define CMD_SET_START 4 #define CMD_SET_STOP 5 +#define CMD_SET_DEPTH 6 #define CHECK_ERR(_expr) \ do { \ @@ -58,8 +63,8 @@ struct tap_signal_t { struct tap_t { uint32_t id; uint32_t width; - uint32_t frames; - uint32_t cur_frame; + uint32_t samples; + uint32_t cur_sample; uint64_t cycle_time; std::string path; std::vector signals; @@ -67,6 +72,10 @@ struct tap_t { static scope_callback_t g_callback; +static bool g_running = false; + +static std::mutex g_stop_mutex; + using json = nlohmann::json; static std::vector split(const std::string &s, char delimiter) { @@ -90,7 +99,7 @@ static void dump_module(std::ofstream& ofs, auto itt = tails.find(name); if (itt != tails.end()) { for (auto& signal : itt->second->signals) { - ofs << indent << " $var reg " << signal.width << " " << signal.id << " " << signal.name << " $end" << std::endl; + ofs << indent << " $var wire " << signal.width << " " << signal.id << " " << signal.name << " $end" << std::endl; } } @@ -108,7 +117,7 @@ static void dump_header(std::ofstream& ofs, std::vector& taps) { ofs << "$version Generated by Vortex Scope Analyzer $end" << std::endl; ofs << "$timescale 1 ns $end" << std::endl; ofs << "$scope module TOP $end" << std::endl; - ofs << " $var reg 1 0 clk $end" << std::endl; + ofs << " $var wire 1 0 clk $end" << std::endl; std::unordered_map> hierarchy; std::unordered_set heads; @@ -135,22 +144,33 @@ static void dump_header(std::ofstream& ofs, std::vector& taps) { ofs << "enddefinitions $end" << std::endl; } -static tap_t* find_nearest_tap(std::vector& taps) { - tap_t* nearest = nullptr; +// return the earliest tap that has data to dump +static tap_t* find_earliest_tap(std::vector& taps) { + tap_t* earliest = nullptr; for (auto& tap : taps) { - if (tap.cur_frame == tap.frames) - continue; - if (nearest != nullptr) { - if (tap.cycle_time < nearest->cycle_time) - nearest = &tap; + if (tap.samples == 0) + continue; 
// skip empty taps + if (tap.cur_sample == tap.samples) + continue; // skip finished taps + if (earliest != nullptr) { + if (tap.cycle_time < earliest->cycle_time) + earliest = &tap; } else { - nearest = &tap; + earliest = &tap; } } - return nearest; + return earliest; } -static uint64_t advance_time(std::ofstream& ofs, uint64_t next_time, uint64_t cur_time) { +static uint64_t advance_clock(std::ofstream& ofs, uint64_t cur_time, uint64_t next_time) { + uint64_t delta = next_time - cur_time; + if (delta > MAX_DELAY_CYCLES) { + ofs << '#' << (cur_time * 2 + 0) << std::endl; + ofs << "bx 0" << std::endl; + ofs << '#' << (cur_time * 2 + 1) << std::endl; + ofs << "bx 0" << std::endl; + cur_time = next_time - MAX_DELAY_CYCLES; + } while (cur_time < next_time) { ofs << '#' << (cur_time * 2 + 0) << std::endl; ofs << "b0 0" << std::endl; @@ -163,7 +183,7 @@ static uint64_t advance_time(std::ofstream& ofs, uint64_t next_time, uint64_t cu static int dump_tap(std::ofstream& ofs, tap_t* tap, vx_device_h hdevice) { uint32_t signal_offset = 0; - uint32_t frame_offset = 0; + uint32_t sample_offset = 0; uint64_t word; std::vector signal_data(tap->width); @@ -176,24 +196,24 @@ static int dump_tap(std::ofstream& ofs, tap_t* tap, vx_device_h hdevice) { CHECK_ERR(g_callback.registerWrite(hdevice, cmd_data)); CHECK_ERR(g_callback.registerRead(hdevice, &word)); do { - uint32_t word_offset = frame_offset % 64; + uint32_t word_offset = sample_offset % 64; signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? 
'1' : '0'; ++signal_offset; - ++frame_offset; + ++sample_offset; if (signal_offset == signal_width) { signal_data[signal_width] = 0; // string null termination ofs << 'b' << signal_data.data() << ' ' << signal_it->id << std::endl; - if (frame_offset == tap->width) { - // end-of-frame - ++tap->cur_frame; - if (tap->cur_frame != tap->frames) { + if (sample_offset == tap->width) { + // end-of-sample + ++tap->cur_sample; + if (tap->cur_sample != tap->samples) { // read next delta CHECK_ERR(g_callback.registerWrite(hdevice, cmd_data)); CHECK_ERR(g_callback.registerRead(hdevice, &word)); tap->cycle_time += 1 + word; - if (0 == (tap->cur_frame % FRAME_FLUSH_SIZE)) { + if (0 == (tap->cur_sample % SAMPLE_FLUSH_SIZE)) { ofs << std::flush; - std::cout << std::dec << "[SCOPE] flush tap #" << tap->id << ": "<< tap->cur_frame << "/" << tap->frames << " frames, next_time=" << tap->cycle_time << std::endl; + std::cout << std::dec << "[SCOPE] flush tap #" << tap->id << ": "<< tap->cur_sample << "/" << tap->samples << " samples, next_time=" << tap->cycle_time << std::endl; } } break; @@ -202,8 +222,8 @@ static int dump_tap(std::ofstream& ofs, tap_t* tap, vx_device_h hdevice) { ++signal_it; signal_width = signal_it->width; } - } while ((frame_offset % 64) != 0); - } while (frame_offset != tap->width); + } while ((sample_offset % 64) != 0); + } while (sample_offset != tap->width); return 0; } @@ -241,6 +261,20 @@ int vx_scope_start(scope_callback_t* callback, vx_device_h hdevice, uint64_t sta } } + // setup capture size + const char* capture_size_env = std::getenv("SCOPE_DEPTH"); + if (capture_size_env != nullptr) { + std::stringstream ss(capture_size_env); + uint32_t capture_size; + if (ss >> capture_size) { + for (auto& tap : json_obj["taps"]) { + auto id = tap["id"].get(); + uint64_t cmd_depth = (capture_size << 11) | (id << 3) | CMD_SET_DEPTH; + CHECK_ERR(g_callback.registerWrite(hdevice, cmd_depth)); + } + } + } + // set stop time if (stop_time != uint64_t(-1)) { std::cout << 
"[SCOPE] stop time: " << std::dec << stop_time << "s" << std::endl; @@ -261,13 +295,39 @@ int vx_scope_start(scope_callback_t* callback, vx_device_h hdevice, uint64_t sta } } + g_running = true; + + // create auto-stop thread + uint32_t timeout_time = TIMEOUT_TIME; + const char* env_timeout = std::getenv("SCOPE_TIMEOUT"); + if (env_timeout != nullptr) { + std::stringstream ss(env_timeout); + uint32_t env_value; + if (ss >> env_value) { + timeout_time = env_value; + std::cout << "[SCOPE] timeout time=" << env_value << std::endl; + } + } + std::thread([hdevice, timeout_time]() { + std::this_thread::sleep_for(std::chrono::seconds(timeout_time)); + std::cout << "[SCOPE] auto-stop timeout!" << std::endl; + vx_scope_stop(hdevice); + }).detach(); + return 0; } int vx_scope_stop(vx_device_h hdevice) { + std::lock_guard lock(g_stop_mutex); + if (nullptr == hdevice) return -1; + if (!g_running) + return 0; + + g_running = false; + std::vector taps; { @@ -285,8 +345,8 @@ int vx_scope_stop(vx_device_h hdevice) { _tap.width = tap["width"].get(); _tap.path = tap["path"].get(); _tap.cycle_time = 0; - _tap.frames = 0; - _tap.cur_frame = 0; + _tap.samples = 0; + _tap.cur_sample = 0; for (auto& signal : tap["signals"]) { auto name = signal[0].get(); @@ -299,19 +359,15 @@ int vx_scope_stop(vx_device_h hdevice) { } } - // stop recording + std::cout << "[SCOPE] stop recording..." << std::endl; + for (auto& tap : taps) { uint64_t cmd_stop = (0 << 11) | (tap.id << 3) | CMD_SET_STOP; CHECK_ERR(g_callback.registerWrite(hdevice, cmd_stop)); } - std::cout << "[SCOPE] trace dump begin..." << std::endl; - - std::ofstream ofs("scope.vcd"); - - dump_header(ofs, taps); + std::cout << "[SCOPE] load trace info..." 
<< std::endl; - // load trace info for (auto& tap : taps) { uint64_t count, start, delta; @@ -319,39 +375,51 @@ int vx_scope_stop(vx_device_h hdevice) { uint64_t cmd_count = (tap.id << 3) | CMD_GET_COUNT; CHECK_ERR(g_callback.registerWrite(hdevice, cmd_count)); CHECK_ERR(g_callback.registerRead(hdevice, &count)); + if (count == 0) + continue; // get start uint64_t cmd_start = (tap.id << 3) | CMD_GET_START; CHECK_ERR(g_callback.registerWrite(hdevice, cmd_start)); CHECK_ERR(g_callback.registerRead(hdevice, &start)); - // get data + // get delta uint64_t cmd_data = (tap.id << 3) | CMD_GET_DATA; CHECK_ERR(g_callback.registerWrite(hdevice, cmd_data)); CHECK_ERR(g_callback.registerRead(hdevice, &delta)); - tap.frames = count; + tap.samples = count; tap.cycle_time = 1 + start + delta; std::cout << std::dec << "[SCOPE] tap #" << tap.id << ": width=" << tap.width - << ", num_frames=" << tap.frames + << ", num_samples=" << tap.samples << ", start_time=" << tap.cycle_time << ", path=" << tap.path << std::endl; } - uint64_t cur_time = 0; + std::cout << "[SCOPE] dump header..." << std::endl; - while (true) { - // find the nearest tap - auto tap = find_nearest_tap(taps); - if (tap == nullptr) - break; + std::ofstream ofs("scope.vcd"); + + dump_header(ofs, taps); + + std::cout << "[SCOPE] dump taps..." << std::endl; + + uint64_t cur_time = 0; + auto tap = find_earliest_tap(taps); + if (tap != nullptr) { + do { + // advance clock + cur_time = advance_clock(ofs, cur_time, tap->cycle_time); + // dump tap + CHECK_ERR(dump_tap(ofs, tap, hdevice)); + // find the nearest tap + tap = find_earliest_tap(taps); + } while (tap != nullptr); // advance clock - cur_time = advance_time(ofs, tap->cycle_time, cur_time); - // dump tap - CHECK_ERR(dump_tap(ofs, tap, hdevice)); - }; + advance_clock(ofs, cur_time, cur_time + 1); + } std::cout << "[SCOPE] trace dump done! 
- " << (cur_time/2) << " cycles" << std::endl; diff --git a/runtime/include/vortex.h b/runtime/include/vortex.h index 0446e8f5d..4f1c93418 100644 --- a/runtime/include/vortex.h +++ b/runtime/include/vortex.h @@ -35,6 +35,9 @@ typedef void* vx_buffer_h; #define VX_CAPS_LOCAL_MEM_SIZE 0x6 #define VX_CAPS_ISA_FLAGS 0x7 #define VX_CAPS_NUM_MEM_BANKS 0x8 +#define VX_CAPS_MEM_BANK_SIZE 0x9 +#define VX_CAPS_TC_SIZE 0xA +#define VX_CAPS_TC_NUM 0xB // device isa flags #define VX_ISA_STD_A (1ull << ISA_STD_A) diff --git a/runtime/opae/Makefile b/runtime/opae/Makefile index 1a9810eca..04545c887 100644 --- a/runtime/opae/Makefile +++ b/runtime/opae/Makefile @@ -1,3 +1,4 @@ +ROOT_DIR := $(realpath ../..) include ../common.mk TARGET ?= opaesim @@ -8,8 +9,8 @@ SYN_DIR := $(HW_DIR)/syn/altera/opae SRC_DIR := $(VORTEX_HOME)/runtime/opae -CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors -CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(DESTDIR) +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(DESTDIR) -I$(SIM_DIR)/common CXXFLAGS += -DXLEN_$(XLEN) # Position independent code @@ -24,10 +25,11 @@ SRCS = $(SRC_DIR)/vortex.cpp $(SRC_DIR)/driver.cpp # set up target types ifeq ($(TARGET), opaesim) - OPAESIM = $(DESTDIR)/libopae-c-sim.so - CXXFLAGS += -I$(SIM_DIR)/opaesim + BUILD_DEPS = $(DESTDIR)/libopae-c-sim.so + CXXFLAGS += -DOPAESIM -I$(SIM_DIR)/opaesim else - CXXFLAGS += -I$(SYN_DIR) + BUILD_DEPS = $(ROOT_DIR)/hw/syn/altera/opae/vortex_afu.h + CXXFLAGS += -I$(SYN_DIR) -I$(ROOT_DIR)/hw/syn/altera/opae endif # Debugging @@ -47,12 +49,15 @@ PROJECT := libvortex-opae.so all: $(DESTDIR)/$(PROJECT) +$(ROOT_DIR)/hw/syn/altera/opae/vortex_afu.h: + $(MAKE) -C $(ROOT_DIR)/hw/syn/altera/opae swconfig + driver: $(DESTDIR)/libopae-c-sim.so $(DESTDIR)/libopae-c-sim.so: DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/opaesim $(DESTDIR)/libopae-c-sim.so -$(DESTDIR)/$(PROJECT): $(SRCS) $(OPAESIM) 
+$(DESTDIR)/$(PROJECT): $(SRCS) $(BUILD_DEPS) $(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $@ clean-driver: diff --git a/runtime/opae/driver.h b/runtime/opae/driver.h index 0d1d4daa7..0a45b6f67 100644 --- a/runtime/opae/driver.h +++ b/runtime/opae/driver.h @@ -13,7 +13,11 @@ #pragma once +#ifdef OPAESIM #include +#else +#include +#endif typedef fpga_result (*pfn_fpgaGetProperties)(fpga_token token, fpga_properties *prop); typedef fpga_result (*pfn_fpgaPropertiesSetObjectType)(fpga_properties prop, fpga_objtype objtype); diff --git a/runtime/opae/vortex.cpp b/runtime/opae/vortex.cpp index 06458fa1f..38ee514ab 100755 --- a/runtime/opae/vortex.cpp +++ b/runtime/opae/vortex.cpp @@ -163,11 +163,6 @@ class vx_device { }); { - // retrieve FPGA global memory size - CHECK_FPGA_ERR(api_.fpgaPropertiesGetLocalMemorySize(filter, &global_mem_size_), { - global_mem_size_ = GLOBAL_MEM_SIZE; - }); - // Load ISA CAPS CHECK_FPGA_ERR(api_.fpgaReadMMIO64(fpga_, 0, MMIO_ISA_CAPS, &isa_caps_), { api_.fpgaClose(fpga_); @@ -179,6 +174,12 @@ class vx_device { api_.fpgaClose(fpga_); return -1; }); + + // Determine global memory size + uint64_t num_banks, bank_size; + this->get_caps(VX_CAPS_NUM_MEM_BANKS, &num_banks); + this->get_caps(VX_CAPS_MEM_BANK_SIZE, &bank_size); + global_mem_size_ = num_banks * bank_size; } #ifdef SCOPE @@ -194,11 +195,10 @@ class vx_device { return device->api_.fpgaReadMMIO64(device->fpga_, 0, MMIO_SCOPE_READ, value); }; - int ret = vx_scope_start(&callback, this, 0, -1); - if (ret != 0) { + CHECK_ERR(vx_scope_start(&callback, this, -1, -1), { api_.fpgaClose(fpga_); - return ret; - } + return err; + }); } #endif return 0; @@ -206,7 +206,6 @@ class vx_device { int get_caps(uint32_t caps_id, uint64_t * value) { uint64_t _value; - switch (caps_id) { case VX_CAPS_VERSION: _value = (dev_caps_ >> 0) & 0xff; @@ -227,13 +226,16 @@ class vx_device { _value = global_mem_size_; break; case VX_CAPS_LOCAL_MEM_SIZE: - _value = 1ull << ((dev_caps_ >> 48) & 0xff); + _value = 1ull << 
((dev_caps_ >> 40) & 0xff); break; case VX_CAPS_ISA_FLAGS: _value = isa_caps_; break; case VX_CAPS_NUM_MEM_BANKS: - _value = MEMORY_BANKS; + _value = 1 << ((dev_caps_ >> 48) & 0x7); + break; + case VX_CAPS_MEM_BANK_SIZE: + _value = 1ull << (20 + ((dev_caps_ >> 51) & 0x1f)); break; default: fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); diff --git a/runtime/rtlsim/Makefile b/runtime/rtlsim/Makefile index f6adbf8c8..a7b15d9ac 100644 --- a/runtime/rtlsim/Makefile +++ b/runtime/rtlsim/Makefile @@ -4,7 +4,7 @@ DESTDIR ?= $(CURDIR)/.. SRC_DIR := $(VORTEX_HOME)/runtime/rtlsim -CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(SIM_DIR)/rtlsim -I$(COMMON_DIR) -I$(SIM_DIR)/common CXXFLAGS += -DXLEN_$(XLEN) diff --git a/runtime/rtlsim/vortex.cpp b/runtime/rtlsim/vortex.cpp index 91df7f7e8..7ba7f9471 100644 --- a/runtime/rtlsim/vortex.cpp +++ b/runtime/rtlsim/vortex.cpp @@ -80,6 +80,9 @@ class vx_device { case VX_CAPS_NUM_MEM_BANKS: _value = MEMORY_BANKS; break; + case VX_CAPS_MEM_BANK_SIZE: + _value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS); + break; default: std::cout << "invalid caps id: " << caps_id << std::endl; std::abort(); diff --git a/runtime/simx/Makefile b/runtime/simx/Makefile index 31ab483e7..9480f5b6a 100644 --- a/runtime/simx/Makefile +++ b/runtime/simx/Makefile @@ -4,16 +4,12 @@ DESTDIR ?= $(CURDIR)/.. 
SRC_DIR := $(VORTEX_HOME)/runtime/simx -CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors CXXFLAGS += -fPIC -Wno-maybe-uninitialized CXXFLAGS += -I$(INC_DIR) -I../common -I$(ROOT_DIR)/hw -I$(SIM_DIR)/simx -I$(COMMON_DIR) -I$(SIM_DIR)/common CXXFLAGS += $(CONFIGS) CXXFLAGS += -DXLEN_$(XLEN) -ifeq ($(VM_ENABLE), 1) -CXXFLAGS += -DVM_ENABLE -endif - LDFLAGS += -shared -pthread LDFLAGS += -L$(DESTDIR) -lsimx @@ -46,4 +42,4 @@ clean-runtime: clean: clean-driver clean-runtime -.PHONY: all driver clean-driver clean-runtime clean \ No newline at end of file +.PHONY: all driver clean-driver clean-runtime clean diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 1c8f47eaf..8e4351e0a 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -56,7 +56,8 @@ class vx_device { { // attach memory module processor_.attach_ram(&ram_); -#ifdef VM_ENABLE +#ifdef VM_ENABLE + std::cout << "*** VM ENABLED!! ***"<< std::endl; CHECK_ERR(init_VM(), ); #endif } @@ -93,6 +94,12 @@ class vx_device { case VX_CAPS_NUM_CORES: _value = NUM_CORES * NUM_CLUSTERS; break; + case VX_CAPS_TC_SIZE: + _value = TC_SIZE; + break; + case VX_CAPS_TC_NUM: + _value = TC_NUM; + break; case VX_CAPS_CACHE_LINE_SIZE: _value = CACHE_BLOCK_SIZE; break; @@ -108,6 +115,9 @@ class vx_device { case VX_CAPS_NUM_MEM_BANKS: _value = MEMORY_BANKS; break; + case VX_CAPS_MEM_BANK_SIZE: + _value = 1ull << (MEM_ADDR_WIDTH / MEMORY_BANKS); + break; default: std::cout << "invalid caps id: " << caps_id << std::endl; std::abort(); @@ -432,6 +442,12 @@ class vx_device { uint64_t pt_addr = 0; // Reserve space for PT DBGPRINT("[RT:init_VM] Initialize VM\n"); + DBGPRINT("* VM_ADDR_MODE=0x%lx", VM_ADDR_MODE); + DBGPRINT("* PAGE_TABLE_BASE_ADDR=0x%lx", PAGE_TABLE_BASE_ADDR); + DBGPRINT("* PT_LEVEL=0x%lx", PT_LEVEL); + DBGPRINT("* PT_SIZE=0x%lx", PT_SIZE); + DBGPRINT("* PTE_SIZE=0x%lx", PTE_SIZE); + DBGPRINT("* TLB_SIZE=0x%lx", TLB_SIZE); 
CHECK_ERR(mem_reserve(PAGE_TABLE_BASE_ADDR, PT_SIZE_LIMIT, VX_MEM_READ_WRITE), { return err; }); diff --git a/runtime/stub/Makefile b/runtime/stub/Makefile index ae6e27ed1..8315bd8af 100644 --- a/runtime/stub/Makefile +++ b/runtime/stub/Makefile @@ -4,7 +4,7 @@ DESTDIR ?= $(CURDIR)/.. SRC_DIR := $(VORTEX_HOME)/runtime/stub -CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(SIM_DIR)/common CXXFLAGS += -fPIC diff --git a/runtime/xrt/Makefile b/runtime/xrt/Makefile index 66d3e481b..f255002f2 100644 --- a/runtime/xrt/Makefile +++ b/runtime/xrt/Makefile @@ -6,8 +6,9 @@ DESTDIR ?= $(CURDIR)/.. SRC_DIR := $(VORTEX_HOME)/runtime/xrt -CXXFLAGS += -std=c++14 -Wall -Wextra -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(XILINX_XRT)/include -I$(SIM_DIR)/common +CXXFLAGS += -DXLEN_$(XLEN) CXXFLAGS += -fPIC LDFLAGS += -shared -pthread @@ -39,6 +40,11 @@ ifdef SCOPE SRCS += $(COMMON_DIR)/scope.cpp endif +# Enable ILA logic analyzer +ifdef CHIPSCOPE + CXXFLAGS += -DCHIPSCOPE +endif + all: $(DESTDIR)/$(PROJECT) driver: $(DESTDIR)/libxrtsim.so diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 5f4e27ff2..d71f2e142 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -18,15 +18,15 @@ #endif // XRT includes -#ifndef XRTSIM +#ifdef XRTSIM +#include +#else #include "experimental/xrt_bo.h" #include "experimental/xrt_device.h" #include "experimental/xrt_error.h" #include "experimental/xrt_ip.h" #include "experimental/xrt_kernel.h" #include "experimental/xrt_xclbin.h" -#else -#include #endif #include @@ -46,10 +46,10 @@ using namespace vortex; #define MMIO_CTL_ADDR 0x00 #define MMIO_DEV_ADDR 0x10 -#define MMIO_ISA_ADDR 0x1C -#define MMIO_DCR_ADDR 0x28 -#define MMIO_SCP_ADDR 0x34 -#define MMIO_MEM_ADDR 0x40 +#define MMIO_ISA_ADDR 0x18 +#define 
MMIO_DCR_ADDR 0x20 +#define MMIO_SCP_ADDR 0x28 +#define MMIO_MEM_ADDR 0x30 #define CTL_AP_START (1 << 0) #define CTL_AP_DONE (1 << 1) @@ -58,21 +58,6 @@ using namespace vortex; #define CTL_AP_RESET (1 << 4) #define CTL_AP_RESTART (1 << 7) -struct platform_info_t { - const char *prefix_name; - uint8_t lg2_num_banks; - uint8_t lg2_bank_size; - uint64_t mem_base; -}; - -static const platform_info_t g_platforms[] = { - {"vortex_xrtsim", 4, 0x10, 0x0}, // 64 KB banks - {"xilinx_u50", 4, 0x1C, 0x0}, // 16 MB banks - {"xilinx_u200", 4, 0x1C, 0x0}, // 16 MB banks - {"xilinx_u280", 4, 0x1C, 0x0}, // 16 MB banks - {"xilinx_vck5000", 0, 0x21, 0xC000000000}, -}; - #ifdef CPP_API typedef xrt::device xrt_device_t; @@ -110,25 +95,6 @@ static void dump_xrt_error(xrtDeviceHandle xrtDevice, xrtErrorCode err) { } #endif -static int get_platform_info(const std::string &device_name, - platform_info_t *platform_info) { - for (size_t i = 0; i < (sizeof(g_platforms) / sizeof(platform_info_t)); ++i) { - auto &platform = g_platforms[i]; - if (device_name.rfind(platform.prefix_name, 0) == 0) { - *platform_info = platform; - return 0; - } - } - return -1; -} - -/* -static void wait_for_enter(const std::string &msg) { - std::cout << msg << std::endl; - std::cin.ignore(std::numeric_limits::max(), '\n'); -} -*/ - /////////////////////////////////////////////////////////////////////////////// class vx_device { @@ -185,58 +151,6 @@ class vx_device { auto xclbin = xrt::xclbin(xlbin_path_s); auto device_name = xrtDevice.get_info(); - /*{ - uint32_t num_banks = 0; - uint64_t bank_size = 0; - uint64_t mem_base = 0; - - auto mem_json = - nlohmann::json::parse(xrtDevice.get_info()); if - (!mem_json.is_null()) { uint32_t index = 0; for (auto& mem : - mem_json["board"]["memory"]["memories"]) { auto enabled = - mem["enabled"].get(); if (enabled == "true") { if (index == 0) - { mem_base = std::stoull(mem["base_address"].get(), nullptr, - 16); bank_size = std::stoull(mem["range_bytes"].get(), nullptr, - 16); 
- } - ++index; - } - } - num_banks = index; - } - - fprintf(stderr, "[VXDRV] memory description: base=0x%lx, size=0x%lx, - count=%d\n", mem_base, bank_size, num_banks); - }*/ - - /*{ - std::cout << "Device" << device_index << " : " << - xrtDevice.get_info() << std::endl; std::cout << " - bdf : " << xrtDevice.get_info() << std::endl; - std::cout << " kdma : " << - xrtDevice.get_info() << std::endl; std::cout << " - max_freq : " << - xrtDevice.get_info() << - std::endl; std::cout << " memory : " << - xrtDevice.get_info() << std::endl; std::cout << " - thermal : " << xrtDevice.get_info() << - std::endl; std::cout << " m2m : " << std::boolalpha << - xrtDevice.get_info() << std::dec << std::endl; - std::cout << " nodma : " << std::boolalpha << - xrtDevice.get_info() << std::dec << std::endl; - - std::cout << "Memory info :" << std::endl; - for (const auto& mem_bank : xclbin.get_mems()) { - std::cout << " index : " << mem_bank.get_index() << std::endl; - std::cout << " tag : " << mem_bank.get_tag() << std::endl; - std::cout << " type : " << (int)mem_bank.get_type() << std::endl; - std::cout << " base_address : 0x" << std::hex << - mem_bank.get_base_address() << std::endl; std::cout << " size : 0x" << - (mem_bank.get_size_kb() * 1000) << std::dec << std::endl; std::cout << " - used :" << mem_bank.get_used() << std::endl; - } - }*/ - #else CHECK_HANDLE(xrtDevice, xrtDeviceOpen(device_index), { @@ -262,7 +176,7 @@ class vx_device { return -1; }); #else - xrtKernelHandle xrtKernel = nullptr; + xrtKernelHandle xrtKernel = xrtDevice; #endif // get device name @@ -277,34 +191,10 @@ class vx_device { xrtDevice_ = xrtDevice; xrtKernel_ = xrtKernel; - CHECK_ERR(get_platform_info(device_name, &platform_), { - fprintf(stderr, "[VXDRV] Error: platform not supported: %s\n", device_name.c_str()); - return err; - }); - CHECK_ERR(this->write_register(MMIO_CTL_ADDR, CTL_AP_RESET), { return err; }); - uint32_t num_banks = 1 << platform_.lg2_num_banks; - uint64_t bank_size = 1ull << 
platform_.lg2_bank_size; - - for (uint32_t i = 0; i < num_banks; ++i) { - uint32_t reg_addr = MMIO_MEM_ADDR + (i * 12); - uint64_t reg_value = platform_.mem_base + i * bank_size; - - CHECK_ERR(this->write_register(reg_addr, reg_value & 0xffffffff), { - return err; - }); - - CHECK_ERR(this->write_register(reg_addr + 4, (reg_value >> 32) & 0xffffffff), { - return err; - }); - #ifndef BANK_INTERLEAVE - break; - #endif - } - CHECK_ERR(this->read_register(MMIO_DEV_ADDR, (uint32_t *)&dev_caps_), { return err; }); @@ -321,8 +211,18 @@ class vx_device { return err; }); + uint64_t num_banks; + this->get_caps(VX_CAPS_NUM_MEM_BANKS, &num_banks); + lg2_num_banks_ = log2ceil(num_banks); + + uint64_t bank_size; + this->get_caps(VX_CAPS_MEM_BANK_SIZE, &bank_size); + lg2_bank_size_ = log2ceil(bank_size); + global_mem_size_ = num_banks * bank_size; + printf("info: device name=%s, memory_capacity=0x%lx bytes, memory_banks=%ld.\n", device_name.c_str(), global_mem_size_, num_banks); + #ifdef BANK_INTERLEAVE xrtBuffers_.reserve(num_banks); for (uint32_t i = 0; i < num_banks; ++i) { @@ -365,14 +265,17 @@ class vx_device { *value = (((uint64_t)value_hi) << 32) | value_lo; return 0; }; - int ret = vx_scope_start(&callback, device, 0, -1); - if (ret != 0) { - delete device; - return ret; - } + CHECK_ERR(vx_scope_start(&callback, this, -1, -1), { + return err; + }); } #endif + #ifdef CHIPSCOPE + std::cout << "\nPress ENTER to continue after setting up ILA trigger..." 
<< std::endl; + std::cin.ignore(std::numeric_limits::max(), '\n'); + #endif + return 0; } @@ -399,13 +302,16 @@ class vx_device { _value = global_mem_size_; break; case VX_CAPS_LOCAL_MEM_SIZE: - _value = 1ull << ((dev_caps_ >> 48) & 0xff); + _value = 1ull << ((dev_caps_ >> 40) & 0xff); break; case VX_CAPS_ISA_FLAGS: _value = isa_caps_; break; case VX_CAPS_NUM_MEM_BANKS: - _value = MEMORY_BANKS; + _value = 1 << ((dev_caps_ >> 48) & 0x7); + break; + case VX_CAPS_MEM_BANK_SIZE: + _value = 1ull << (20 + ((dev_caps_ >> 51) & 0x1f)); break; default: fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); @@ -523,7 +429,6 @@ class vx_device { return err; }); #endif - DBGPRINT("*** write_register: addr=0x%x, value=0x%x\n", addr, value); return 0; } @@ -536,7 +441,6 @@ class vx_device { return err; }); #endif - DBGPRINT("*** read_register: addr=0x%x, value=0x%x\n", addr, *value); return 0; } @@ -570,14 +474,14 @@ class vx_device { return err; }); #ifdef CPP_API - xrtBuffer.write(host_ptr, asize, bo_offset); - xrtBuffer.sync(XCL_BO_SYNC_BO_TO_DEVICE, asize, bo_offset); + xrtBuffer.write(host_ptr, size, bo_offset); + xrtBuffer.sync(XCL_BO_SYNC_BO_TO_DEVICE, size, bo_offset); #else - CHECK_ERR(xrtBOWrite(xrtBuffer, host_ptr, asize, bo_offset), { + CHECK_ERR(xrtBOWrite(xrtBuffer, host_ptr, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); - CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_TO_DEVICE, asize, bo_offset), { + CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_TO_DEVICE, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); @@ -616,14 +520,14 @@ class vx_device { return err; }); #ifdef CPP_API - xrtBuffer.sync(XCL_BO_SYNC_BO_FROM_DEVICE, asize, bo_offset); - xrtBuffer.read(host_ptr, asize, bo_offset); + xrtBuffer.sync(XCL_BO_SYNC_BO_FROM_DEVICE, size, bo_offset); + xrtBuffer.read(host_ptr, size, bo_offset); #else - CHECK_ERR(xrtBOSync(xrtBuffer, XCL_BO_SYNC_BO_FROM_DEVICE, asize, bo_offset), { + CHECK_ERR(xrtBOSync(xrtBuffer, 
XCL_BO_SYNC_BO_FROM_DEVICE, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); - CHECK_ERR(xrtBORead(xrtBuffer, host_ptr, asize, bo_offset), { + CHECK_ERR(xrtBORead(xrtBuffer, host_ptr, size, bo_offset), { dump_xrt_error(xrtDevice_, err); return err; }); @@ -723,30 +627,30 @@ class vx_device { MemoryAllocator global_mem_; xrt_device_t xrtDevice_; xrt_kernel_t xrtKernel_; - platform_info_t platform_; uint64_t dev_caps_; uint64_t isa_caps_; uint64_t global_mem_size_; DeviceConfig dcrs_; std::unordered_map> mpm_cache_; + uint32_t lg2_num_banks_; + uint32_t lg2_bank_size_; #ifdef BANK_INTERLEAVE std::vector xrtBuffers_; int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) { - uint32_t num_banks = 1 << platform_.lg2_num_banks; + uint32_t num_banks = 1 << lg2_num_banks_; uint64_t block_addr = addr / CACHE_BLOCK_SIZE; uint32_t index = block_addr & (num_banks - 1); - uint64_t offset = - (block_addr >> platform_.lg2_num_banks) * CACHE_BLOCK_SIZE; + uint64_t offset = (block_addr >> lg2_num_banks_) * CACHE_BLOCK_SIZE; if (pIdx) { *pIdx = index; } if (pOff) { *pOff = offset; } - printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, index, offset); + //printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, index, offset); return 0; } @@ -767,9 +671,9 @@ class vx_device { std::unordered_map xrtBuffers_; int get_bank_info(uint64_t addr, uint32_t *pIdx, uint64_t *pOff) { - uint32_t num_banks = 1 << platform_.lg2_num_banks; - uint64_t bank_size = 1ull << platform_.lg2_bank_size; - uint32_t index = addr >> platform_.lg2_bank_size; + uint32_t num_banks = 1 << lg2_num_banks_; + uint64_t bank_size = 1ull << lg2_bank_size_; + uint32_t index = addr >> lg2_bank_size_; uint64_t offset = addr & (bank_size - 1); if (index > num_banks) { fprintf(stderr, "[VXDRV] Error: address out of range: 0x%lx\n", addr); @@ -781,8 +685,7 @@ class vx_device { if (pOff) { *pOff = offset; } - printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, 
index, - offset); + //printf("get_bank_info(addr=0x%lx, bank=%d, offset=0x%lx\n", addr, index, offset); return 0; } @@ -797,7 +700,7 @@ class vx_device { } } else { printf("allocating bank%d...\n", bank_id); - uint64_t bank_size = 1ull << platform_.lg2_bank_size; + uint64_t bank_size = 1ull << lg2_bank_size_; #ifdef CPP_API xrt::bo xrtBuffer(xrtDevice_, bank_size, xrt::bo::flags::normal, bank_id); #else diff --git a/hw/syn/xilinx/xrt/xrt.ini b/runtime/xrt/xrt.ini.in similarity index 54% rename from hw/syn/xilinx/xrt/xrt.ini rename to runtime/xrt/xrt.ini.in index 094219112..90affb447 100644 --- a/hw/syn/xilinx/xrt/xrt.ini +++ b/runtime/xrt/xrt.ini.in @@ -1,9 +1,9 @@ -[Runtime] +[Runtime] runtime_log=console [Emulation] -#debug_mode=batch -#user_pre_sim_script=xsim.tcl +debug_mode=batch +user_pre_sim_script=@VORTEX_HOME@/runtime/xrt/xsim.tcl [Debug] profile=true diff --git a/hw/syn/xilinx/xrt/scripts/xsim.tcl b/runtime/xrt/xsim.tcl similarity index 88% rename from hw/syn/xilinx/xrt/scripts/xsim.tcl rename to runtime/xrt/xsim.tcl index 061bc17ab..ccdc1262f 100644 --- a/hw/syn/xilinx/xrt/scripts/xsim.tcl +++ b/runtime/xrt/xsim.tcl @@ -14,12 +14,9 @@ # limitations under the License. # -#log_wave -r * -#run all -#exit +log_wave -r * -open_vcd xsim_dump.vcd -log_vcd /* -run all -close_vcd -exit +#open_vcd xsim_dump.vcd +#log_vcd /* +#run all +#close_vcd \ No newline at end of file diff --git a/sim/common/bitmanip.h b/sim/common/bitmanip.h index a6cd87ff1..4bfe56961 100644 --- a/sim/common/bitmanip.h +++ b/sim/common/bitmanip.h @@ -1,10 +1,10 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,32 +16,54 @@ #include #include -constexpr uint32_t count_leading_zeros(uint32_t value) { - return value ? __builtin_clz(value) : 32; +template +constexpr uint32_t count_leading_zeros(T value) { + static_assert(std::is_integral::value, "invalid data type"); + if constexpr (sizeof(T) > 4) { + return value ? __builtin_clzll(value) - (64 - sizeof(T) * 8) : sizeof(T) * 8; + } else { + return value ? __builtin_clz(value) - (32 - sizeof(T) * 8) : sizeof(T) * 8; + } } -constexpr uint32_t count_trailing_zeros(uint32_t value) { - return value ? __builtin_ctz(value) : 32; +template +constexpr uint32_t count_trailing_zeros(T value) { + static_assert(std::is_integral::value, "invalid data type"); + if constexpr (sizeof(T) > 4) { + return value ? __builtin_ctzll(value) : (sizeof(T) * 8); + } else { + return value ? 
__builtin_ctz(value) : (sizeof(T) * 8); + } } -constexpr bool ispow2(uint32_t value) { +template +constexpr bool ispow2(T value) { + static_assert(std::is_integral::value, "invalid data type"); return value && !(value & (value - 1)); } -constexpr uint32_t log2ceil(uint32_t value) { - return 32 - count_leading_zeros(value - 1); +template +constexpr uint32_t log2ceil(T value) { + static_assert(std::is_integral::value, "invalid data type"); + return (sizeof(T) * 8) - count_leading_zeros(value - 1); } -inline unsigned log2up(uint32_t value) { +template +inline unsigned log2up(T value) { + static_assert(std::is_integral::value, "invalid data type"); return std::max(1, log2ceil(value)); } -constexpr unsigned log2floor(uint32_t value) { - return 31 - count_leading_zeros(value); +template +constexpr unsigned log2floor(T value) { + static_assert(std::is_integral::value, "invalid data type"); + return (sizeof(T) * 8 - 1) - count_leading_zeros(value); } -constexpr unsigned ceil2(uint32_t value) { - return 32 - count_leading_zeros(value); +template +constexpr unsigned ceil2(T value) { + static_assert(std::is_integral::value, "invalid data type"); + return (sizeof(T) * 8) - count_leading_zeros(value); } inline uint64_t bit_clr(uint64_t bits, uint32_t index) { @@ -86,7 +108,7 @@ template T sext(const T& word, uint32_t width) { assert(width > 1); assert(width <= (sizeof(T) * 8)); - if (width == (sizeof(T) * 8)) + if (width == (sizeof(T) * 8)) return word; T mask((static_cast(1) << width) - 1); return ((word >> (width - 1)) & 0x1) ? 
(word | ~mask) : (word & mask); @@ -96,7 +118,7 @@ template T zext(const T& word, uint32_t width) { assert(width > 1); assert(width <= (sizeof(T) * 8)); - if (width == (sizeof(T) * 8)) + if (width == (sizeof(T) * 8)) return word; T mask((static_cast(1) << width) - 1); return word & mask; diff --git a/runtime/common/malloc.h b/sim/common/mem_alloc.h similarity index 96% rename from runtime/common/malloc.h rename to sim/common/mem_alloc.h index ca386031a..5e31d0ea0 100644 --- a/runtime/common/malloc.h +++ b/sim/common/mem_alloc.h @@ -80,13 +80,14 @@ class MemoryAllocator { // Check if the reservation is within memory capacity bounds if (addr + size > baseAddress_ + capacity_) { - printf("error: address range out of bounds\n"); + printf("error: address range out of bounds - requested=0x%lx, base+capacity=0x%lx\n", (addr + size), (baseAddress_ +capacity_)); return -1; } // Ensure the reservation does not overlap with existing pages - if (hasPageOverlap(addr, size)) { - printf("error: address range overlaps with existing allocation\n"); + uint64_t overlapStart, overlapEnd; + if (hasPageOverlap(addr, size, &overlapStart, &overlapEnd)) { + printf("error: address range overlaps with existing allocation - requested=[0x%lx-0x%lx], existing=[0x%lx, 0x%lx]\n", addr, addr+size, overlapStart, overlapEnd); return -1; } @@ -523,15 +524,15 @@ class MemoryAllocator { return false; } - bool hasPageOverlap(uint64_t start, uint64_t size) { + bool hasPageOverlap(uint64_t start, uint64_t size, uint64_t* overlapStart, uint64_t* overlapEnd) { page_t* current = pages_; while (current != nullptr) { uint64_t pageStart = current->addr; uint64_t pageEnd = pageStart + current->size; - uint64_t requestEnd = start + size; - if ((start >= pageStart && start < pageEnd) || // Start of request is inside the page - (requestEnd > pageStart && requestEnd <= pageEnd) || // End of request is inside the page - (start <= pageStart && requestEnd >= pageEnd)) { // Request envelops the page + uint64_t end = 
start + size; + if ((start <= pageEnd) && (end >= pageStart)) { + *overlapStart = pageStart; + *overlapEnd = pageEnd; return true; } current = current->next; diff --git a/sim/common/mp_macros.h b/sim/common/mp_macros.h new file mode 100644 index 000000000..fde5ac79e --- /dev/null +++ b/sim/common/mp_macros.h @@ -0,0 +1,327 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +// macro primitives + +#define MP_COMMA , +#define MP_REM(...) __VA_ARGS__ +#define MP_EAT(...) + +#define MP_STRINGIZE_(x) #x +#define MP_STRINGIZE(x) MP_STRINGIZE_(x) + +#define MP_CONCAT_(x, ...) x ## __VA_ARGS__ +#define MP_CONCAT(x, ...) MP_CONCAT_(x, __VA_ARGS__) + +#define MP_COUNTOF(arr) (sizeof(arr) / sizeof(arr[0])) + +// conditional macro + +#define MP_IIF_0(x, y) y +#define MP_IIF_1(x, y) x +#define MP_IIF(c) MP_CONCAT(MP_IIF_, c) + +#define MP_PAIR_FIRST(a, b) a +#define MP_PAIR_SECOND(a, b) b + +// pair macros + +#define MP_PAIR(x) MP_REM x +#define MP_PAIR_HEAD_(x, ...) MP_PAIR(x) +#define MP_PAIR_PROBE_(...) (__VA_ARGS__), +#define MP_PAIR_L_(...) 
MP_PAIR_HEAD_(__VA_ARGS__) +#define MP_PAIR_L(x) MP_PAIR_L_(MP_PAIR_PROBE_ x,) +#define MP_PAIR_R(x) MP_EAT x + +// separator macros + +#define MP_SEP_COMMA() , +#define MP_SEP_SEMICOLON() ; +#define MP_SEP_PLUS() + +#define MP_SEP_AND() & +#define MP_SEP_OR() | +#define MP_SEP_COLON() : +#define MP_SEP_SPACE() /**/ +#define MP_SEP_LESS() < +#define MP_SEP_GREATER() > +#define MP_SEP_ANDL() && +#define MP_SEP_ORL() || + +// MAKE_UNIQUE macro + +#define MP_MAKE_UNIQUE(x) MP_CONCAT(x, __COUNTER__) + +// increment macro + +#define MP_INC(x) MP_INC_ ## x +#define MP_INC_0 1 +#define MP_INC_1 2 +#define MP_INC_2 3 +#define MP_INC_3 4 +#define MP_INC_4 5 +#define MP_INC_5 6 +#define MP_INC_6 7 +#define MP_INC_7 8 +#define MP_INC_8 9 +#define MP_INC_9 10 +#define MP_INC_10 11 +#define MP_INC_11 12 +#define MP_INC_12 13 +#define MP_INC_13 14 +#define MP_INC_14 15 +#define MP_INC_15 16 +#define MP_INC_16 17 +#define MP_INC_17 18 +#define MP_INC_18 19 +#define MP_INC_19 20 +#define MP_INC_20 21 +#define MP_INC_21 22 +#define MP_INC_22 23 +#define MP_INC_23 24 +#define MP_INC_24 25 +#define MP_INC_25 26 +#define MP_INC_26 27 +#define MP_INC_27 28 +#define MP_INC_28 29 +#define MP_INC_29 30 +#define MP_INC_30 31 +#define MP_INC_31 32 +#define MP_INC_32 33 +#define MP_INC_33 34 +#define MP_INC_34 35 +#define MP_INC_35 36 +#define MP_INC_36 37 +#define MP_INC_37 38 +#define MP_INC_38 39 +#define MP_INC_39 40 +#define MP_INC_40 41 +#define MP_INC_41 42 +#define MP_INC_42 43 +#define MP_INC_43 44 +#define MP_INC_44 45 +#define MP_INC_45 46 +#define MP_INC_46 47 +#define MP_INC_47 48 +#define MP_INC_48 49 +#define MP_INC_49 50 +#define MP_INC_50 51 +#define MP_INC_51 52 +#define MP_INC_52 53 +#define MP_INC_53 54 +#define MP_INC_54 55 +#define MP_INC_55 56 +#define MP_INC_56 57 +#define MP_INC_57 58 +#define MP_INC_58 59 +#define MP_INC_59 60 +#define MP_INC_60 61 +#define MP_INC_61 62 +#define MP_INC_62 63 +#define MP_INC_63 64 + +// NARG macro + +#define MP_NARG_N(_1, _2, _3, _4, 
_5, _6, _7, _8, _9, _10,_11,_12,_13,_14,_15,_16, \ + _17,_18,_19,_20,_21,_22,_23,_24,_25,_26,_27,_28,_29,_30,_31,_32, \ + _33,_34,_35,_36,_37,_38,_39,_40,_41,_42,_43,_44,_45,_46,_47,_48, \ + _49,_50,_51,_52,_53,_54,_55,_56,_57,_58,_59,_60,_61,_62,_63, N, ...) N + +#define MP_NARG_R() 63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48, \ + 47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32, \ + 31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16, \ + 15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +#define MP_NARG_(...) MP_NARG_N(__VA_ARGS__) +#define MP_NARG(...) MP_NARG_(__VA_ARGS__, MP_NARG_R()) + +// FOR_EACH macro + +#define MP_FOR_EACH_1(idx, func, arg, sep, ...) func(arg, idx, __VA_ARGS__) +#define MP_FOR_EACH_2(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_1(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_3(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_2(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_4(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_3(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_5(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_4(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_6(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_5(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_7(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_6(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_8(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_7(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_9(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_8(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_10(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_9(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_11(idx, func, arg, sep, x, ...) 
func(arg, idx, x) sep() MP_FOR_EACH_10(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_12(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_11(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_13(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_12(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_14(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_13(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_15(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_14(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_16(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_15(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_17(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_16(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_18(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_17(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_19(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_18(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_20(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_19(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_21(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_20(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_22(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_21(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_23(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_22(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_24(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_23(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_25(idx, func, arg, sep, x, ...) 
func(arg, idx, x) sep() MP_FOR_EACH_24(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_26(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_25(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_27(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_26(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_28(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_27(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_29(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_28(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_30(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_29(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_31(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_30(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_32(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_31(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_33(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_32(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_34(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_33(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_35(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_34(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_36(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_35(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_37(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_36(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_38(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_37(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_39(idx, func, arg, sep, x, ...) 
func(arg, idx, x) sep() MP_FOR_EACH_38(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_40(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_39(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_41(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_40(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_42(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_41(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_43(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_42(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_44(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_43(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_45(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_44(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_46(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_45(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_47(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_46(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_48(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_47(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_49(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_48(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_50(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_49(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_51(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_50(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_52(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_51(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_53(idx, func, arg, sep, x, ...) 
func(arg, idx, x) sep() MP_FOR_EACH_52(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_54(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_53(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_55(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_54(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_56(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_55(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_57(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_56(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_58(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_57(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_59(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_58(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_60(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_59(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_61(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_60(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_62(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_61(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_63(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_62(MP_INC(idx), func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH_64(idx, func, arg, sep, x, ...) func(arg, idx, x) sep() MP_FOR_EACH_63(MP_INC(idx), func, arg, sep, __VA_ARGS__) + +#define MP_FOR_EACH_(N, func, arg, sep, ...) MP_CONCAT(MP_FOR_EACH_, N)(0, func, arg, sep, __VA_ARGS__) +#define MP_FOR_EACH(func, arg, sep, ...) MP_FOR_EACH_(MP_NARG(__VA_ARGS__), func, arg, sep, __VA_ARGS__) + +// REVERSE_FOR_EACH macro + +#define MP_REVERSE_FOR_EACH_1(func, arg, sep, ...) func(arg, 0, __VA_ARGS__) +#define MP_REVERSE_FOR_EACH_2(func, arg, sep, x, ...) 
MP_REVERSE_FOR_EACH_1(func, arg, sep, __VA_ARGS__) sep() func(arg, 1, x) +#define MP_REVERSE_FOR_EACH_3(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_2(func, arg, sep, __VA_ARGS__) sep() func(arg, 2, x) +#define MP_REVERSE_FOR_EACH_4(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_3(func, arg, sep, __VA_ARGS__) sep() func(arg, 3, x) +#define MP_REVERSE_FOR_EACH_5(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_4(func, arg, sep, __VA_ARGS__) sep() func(arg, 4, x) +#define MP_REVERSE_FOR_EACH_6(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_5(func, arg, sep, __VA_ARGS__) sep() func(arg, 5, x) +#define MP_REVERSE_FOR_EACH_7(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_6(func, arg, sep, __VA_ARGS__) sep() func(arg, 6, x) +#define MP_REVERSE_FOR_EACH_8(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_7(func, arg, sep, __VA_ARGS__) sep() func(arg, 7, x) +#define MP_REVERSE_FOR_EACH_9(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_8(func, arg, sep, __VA_ARGS__) sep() func(arg, 8, x) +#define MP_REVERSE_FOR_EACH_10(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_9(func, arg, sep, __VA_ARGS__) sep() func(arg, 9, x) +#define MP_REVERSE_FOR_EACH_11(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_10(func, arg, sep, __VA_ARGS__) sep() func(arg, 10, x) +#define MP_REVERSE_FOR_EACH_12(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_11(func, arg, sep, __VA_ARGS__) sep() func(arg, 11, x) +#define MP_REVERSE_FOR_EACH_13(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_12(func, arg, sep, __VA_ARGS__) sep() func(arg, 12, x) +#define MP_REVERSE_FOR_EACH_14(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_13(func, arg, sep, __VA_ARGS__) sep() func(arg, 13, x) +#define MP_REVERSE_FOR_EACH_15(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_14(func, arg, sep, __VA_ARGS__) sep() func(arg, 14, x) +#define MP_REVERSE_FOR_EACH_16(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_15(func, arg, sep, __VA_ARGS__) sep() func(arg, 15, x) +#define MP_REVERSE_FOR_EACH_17(func, arg, sep, x, ...) 
MP_REVERSE_FOR_EACH_16(func, arg, sep, __VA_ARGS__) sep() func(arg, 16, x) +#define MP_REVERSE_FOR_EACH_18(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_17(func, arg, sep, __VA_ARGS__) sep() func(arg, 17, x) +#define MP_REVERSE_FOR_EACH_19(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_18(func, arg, sep, __VA_ARGS__) sep() func(arg, 18, x) +#define MP_REVERSE_FOR_EACH_20(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_19(func, arg, sep, __VA_ARGS__) sep() func(arg, 19, x) +#define MP_REVERSE_FOR_EACH_21(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_20(func, arg, sep, __VA_ARGS__) sep() func(arg, 20, x) +#define MP_REVERSE_FOR_EACH_22(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_21(func, arg, sep, __VA_ARGS__) sep() func(arg, 21, x) +#define MP_REVERSE_FOR_EACH_23(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_22(func, arg, sep, __VA_ARGS__) sep() func(arg, 22, x) +#define MP_REVERSE_FOR_EACH_24(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_23(func, arg, sep, __VA_ARGS__) sep() func(arg, 23, x) +#define MP_REVERSE_FOR_EACH_25(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_24(func, arg, sep, __VA_ARGS__) sep() func(arg, 24, x) +#define MP_REVERSE_FOR_EACH_26(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_25(func, arg, sep, __VA_ARGS__) sep() func(arg, 25, x) +#define MP_REVERSE_FOR_EACH_27(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_26(func, arg, sep, __VA_ARGS__) sep() func(arg, 26, x) +#define MP_REVERSE_FOR_EACH_28(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_27(func, arg, sep, __VA_ARGS__) sep() func(arg, 27, x) +#define MP_REVERSE_FOR_EACH_29(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_28(func, arg, sep, __VA_ARGS__) sep() func(arg, 28, x) +#define MP_REVERSE_FOR_EACH_30(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_29(func, arg, sep, __VA_ARGS__) sep() func(arg, 29, x) +#define MP_REVERSE_FOR_EACH_31(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_30(func, arg, sep, __VA_ARGS__) sep() func(arg, 30, x) +#define MP_REVERSE_FOR_EACH_32(func, arg, sep, x, ...) 
MP_REVERSE_FOR_EACH_31(func, arg, sep, __VA_ARGS__) sep() func(arg, 31, x) +#define MP_REVERSE_FOR_EACH_33(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_32(func, arg, sep, __VA_ARGS__) sep() func(arg, 32, x) +#define MP_REVERSE_FOR_EACH_34(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_33(func, arg, sep, __VA_ARGS__) sep() func(arg, 33, x) +#define MP_REVERSE_FOR_EACH_35(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_34(func, arg, sep, __VA_ARGS__) sep() func(arg, 34, x) +#define MP_REVERSE_FOR_EACH_36(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_35(func, arg, sep, __VA_ARGS__) sep() func(arg, 35, x) +#define MP_REVERSE_FOR_EACH_37(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_36(func, arg, sep, __VA_ARGS__) sep() func(arg, 36, x) +#define MP_REVERSE_FOR_EACH_38(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_37(func, arg, sep, __VA_ARGS__) sep() func(arg, 37, x) +#define MP_REVERSE_FOR_EACH_39(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_38(func, arg, sep, __VA_ARGS__) sep() func(arg, 38, x) +#define MP_REVERSE_FOR_EACH_40(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_39(func, arg, sep, __VA_ARGS__) sep() func(arg, 39, x) +#define MP_REVERSE_FOR_EACH_41(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_40(func, arg, sep, __VA_ARGS__) sep() func(arg, 40, x) +#define MP_REVERSE_FOR_EACH_42(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_41(func, arg, sep, __VA_ARGS__) sep() func(arg, 41, x) +#define MP_REVERSE_FOR_EACH_43(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_42(func, arg, sep, __VA_ARGS__) sep() func(arg, 42, x) +#define MP_REVERSE_FOR_EACH_44(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_43(func, arg, sep, __VA_ARGS__) sep() func(arg, 43, x) +#define MP_REVERSE_FOR_EACH_45(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_44(func, arg, sep, __VA_ARGS__) sep() func(arg, 44, x) +#define MP_REVERSE_FOR_EACH_46(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_45(func, arg, sep, __VA_ARGS__) sep() func(arg, 45, x) +#define MP_REVERSE_FOR_EACH_47(func, arg, sep, x, ...) 
MP_REVERSE_FOR_EACH_46(func, arg, sep, __VA_ARGS__) sep() func(arg, 46, x) +#define MP_REVERSE_FOR_EACH_48(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_47(func, arg, sep, __VA_ARGS__) sep() func(arg, 47, x) +#define MP_REVERSE_FOR_EACH_49(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_48(func, arg, sep, __VA_ARGS__) sep() func(arg, 48, x) +#define MP_REVERSE_FOR_EACH_50(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_49(func, arg, sep, __VA_ARGS__) sep() func(arg, 49, x) +#define MP_REVERSE_FOR_EACH_51(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_50(func, arg, sep, __VA_ARGS__) sep() func(arg, 50, x) +#define MP_REVERSE_FOR_EACH_52(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_51(func, arg, sep, __VA_ARGS__) sep() func(arg, 51, x) +#define MP_REVERSE_FOR_EACH_53(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_52(func, arg, sep, __VA_ARGS__) sep() func(arg, 52, x) +#define MP_REVERSE_FOR_EACH_54(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_53(func, arg, sep, __VA_ARGS__) sep() func(arg, 53, x) +#define MP_REVERSE_FOR_EACH_55(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_54(func, arg, sep, __VA_ARGS__) sep() func(arg, 54, x) +#define MP_REVERSE_FOR_EACH_56(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_55(func, arg, sep, __VA_ARGS__) sep() func(arg, 55, x) +#define MP_REVERSE_FOR_EACH_57(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_56(func, arg, sep, __VA_ARGS__) sep() func(arg, 56, x) +#define MP_REVERSE_FOR_EACH_58(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_57(func, arg, sep, __VA_ARGS__) sep() func(arg, 57, x) +#define MP_REVERSE_FOR_EACH_59(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_58(func, arg, sep, __VA_ARGS__) sep() func(arg, 58, x) +#define MP_REVERSE_FOR_EACH_60(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_59(func, arg, sep, __VA_ARGS__) sep() func(arg, 59, x) +#define MP_REVERSE_FOR_EACH_61(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_60(func, arg, sep, __VA_ARGS__) sep() func(arg, 60, x) +#define MP_REVERSE_FOR_EACH_62(func, arg, sep, x, ...) 
MP_REVERSE_FOR_EACH_61(func, arg, sep, __VA_ARGS__) sep() func(arg, 61, x) +#define MP_REVERSE_FOR_EACH_63(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_62(func, arg, sep, __VA_ARGS__) sep() func(arg, 62, x) +#define MP_REVERSE_FOR_EACH_64(func, arg, sep, x, ...) MP_REVERSE_FOR_EACH_63(func, arg, sep, __VA_ARGS__) sep() func(arg, 63, x) + +#define MP_REVERSE_FOR_EACH_(N, func, arg, sep, ...) MP_CONCAT(MP_REVERSE_FOR_EACH_, N)(func, arg, sep, __VA_ARGS__) +#define MP_REVERSE_FOR_EACH(func, arg, sep, ...) MP_REVERSE_FOR_EACH_(MP_NARG(__VA_ARGS__), func, arg, sep, __VA_ARGS__) + +#define MP_FIRST_ARG_(N, ...) N +#define MP_FIRST_ARG(...) MP_FIRST_ARG_(__VA_ARGS__, ignore) + +// MP_REPEAT macro + +#define MP_REPEAT_0(func, sep) +#define MP_REPEAT_1(func, sep) func(0) +#define MP_REPEAT_2(func, sep) MP_REPEAT_1(func, sep) sep func(1) +#define MP_REPEAT_3(func, sep) MP_REPEAT_2(func, sep) sep func(2) +#define MP_REPEAT_4(func, sep) MP_REPEAT_3(func, sep) sep func(3) +#define MP_REPEAT_5(func, sep) MP_REPEAT_4(func, sep) sep func(4) +#define MP_REPEAT_6(func, sep) MP_REPEAT_5(func, sep) sep func(5) +#define MP_REPEAT_7(func, sep) MP_REPEAT_6(func, sep) sep func(6) +#define MP_REPEAT_8(func, sep) MP_REPEAT_7(func, sep) sep func(7) +#define MP_REPEAT_9(func, sep) MP_REPEAT_8(func, sep) sep func(8) +#define MP_REPEAT_10(func, sep) MP_REPEAT_9(func, sep) sep func(9) +#define MP_REPEAT_11(func, sep) MP_REPEAT_10(func, sep) sep func(10) +#define MP_REPEAT_12(func, sep) MP_REPEAT_11(func, sep) sep func(11) +#define MP_REPEAT_13(func, sep) MP_REPEAT_12(func, sep) sep func(12) +#define MP_REPEAT_14(func, sep) MP_REPEAT_13(func, sep) sep func(13) +#define MP_REPEAT_15(func, sep) MP_REPEAT_14(func, sep) sep func(14) +#define MP_REPEAT_16(func, sep) MP_REPEAT_15(func, sep) sep func(15) +#define MP_REPEAT_17(func, sep) MP_REPEAT_16(func, sep) sep func(16) +#define MP_REPEAT_18(func, sep) MP_REPEAT_17(func, sep) sep func(17) +#define MP_REPEAT_19(func, sep) MP_REPEAT_18(func, sep) sep 
func(18) +#define MP_REPEAT_20(func, sep) MP_REPEAT_19(func, sep) sep func(19) +#define MP_REPEAT_21(func, sep) MP_REPEAT_20(func, sep) sep func(20) +#define MP_REPEAT_22(func, sep) MP_REPEAT_21(func, sep) sep func(21) +#define MP_REPEAT_23(func, sep) MP_REPEAT_22(func, sep) sep func(22) +#define MP_REPEAT_24(func, sep) MP_REPEAT_23(func, sep) sep func(23) +#define MP_REPEAT_25(func, sep) MP_REPEAT_24(func, sep) sep func(24) +#define MP_REPEAT_26(func, sep) MP_REPEAT_25(func, sep) sep func(25) +#define MP_REPEAT_27(func, sep) MP_REPEAT_26(func, sep) sep func(26) +#define MP_REPEAT_28(func, sep) MP_REPEAT_27(func, sep) sep func(27) +#define MP_REPEAT_29(func, sep) MP_REPEAT_28(func, sep) sep func(28) +#define MP_REPEAT_30(func, sep) MP_REPEAT_29(func, sep) sep func(29) +#define MP_REPEAT_31(func, sep) MP_REPEAT_30(func, sep) sep func(30) +#define MP_REPEAT_32(func, sep) MP_REPEAT_31(func, sep) sep func(31) +#define MP_REPEAT(N, func, sep) MP_CONCAT(MP_REPEAT_, N)(func, sep) diff --git a/sim/opaesim/Makefile b/sim/opaesim/Makefile index 2e549ca74..b04f8ddb4 100644 --- a/sim/opaesim/Makefile +++ b/sim/opaesim/Makefile @@ -30,21 +30,23 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED # AFU parameters -CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 +ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_BANKS=2 endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 +ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) + ifeq ($(XLEN),64) + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47 + else + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31 + endif endif -ifeq (,$(findstring 
PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 +ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512 endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 +ifeq (,$(findstring PLATFORM_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_BURST_CNT_WIDTH=4 endif DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) @@ -54,12 +56,12 @@ SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/opae_sim.cpp RTL_PKGS = $(AFU_DIR)/local_mem_cfg_pkg.sv $(AFU_DIR)/ccip/ccip_if_pkg.sv -RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src + RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src endif RTL_INCLUDE = -I$(SRC_DIR) -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip @@ -67,19 +69,19 
@@ RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip TOP = vortex_afu_shim VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic -VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) VL_FLAGS += $(CONFIGS) -VL_FLAGS += $(SRC_DIR)/verilator.vlt +VL_FLAGS += verilator.vlt VL_FLAGS += $(RTL_INCLUDE) VL_FLAGS += $(RTL_PKGS) CXXFLAGS += $(CONFIGS) # Enable Verilator multithreaded simulation -THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(mp.cpu_count())') VL_FLAGS += -j $(THREADS) #VL_FLAGS += --threads $(THREADS) diff --git a/sim/opaesim/fpga.cpp b/sim/opaesim/fpga.cpp index 6c8ce8b2f..d16ef97a1 100644 --- a/sim/opaesim/fpga.cpp +++ b/sim/opaesim/fpga.cpp @@ -93,6 +93,8 @@ extern fpga_result fpgaClose(fpga_handle handle) { return FPGA_INVALID_PARAM; auto sim = reinterpret_cast(handle); + sim->shutdown(); + delete sim; return FPGA_OK; diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 7a1bae3e4..fe1832c1b 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -35,21 +35,12 @@ #include #include -//#ifndef MEMORY_BANKS - #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #else - #define MEMORY_BANKS 2 - #endif -//#endif +#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8) #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 #endif -#undef MEM_BLOCK_SIZE -#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8) - #define CACHE_BLOCK_SIZE 64 #define CCI_LATENCY 8 @@ -87,8 +78,9 @@ static uint64_t trace_stop_time = TRACE_STOP_TIME; bool sim_trace_enabled() { if (timestamp >= trace_start_time - && timestamp < trace_stop_time) + && timestamp < trace_stop_time) { return true; + } return trace_enabled; } @@ 
-119,6 +111,9 @@ class opae_sim::Impl { for (auto& buffer : host_buffers_) { aligned_free(buffer.second.data); } + if (ram_) { + delete ram_; + } #ifdef VCD_OUTPUT if (tfp_) { tfp_->close(); @@ -128,13 +123,10 @@ class opae_sim::Impl { if (device_) { delete device_; } - if (ram_) { - delete ram_; - } } int init() { - // force random values for unitialized signals + // force random values for uninitialized signals Verilated::randReset(VERILATOR_RESET_VALUE); Verilated::randSeed(50); @@ -151,41 +143,43 @@ class opae_sim::Impl { tfp_->open("trace.vcd"); #endif + // allocate RAM ram_ = new RAM(0, RAM_PAGE_SIZE); - #ifndef NDEBUG - // dump device configuration - std::cout << "CONFIGS:" - << " num_threads=" << NUM_THREADS - << ", num_warps=" << NUM_WARPS - << ", num_cores=" << NUM_CORES - << ", num_clusters=" << NUM_CLUSTERS - << ", socket_size=" << SOCKET_SIZE - << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec - << ", num_barriers=" << NUM_BARRIERS - << std::endl; - #endif + // calculate memory bank size + mem_bank_size_ = 1ull << PLATFORM_MEMORY_ADDR_WIDTH; + // reset the device this->reset(); + // Turn on assertion after reset + Verilated::assertOn(true); + // launch execution thread future_ = std::async(std::launch::async, [&]{ - while (!stop_) { - std::lock_guard guard(mutex_); - this->tick(); - } + while (!stop_) { + std::lock_guard guard(mutex_); + this->tick(); + } }); return 0; } + void shutdown() { + stop_ = true; + if (future_.valid()) { + future_.wait(); + } + } + int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) { auto alloc = aligned_malloc(len, CACHE_BLOCK_SIZE); if (alloc == NULL) return -1; // set uninitialized data to "baadf00d" for (uint32_t i = 0; i < len; ++i) { - ((uint8_t*)alloc)[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff; + ((uint8_t*)alloc)[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff; } host_buffer_t buffer; buffer.data = (uint64_t*)alloc; @@ -214,8 +208,9 @@ class opae_sim::Impl { std::lock_guard 
guard(mutex_); // simulate CPU-GPU latency - for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i) + for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i) { this->tick(); + } // simulate mmio request device_->vcp2af_sRxPort_c0_mmioRdValid = 1; @@ -232,8 +227,9 @@ class opae_sim::Impl { std::lock_guard guard(mutex_); // simulate CPU-GPU latency - for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i) + for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i) { this->tick(); + } // simulate mmio request device_->vcp2af_sRxPort_c0_mmioWrValid = 1; @@ -270,16 +266,6 @@ class opae_sim::Impl { } device_->reset = 0; - - for (int i = 0; i < RESET_DELAY; ++i) { - device_->clk = 0; - this->eval(); - device_->clk = 1; - this->eval(); - } - - // Turn on assertion after reset - Verilated::assertOn(true); } void tick() { @@ -300,13 +286,13 @@ class opae_sim::Impl { } } + dram_sim_.tick(); + device_->clk = 0; this->eval(); device_->clk = 1; this->eval(); - dram_sim_.tick(); - #ifndef NDEBUG fflush(stdout); #endif @@ -341,13 +327,14 @@ class opae_sim::Impl { void sRxPort_bus_eval() { // check mmio request bool mmio_req_enabled = device_->vcp2af_sRxPort_c0_mmioRdValid - || device_->vcp2af_sRxPort_c0_mmioWrValid; + || device_->vcp2af_sRxPort_c0_mmioWrValid; // schedule CCI read responses std::list::iterator cci_rd_it(cci_reads_.end()); for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) { - if (it->cycles_left > 0) + if (it->cycles_left > 0) { it->cycles_left -= 1; + } if ((cci_rd_it == ie) && (it->cycles_left == 0)) { cci_rd_it = it; } @@ -356,8 +343,9 @@ class opae_sim::Impl { // schedule CCI write responses std::list::iterator cci_wr_it(cci_writes_.end()); for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) { - if (it->cycles_left > 0) + if (it->cycles_left > 0) { it->cycles_left -= 1; + } if ((cci_wr_it == ie) && (it->cycles_left == 0)) { cci_wr_it = it; } @@ -375,7 +363,7 @@ class opae_sim::Impl { // send CCI read response (ensure mmio disabled) 
device_->vcp2af_sRxPort_c0_rspValid = 0; if (!mmio_req_enabled - && (cci_rd_it != cci_reads_.end())) { + && (cci_rd_it != cci_reads_.end())) { device_->vcp2af_sRxPort_c0_rspValid = 1; device_->vcp2af_sRxPort_c0_hdr_resp_type = 0; memcpy(device_->vcp2af_sRxPort_c0_data, cci_rd_it->data.data(), CACHE_BLOCK_SIZE); @@ -419,15 +407,14 @@ class opae_sim::Impl { } void avs_bus_reset() { - for (int b = 0; b < MEMORY_BANKS; ++b) { - pending_mem_reqs_[b].clear(); + for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) { device_->avs_readdatavalid[b] = 0; device_->avs_waitrequest[b] = 0; } } void avs_bus_eval() { - for (int b = 0; b < MEMORY_BANKS; ++b) { + for (int b = 0; b < PLATFORM_MEMORY_BANKS; ++b) { // process memory responses device_->avs_readdatavalid[b] = 0; if (!pending_mem_reqs_[b].empty() @@ -435,7 +422,7 @@ class opae_sim::Impl { auto mem_rd_it = pending_mem_reqs_[b].begin(); auto mem_req = *mem_rd_it; device_->avs_readdatavalid[b] = 1; - memcpy(device_->avs_readdata[b], mem_req->data.data(), MEM_BLOCK_SIZE); + memcpy(device_->avs_readdata[b], mem_req->data.data(), PLATFORM_MEMORY_DATA_SIZE); uint32_t addr = mem_req->addr; pending_mem_reqs_[b].erase(mem_rd_it); delete mem_req; @@ -443,19 +430,20 @@ class opae_sim::Impl { // process memory requests assert(!device_->avs_read[b] || !device_->avs_write[b]); - unsigned byte_addr = (device_->avs_address[b] * MEMORY_BANKS + b) * MEM_BLOCK_SIZE; + uint64_t byte_addr = b * mem_bank_size_ + uint64_t(device_->avs_address[b]) * PLATFORM_MEMORY_DATA_SIZE; if (device_->avs_write[b]) { + // process write request uint64_t byteen = device_->avs_byteenable[b]; uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data()); - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) { if ((byteen >> i) & 0x1) { (*ram_)[byte_addr + i] = data[i]; } } - /*printf("%0ld: [sim] MEM Wr Req: bank=%d, 0x%x, data=0x", timestamp, b, byte_addr); - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - printf("%02x", 
data[(MEM_BLOCK_SIZE-1)-i]); + /*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=0x%lx, byteen=0x%lx, data=0x", timestamp, b, byte_addr, byteen); + for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { + printf("%02x", data[i]); } printf("\n");*/ @@ -469,22 +457,20 @@ class opae_sim::Impl { dram_queue_.push(mem_req); } else if (device_->avs_read[b]) { + // process read request auto mem_req = new mem_req_t(); mem_req->addr = device_->avs_address[b]; mem_req->bank_id = b; - ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE); + ram_->read(mem_req->data.data(), byte_addr, PLATFORM_MEMORY_DATA_SIZE); mem_req->write = false; mem_req->ready = false; pending_mem_reqs_[b].emplace_back(mem_req); - /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=%x, pending={", timestamp, b, mem_req.addr * MEM_BLOCK_SIZE); - for (auto& req : pending_mem_reqs_[b]) { - if (req.cycles_left != 0) - printf(" !%0x", req.addr * MEM_BLOCK_SIZE); - else - printf(" %0x", req.addr * MEM_BLOCK_SIZE); + /*printf("%0ld: [sim] MEM Rd Req: bank=%d, addr=0x%lx, pending={", timestamp, b, byte_addr); + for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { + printf("%02x", mem_req->data[i]); } - printf("}\n");*/ + printf("\n");*/ // send dram request dram_queue_.push(mem_req); @@ -495,7 +481,7 @@ class opae_sim::Impl { } typedef struct { - std::array data; + std::array data; uint32_t addr; uint32_t bank_id; bool write; @@ -528,9 +514,10 @@ class opae_sim::Impl { bool stop_; std::unordered_map host_buffers_; - int64_t host_buffer_ids_; + uint64_t host_buffer_ids_; + uint64_t mem_bank_size_; - std::list pending_mem_reqs_[MEMORY_BANKS]; + std::list pending_mem_reqs_[PLATFORM_MEMORY_BANKS]; std::list cci_reads_; std::list cci_writes_; @@ -558,6 +545,10 @@ int opae_sim::init() { return impl_->init(); } +void opae_sim::shutdown() { + impl_->shutdown(); +} + int opae_sim::prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) { return impl_->prepare_buffer(len, buf_addr, wsid, flags); } 
diff --git a/sim/opaesim/opae_sim.h b/sim/opaesim/opae_sim.h index a04ade0a0..454cc1bf7 100644 --- a/sim/opaesim/opae_sim.h +++ b/sim/opaesim/opae_sim.h @@ -25,6 +25,8 @@ class opae_sim { int init(); + void shutdown(); + int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags); void release_buffer(uint64_t wsid); diff --git a/sim/opaesim/verilator.vlt b/sim/opaesim/verilator.vlt deleted file mode 100644 index 66a59bd12..000000000 --- a/sim/opaesim/verilator.vlt +++ /dev/null @@ -1,8 +0,0 @@ -`verilator_config - -lint_off -rule BLKANDNBLK -file "*/fpnew/src/*" -lint_off -rule UNOPTFLAT -file "*/fpnew/src/*" -lint_off -file "*/fpnew/src/*" - -lint_off -file "*/afu/opae/ccip/ccip_if_pkg.sv" -lint_off -file "*/afu/opae/local_mem_cfg_pkg.sv" diff --git a/sim/opaesim/verilator.vlt.in b/sim/opaesim/verilator.vlt.in new file mode 100644 index 000000000..0b118e05e --- /dev/null +++ b/sim/opaesim/verilator.vlt.in @@ -0,0 +1,8 @@ +`verilator_config + +lint_off -rule BLKANDNBLK -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -rule UNOPTFLAT -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -file "@VORTEX_HOME@/third_party/cvfpu/*" + +lint_off -file "@VORTEX_HOME@/hw/rtl/afu/opae/ccip/ccip_if_pkg.sv" +lint_off -file "@VORTEX_HOME@/hw/rtl/afu/opae/local_mem_cfg_pkg.sv" diff --git a/sim/opaesim/vortex_afu_shim.sv b/sim/opaesim/vortex_afu_shim.sv index 8c64c8332..e494ada8e 100644 --- a/sim/opaesim/vortex_afu_shim.sv +++ b/sim/opaesim/vortex_afu_shim.sv @@ -1,30 +1,28 @@ // Copyright © 2019-2023 -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. -`include "VX_platform.vh" +`include "VX_define.vh" `IGNORE_WARNINGS_BEGIN `include "vortex_afu.vh" `IGNORE_WARNINGS_END -`include "VX_define.vh" - module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; ( // global signals input clk, input reset, // IF signals between CCI and AFU - input logic vcp2af_sRxPort_c0_TxAlmFull, + input logic vcp2af_sRxPort_c0_TxAlmFull, input logic vcp2af_sRxPort_c1_TxAlmFull, input t_ccip_vc vcp2af_sRxPort_c0_hdr_vc_used, @@ -35,15 +33,15 @@ module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; ( input t_ccip_c0_rsp vcp2af_sRxPort_c0_hdr_resp_type, input t_ccip_mdata vcp2af_sRxPort_c0_hdr_mdata, input t_ccip_clData vcp2af_sRxPort_c0_data, - input logic vcp2af_sRxPort_c0_rspValid, - input logic vcp2af_sRxPort_c0_mmioRdValid, - input logic vcp2af_sRxPort_c0_mmioWrValid, + input logic vcp2af_sRxPort_c0_rspValid, + input logic vcp2af_sRxPort_c0_mmioRdValid, + input logic vcp2af_sRxPort_c0_mmioWrValid, input t_ccip_mmioAddr vcp2af_sRxPort_c0_ReqMmioHdr_address, - input logic [1:0] vcp2af_sRxPort_c0_ReqMmioHdr_length, + input logic [1:0] vcp2af_sRxPort_c0_ReqMmioHdr_length, input logic vcp2af_sRxPort_c0_ReqMmioHdr_rsvd, - input t_ccip_tid vcp2af_sRxPort_c0_ReqMmioHdr_tid, - + input t_ccip_tid vcp2af_sRxPort_c0_ReqMmioHdr_tid, + input t_ccip_vc vcp2af_sRxPort_c1_hdr_vc_used, input logic vcp2af_sRxPort_c1_hdr_rsvd1, input logic vcp2af_sRxPort_c1_hdr_hit_miss, @@ -51,51 +49,51 @@ module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; ( input logic vcp2af_sRxPort_c1_hdr_rsvd0, input t_ccip_clNum vcp2af_sRxPort_c1_hdr_cl_num, input t_ccip_c1_rsp vcp2af_sRxPort_c1_hdr_resp_type, - input t_ccip_mdata vcp2af_sRxPort_c1_hdr_mdata, - input logic vcp2af_sRxPort_c1_rspValid, - + input t_ccip_mdata vcp2af_sRxPort_c1_hdr_mdata, + input logic vcp2af_sRxPort_c1_rspValid, + output t_ccip_vc 
af2cp_sTxPort_c0_hdr_vc_sel, - output logic [1:0] af2cp_sTxPort_c0_hdr_rsvd1, + output logic [1:0] af2cp_sTxPort_c0_hdr_rsvd1, output t_ccip_clLen af2cp_sTxPort_c0_hdr_cl_len, output t_ccip_c0_req af2cp_sTxPort_c0_hdr_req_type, - output logic [5:0] af2cp_sTxPort_c0_hdr_rsvd0, + output logic [5:0] af2cp_sTxPort_c0_hdr_rsvd0, output t_ccip_clAddr af2cp_sTxPort_c0_hdr_address, output t_ccip_mdata af2cp_sTxPort_c0_hdr_mdata, - output logic af2cp_sTxPort_c0_valid, + output logic af2cp_sTxPort_c0_valid, output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd2, output t_ccip_vc af2cp_sTxPort_c1_hdr_vc_sel, output logic af2cp_sTxPort_c1_hdr_sop, - output logic af2cp_sTxPort_c1_hdr_rsvd1, + output logic af2cp_sTxPort_c1_hdr_rsvd1, output t_ccip_clLen af2cp_sTxPort_c1_hdr_cl_len, output t_ccip_c1_req af2cp_sTxPort_c1_hdr_req_type, - output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd0, + output logic [5:0] af2cp_sTxPort_c1_hdr_rsvd0, output t_ccip_clAddr af2cp_sTxPort_c1_hdr_address, output t_ccip_mdata af2cp_sTxPort_c1_hdr_mdata, - output t_ccip_clData af2cp_sTxPort_c1_data, - output logic af2cp_sTxPort_c1_valid, + output t_ccip_clData af2cp_sTxPort_c1_data, + output logic af2cp_sTxPort_c1_valid, output t_ccip_tid af2cp_sTxPort_c2_hdr_tid, - output logic af2cp_sTxPort_c2_mmioRdValid, - output t_ccip_mmioData af2cp_sTxPort_c2_data, - + output logic af2cp_sTxPort_c2_mmioRdValid, + output t_ccip_mmioData af2cp_sTxPort_c2_data, + // Avalon signals for local memory access - output t_local_mem_data avs_writedata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - input t_local_mem_data avs_readdata [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output t_local_mem_addr avs_address [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - input logic avs_waitrequest [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output logic avs_write [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output logic avs_read [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output t_local_mem_byte_mask avs_byteenable [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - output 
t_local_mem_burst_cnt avs_burstcount [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS], - input avs_readdatavalid [`PLATFORM_PARAM_LOCAL_MEMORY_BANKS] + output t_local_mem_data avs_writedata [`PLATFORM_MEMORY_BANKS], + input t_local_mem_data avs_readdata [`PLATFORM_MEMORY_BANKS], + output t_local_mem_addr avs_address [`PLATFORM_MEMORY_BANKS], + input logic avs_waitrequest [`PLATFORM_MEMORY_BANKS], + output logic avs_write [`PLATFORM_MEMORY_BANKS], + output logic avs_read [`PLATFORM_MEMORY_BANKS], + output t_local_mem_byte_mask avs_byteenable [`PLATFORM_MEMORY_BANKS], + output t_local_mem_burst_cnt avs_burstcount [`PLATFORM_MEMORY_BANKS], + input avs_readdatavalid [`PLATFORM_MEMORY_BANKS] ); t_if_ccip_Rx cp2af_sRxPort; t_if_ccip_Tx af2cp_sTxPort; vortex_afu #( - .NUM_LOCAL_MEM_BANKS(`PLATFORM_PARAM_LOCAL_MEMORY_BANKS) + .NUM_LOCAL_MEM_BANKS(`PLATFORM_MEMORY_BANKS) ) afu ( .clk(clk), .reset(reset), @@ -119,7 +117,7 @@ always @ (*) begin c0_RxHdr.reqMmioHdr.address = vcp2af_sRxPort_c0_ReqMmioHdr_address; c0_RxHdr.reqMmioHdr.length = vcp2af_sRxPort_c0_ReqMmioHdr_length; c0_RxHdr.reqMmioHdr.rsvd = vcp2af_sRxPort_c0_ReqMmioHdr_rsvd; - c0_RxHdr.reqMmioHdr.tid = vcp2af_sRxPort_c0_ReqMmioHdr_tid; + c0_RxHdr.reqMmioHdr.tid = vcp2af_sRxPort_c0_ReqMmioHdr_tid; end else begin c0_RxHdr.rspMemHdr.vc_used = vcp2af_sRxPort_c0_hdr_vc_used; c0_RxHdr.rspMemHdr.rsvd1 = vcp2af_sRxPort_c0_hdr_rsvd1; @@ -134,7 +132,7 @@ end assign cp2af_sRxPort.c0TxAlmFull = vcp2af_sRxPort_c0_TxAlmFull; assign cp2af_sRxPort.c1TxAlmFull = vcp2af_sRxPort_c1_TxAlmFull; -assign cp2af_sRxPort.c0.hdr = c0_RxHdr; +assign cp2af_sRxPort.c0.hdr = c0_RxHdr; assign cp2af_sRxPort.c0.data = vcp2af_sRxPort_c0_data; assign cp2af_sRxPort.c0.rspValid = vcp2af_sRxPort_c0_rspValid; assign cp2af_sRxPort.c0.mmioRdValid = vcp2af_sRxPort_c0_mmioRdValid; @@ -147,8 +145,8 @@ assign cp2af_sRxPort.c1.hdr.format = vcp2af_sRxPort_c1_hdr_format; assign cp2af_sRxPort.c1.hdr.rsvd0 = vcp2af_sRxPort_c1_hdr_rsvd0; assign cp2af_sRxPort.c1.hdr.cl_num = 
vcp2af_sRxPort_c1_hdr_cl_num; assign cp2af_sRxPort.c1.hdr.resp_type = vcp2af_sRxPort_c1_hdr_resp_type; -assign cp2af_sRxPort.c1.hdr.mdata = vcp2af_sRxPort_c1_hdr_mdata; -assign cp2af_sRxPort.c1.rspValid = vcp2af_sRxPort_c1_rspValid; +assign cp2af_sRxPort.c1.hdr.mdata = vcp2af_sRxPort_c1_hdr_mdata; +assign cp2af_sRxPort.c1.rspValid = vcp2af_sRxPort_c1_rspValid; assign af2cp_sTxPort_c0_hdr_vc_sel = af2cp_sTxPort.c0.hdr.vc_sel; assign af2cp_sTxPort_c0_hdr_rsvd1 = af2cp_sTxPort.c0.hdr.rsvd1; @@ -168,11 +166,11 @@ assign af2cp_sTxPort_c1_hdr_req_type = af2cp_sTxPort.c1.hdr.req_type; assign af2cp_sTxPort_c1_hdr_rsvd0 = af2cp_sTxPort.c1.hdr.rsvd0; assign af2cp_sTxPort_c1_hdr_address = af2cp_sTxPort.c1.hdr.address; assign af2cp_sTxPort_c1_hdr_mdata = af2cp_sTxPort.c1.hdr.mdata; -assign af2cp_sTxPort_c1_data = af2cp_sTxPort.c1.data; +assign af2cp_sTxPort_c1_data = af2cp_sTxPort.c1.data; assign af2cp_sTxPort_c1_valid = af2cp_sTxPort.c1.valid; -assign af2cp_sTxPort_c2_hdr_tid = af2cp_sTxPort.c2.hdr.tid; -assign af2cp_sTxPort_c2_mmioRdValid = af2cp_sTxPort.c2.mmioRdValid; +assign af2cp_sTxPort_c2_hdr_tid = af2cp_sTxPort.c2.hdr.tid; +assign af2cp_sTxPort_c2_mmioRdValid = af2cp_sTxPort.c2.mmioRdValid; assign af2cp_sTxPort_c2_data = af2cp_sTxPort.c2.data; endmodule diff --git a/sim/rtlsim/Makefile b/sim/rtlsim/Makefile index 3deffc759..50b9c5c1f 100644 --- a/sim/rtlsim/Makefile +++ b/sim/rtlsim/Makefile @@ -26,12 +26,12 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) -RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv - FPU_INCLUDE += 
-I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src + RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src endif RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) @@ -48,20 +48,20 @@ endif VL_FLAGS = --exe VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic -VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO VL_FLAGS += --x-initial unique --x-assign unique -VL_FLAGS += $(SRC_DIR)/verilator.vlt +VL_FLAGS += verilator.vlt VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) VL_FLAGS += $(CONFIGS) VL_FLAGS += $(RTL_INCLUDE) VL_FLAGS += $(RTL_PKGS) -VL_FLAGS += --cc $(TOP) --top-module $(TOP) +VL_FLAGS += --cc Vortex --top-module Vortex CXXFLAGS += $(CONFIGS) # Enable Verilator multithreaded simulation -THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(mp.cpu_count())') VL_FLAGS += -j $(THREADS) #VL_FLAGS += --threads $(THREADS) diff --git a/sim/rtlsim/main.cpp b/sim/rtlsim/main.cpp index ea0ba9b95..16ce79550 100644 --- a/sim/rtlsim/main.cpp +++ b/sim/rtlsim/main.cpp @@ -33,13 +33,11 @@ const char* program = nullptr; static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "rh?")) != -1) { + while ((c = getopt(argc, argv, "rh")) != -1) { switch (c) { case 'h': - case '?': - show_usage(); - exit(0); - break; + 
show_usage(); + exit(0); default: show_usage(); exit(-1); diff --git a/sim/rtlsim/processor.cpp b/sim/rtlsim/processor.cpp index e5e00f49e..32f4b4e1e 100644 --- a/sim/rtlsim/processor.cpp +++ b/sim/rtlsim/processor.cpp @@ -13,13 +13,7 @@ #include "processor.h" -#ifdef AXI_BUS -#include "VVortex_axi.h" -typedef VVortex_axi Device; -#else #include "VVortex.h" -typedef VVortex Device; -#endif #ifdef VCD_OUTPUT #include @@ -41,14 +35,6 @@ typedef VVortex Device; #include #include -#ifndef MEMORY_BANKS - #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #else - #define MEMORY_BANKS 2 - #endif -#endif - #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 #endif @@ -106,7 +92,7 @@ void sim_trace_enable(bool enable) { class Processor::Impl { public: Impl() : dram_sim_(MEM_CLOCK_RATIO) { - // force random values for unitialized signals + // force random values for uninitialized signals Verilated::randReset(VERILATOR_RESET_VALUE); Verilated::randSeed(50); @@ -114,7 +100,7 @@ class Processor::Impl { Verilated::assertOn(false); // create RTL module instance - device_ = new Device(); + device_ = new VVortex(); #ifdef VCD_OUTPUT Verilated::traceEverOn(true); @@ -125,18 +111,6 @@ class Processor::Impl { ram_ = nullptr; - #ifndef NDEBUG - // dump device configuration - std::cout << "CONFIGS:" - << " num_threads=" << NUM_THREADS - << ", num_warps=" << NUM_WARPS - << ", num_cores=" << NUM_CORES - << ", num_clusters=" << NUM_CLUSTERS - << ", socket_size=" << SOCKET_SIZE - << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec - << ", num_barriers=" << NUM_BARRIERS - << std::endl; - #endif // reset the device this->reset(); @@ -169,14 +143,16 @@ class Processor::Impl { } void run() { - #ifndef NDEBUG std::cout << std::dec << timestamp << ": [sim] run()" << std::endl; #endif - // start execution - running_ = true; + // reset device + this->reset(); + + // start device_->reset = 0; + device_->mem_req_ready = 1; // wait on 
device to go busy while (!device_->busy) { @@ -188,8 +164,8 @@ class Processor::Impl { this->tick(); } - // reset device - this->reset(); + // stop + device_->reset = 1; this->cout_flush(); } @@ -198,18 +174,18 @@ class Processor::Impl { device_->dcr_wr_valid = 1; device_->dcr_wr_addr = addr; device_->dcr_wr_data = value; - while (device_->dcr_wr_valid) { - this->tick(); - } + this->tick(); + device_->dcr_wr_valid = 0; + this->tick(); } private: void reset() { - running_ = false; + this->mem_bus_reset(); + this->dcr_bus_reset(); print_bufs_.clear(); - pending_mem_reqs_.clear(); { @@ -217,13 +193,6 @@ class Processor::Impl { std::swap(dram_queue_, empty); } - mem_rd_rsp_active_ = false; - mem_wr_rsp_active_ = false; - - this->mem_bus_reset(); - - this->dcr_bus_reset(); - device_->reset = 1; for (int i = 0; i < RESET_DELAY; ++i) { @@ -240,13 +209,11 @@ class Processor::Impl { this->eval(); this->mem_bus_eval(0); - this->dcr_bus_eval(0); device_->clk = 1; this->eval(); this->mem_bus_eval(1); - this->dcr_bus_eval(1); dram_sim_.tick(); @@ -281,161 +248,6 @@ class Processor::Impl { ++timestamp; } -#ifdef AXI_BUS - - void mem_bus_reset() { - device_->m_axi_wready[0] = 0; - device_->m_axi_awready[0] = 0; - device_->m_axi_arready[0] = 0; - device_->m_axi_rvalid[0] = 0; - device_->m_axi_bvalid[0] = 0; - } - - void mem_bus_eval(bool clk) { - if (!clk) { - mem_rd_rsp_ready_ = device_->m_axi_rready[0]; - mem_wr_rsp_ready_ = device_->m_axi_bready[0]; - return; - } - - if (ram_ == nullptr) { - device_->m_axi_wready[0] = 0; - device_->m_axi_awready[0] = 0; - device_->m_axi_arready[0] = 0; - return; - } - - // process memory read responses - if (mem_rd_rsp_active_ - && device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) { - mem_rd_rsp_active_ = false; - } - if (!mem_rd_rsp_active_) { - if (!pending_mem_reqs_.empty() - && (*pending_mem_reqs_.begin())->ready - && !(*pending_mem_reqs_.begin())->write) { - auto mem_rsp_it = pending_mem_reqs_.begin(); - auto mem_rsp = *mem_rsp_it; - /* - 
printf("%0ld: [sim] MEM Rd Rsp: addr=0x%0lx, data=0x", timestamp, mem_rsp->addr); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", mem_rsp->block[i]); - } - printf("\n"); - */ - device_->m_axi_rvalid[0] = 1; - device_->m_axi_rid[0] = mem_rsp->tag; - device_->m_axi_rresp[0] = 0; - device_->m_axi_rlast[0] = 1; - memcpy(device_->m_axi_rdata[0].data(), mem_rsp->block.data(), MEM_BLOCK_SIZE); - pending_mem_reqs_.erase(mem_rsp_it); - mem_rd_rsp_active_ = true; - delete mem_rsp; - } else { - device_->m_axi_rvalid[0] = 0; - } - } - - // process memory write responses - if (mem_wr_rsp_active_ - && device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) { - mem_wr_rsp_active_ = false; - } - if (!mem_wr_rsp_active_) { - if (!pending_mem_reqs_.empty() - && (*pending_mem_reqs_.begin())->ready - && (*pending_mem_reqs_.begin())->write) { - auto mem_rsp_it = pending_mem_reqs_.begin(); - auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Wr Rsp: addr=0x%0lx\n", timestamp, mem_rsp->addr); - */ - device_->m_axi_bvalid[0] = 1; - device_->m_axi_bid[0] = mem_rsp->tag; - device_->m_axi_bresp[0] = 0; - pending_mem_reqs_.erase(mem_rsp_it); - mem_wr_rsp_active_ = true; - delete mem_rsp; - } else { - device_->m_axi_bvalid[0] = 0; - } - } - - // select the memory bank - uint32_t req_addr = device_->m_axi_wvalid[0] ? 
device_->m_axi_awaddr[0] : device_->m_axi_araddr[0]; - - // process memory requests - if ((device_->m_axi_wvalid[0] || device_->m_axi_arvalid[0]) && running_) { - if (device_->m_axi_wvalid[0]) { - auto byteen = device_->m_axi_wstrb[0]; - auto base_addr = device_->m_axi_awaddr[0]; - auto data = (uint8_t*)device_->m_axi_wdata[0].data(); - - if (base_addr >= uint64_t(IO_COUT_ADDR) - && base_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { - // process console output - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - auto& ss_buf = print_bufs_[i]; - char c = data[i]; - ss_buf << c; - if (c == '\n') { - std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; - ss_buf.str(""); - } - } - } - } else { - // process writes - /* - printf("%0ld: [sim] MEM Wr: addr=0x%0lx, byteen=0x", timestamp, base_addr); - for (int i = (MEM_BLOCK_SIZE/4)-1; i >= 0; --i) { - printf("%x", (int)((byteen >> (4 * i)) & 0xf)); - } - printf(", data=0x"); - for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", data[i]); - } - printf("\n"); - */ - for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - if ((byteen >> i) & 0x1) { - (*ram_)[base_addr + i] = data[i]; - } - } - - auto mem_req = new mem_req_t(); - mem_req->tag = device_->m_axi_awid[0]; - mem_req->addr = device_->m_axi_awaddr[0]; - mem_req->write = true; - mem_req->ready = false; - pending_mem_reqs_.emplace_back(mem_req); - - // send dram request - dram_queue_.push(mem_req); - } - } else { - // process reads - auto mem_req = new mem_req_t(); - mem_req->tag = device_->m_axi_arid[0]; - mem_req->addr = device_->m_axi_araddr[0]; - ram_->read(mem_req->block.data(), device_->m_axi_araddr[0], MEM_BLOCK_SIZE); - mem_req->write = false; - mem_req->ready = false; - pending_mem_reqs_.emplace_back(mem_req); - - // send dram request - dram_queue_.push(mem_req); - } - } - - device_->m_axi_wready[0] = running_; - device_->m_axi_awready[0] = running_; - device_->m_axi_arready[0] = running_; - } - -#else - void 
mem_bus_reset() { device_->mem_req_ready = 0; device_->mem_rsp_valid = 0; @@ -447,46 +259,35 @@ class Processor::Impl { return; } - if (ram_ == nullptr) { - device_->mem_req_ready = 0; - return; - } - // process memory read responses - if (mem_rd_rsp_active_ - && device_->mem_rsp_valid && mem_rd_rsp_ready_) { - mem_rd_rsp_active_ = false; + if (device_->mem_rsp_valid && mem_rd_rsp_ready_) { + device_->mem_rsp_valid = 0; } - if (!mem_rd_rsp_active_) { + if (!device_->mem_rsp_valid) { if (!pending_mem_reqs_.empty() && (*pending_mem_reqs_.begin())->ready) { - device_->mem_rsp_valid = 1; auto mem_rsp_it = pending_mem_reqs_.begin(); auto mem_rsp = *mem_rsp_it; - /* - printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); + /*printf("%0ld: [sim] MEM Rd Rsp: tag=0x%0lx, addr=0x%0lx, data=0x", timestamp, mem_rsp->tag, mem_rsp->addr); for (int i = MEM_BLOCK_SIZE-1; i >= 0; --i) { - printf("%02x", mem_rsp->block[i]); + printf("%02x", mem_rsp->data[i]); } printf("\n"); */ - memcpy(VDataCast::get(device_->mem_rsp_data), mem_rsp->block.data(), MEM_BLOCK_SIZE); + device_->mem_rsp_valid = 1; + memcpy(VDataCast::get(device_->mem_rsp_data), mem_rsp->data.data(), MEM_BLOCK_SIZE); device_->mem_rsp_tag = mem_rsp->tag; pending_mem_reqs_.erase(mem_rsp_it); - mem_rd_rsp_active_ = true; delete mem_rsp; - } else { - device_->mem_rsp_valid = 0; } } // process memory requests - if (device_->mem_req_valid && running_) { + if (device_->mem_req_valid && device_->mem_req_ready) { uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE); if (device_->mem_req_rw) { auto byteen = device_->mem_req_byteen; auto data = VDataCast::get(device_->mem_req_data); - if (byte_addr >= uint64_t(IO_COUT_ADDR) && byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) { // process console output @@ -536,7 +337,7 @@ class Processor::Impl { mem_req->addr = byte_addr; mem_req->write = false; mem_req->ready = false; - ram_->read(mem_req->block.data(), byte_addr, 
MEM_BLOCK_SIZE); + ram_->read(mem_req->data.data(), byte_addr, MEM_BLOCK_SIZE); pending_mem_reqs_.emplace_back(mem_req); //printf("%0ld: [sim] MEM Rd Req: addr=0x%0lx, tag=0x%0lx\n", timestamp, byte_addr, device_->mem_req_tag); @@ -545,25 +346,12 @@ class Processor::Impl { dram_queue_.push(mem_req); } } - - device_->mem_req_ready = running_; } -#endif - void dcr_bus_reset() { device_->dcr_wr_valid = 0; } - void dcr_bus_eval(bool clk) { - if (!clk) { - return; - } - if (device_->dcr_wr_valid) { - device_->dcr_wr_valid = 0; - } - } - void wait(uint32_t cycles) { for (int i = 0; i < cycles; ++i) { this->tick(); @@ -573,8 +361,8 @@ class Processor::Impl { private: typedef struct { - Device* device; - std::array block; + VVortex* device; + std::array data; uint64_t addr; uint64_t tag; bool write; @@ -589,21 +377,15 @@ class Processor::Impl { DramSim dram_sim_; - Device* device_; + VVortex* device_; #ifdef VCD_OUTPUT VerilatedVcdC *tfp_; #endif - RAM* ram_; - - bool mem_rd_rsp_active_; bool mem_rd_rsp_ready_; - bool mem_wr_rsp_active_; - bool mem_wr_rsp_ready_; - - bool running_; + RAM* ram_; }; /////////////////////////////////////////////////////////////////////////////// diff --git a/sim/rtlsim/verilator.vlt b/sim/rtlsim/verilator.vlt deleted file mode 100644 index 9cfccbeb4..000000000 --- a/sim/rtlsim/verilator.vlt +++ /dev/null @@ -1,5 +0,0 @@ -`verilator_config - -lint_off -rule BLKANDNBLK -file "*/fpnew/src/*" -lint_off -rule UNOPTFLAT -file "*/fpnew/src/*" -lint_off -file "*/fpnew/src/*" diff --git a/sim/rtlsim/verilator.vlt.in b/sim/rtlsim/verilator.vlt.in new file mode 100644 index 000000000..56de6b2cf --- /dev/null +++ b/sim/rtlsim/verilator.vlt.in @@ -0,0 +1,5 @@ +`verilator_config + +lint_off -rule BLKANDNBLK -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -rule UNOPTFLAT -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -file "@VORTEX_HOME@/third_party/cvfpu/*" diff --git a/sim/simx/Makefile b/sim/simx/Makefile index 33120b13c..31fde7023 100644 --- 
a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -14,10 +14,6 @@ CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src CXXFLAGS += -DXLEN_$(XLEN) CXXFLAGS += $(CONFIGS) -ifeq ($(VM_ENABLE), 1) -CXXFLAGS += -DVM_ENABLE -endif - LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator diff --git a/sim/simx/arch.h b/sim/simx/arch.h index d72b4ce11..6becf5c91 100644 --- a/sim/simx/arch.h +++ b/sim/simx/arch.h @@ -33,7 +33,7 @@ class Arch { uint64_t local_mem_base_; public: - Arch(uint16_t num_threads, uint16_t num_warps, uint16_t num_cores) + Arch(uint16_t num_threads, uint16_t num_warps, uint16_t num_cores) : num_threads_(num_threads) , num_warps_(num_warps) , num_cores_(num_cores) @@ -70,6 +70,7 @@ class Arch { uint16_t socket_size() const { return socket_size_; } + }; } \ No newline at end of file diff --git a/sim/simx/cache_sim.cpp b/sim/simx/cache_sim.cpp index 4f357f195..27a73ba72 100644 --- a/sim/simx/cache_sim.cpp +++ b/sim/simx/cache_sim.cpp @@ -170,6 +170,25 @@ struct bank_req_t { } }; +inline std::ostream &operator<<(std::ostream &os, const bank_req_t& req) { + os << "set=" << req.set_id << ", rw=" << req.write; + os << std::dec << ", type=" << req.type; + os << ", tag=0x" << std::hex << req.tag; + os << ", req_tags={"; + bool first_port = true; + for (auto& port : req.ports) { + if (port.valid) { + if (!first_port) os << ", "; + first_port = false; + os << "[" << std::dec << port.req_id << "]=0x" << std::hex << port.req_tag; + } + } + os << "}"; + os << std::dec << ", cid=" << req.cid; + os << " (#" << req.uuid << ")"; + return os; +} + struct mshr_entry_t { bank_req_t bank_req; uint32_t line_id; @@ -514,6 +533,7 @@ class CacheSim::Impl { bank_req.type = bank_req_t::Core; bank_req.write = core_req.write; pipeline_req = bank_req; + DT(3, simobject_->name() << " core-req: " << core_req); } if (core_req.write) @@ -541,7 +561,7 @@ class CacheSim::Impl { 
uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs; MemRsp core_rsp{tag, mem_rsp.cid, mem_rsp.uuid}; simobject_->CoreRspPorts.at(req_id).push(core_rsp, config_.latency); - DT(3, simobject_->name() << " core-rsp: " << core_rsp); + DT(3, simobject_->name() << " bypass-core-rsp: " << core_rsp); } void processBypassRequest(const MemReq& core_req, uint32_t req_id) { @@ -549,13 +569,13 @@ class CacheSim::Impl { MemReq mem_req(core_req); mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id; bypass_switch_->ReqIn.at(1).push(mem_req, 1); - DT(3, simobject_->name() << " dram-req: " << mem_req); + DT(3, simobject_->name() << " bypass-dram-req: " << mem_req); } if (core_req.write && config_.write_reponse) { MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid}; simobject_->CoreRspPorts.at(req_id).push(core_rsp, 1); - DT(3, simobject_->name() << " core-rsp: " << core_rsp); + DT(3, simobject_->name() << " bypass-core-rsp: " << core_rsp); } } @@ -693,6 +713,7 @@ class CacheSim::Impl { // allocate MSHR auto mshr_id = bank.mshr.allocate(pipeline_req, (free_line_id != -1) ? 
free_line_id : repl_line_id); + DT(3, simobject_->name() << "-bank" << bank_id << " mshr-enqueue: " << pipeline_req); // send fill request if (!mshr_pending) { diff --git a/sim/simx/constants.h b/sim/simx/constants.h index 0c707b55c..c651bbfc4 100644 --- a/sim/simx/constants.h +++ b/sim/simx/constants.h @@ -13,6 +13,8 @@ #pragma once +#include + #ifndef RAM_PAGE_SIZE #define RAM_PAGE_SIZE 4096 #endif @@ -21,14 +23,14 @@ #define MEM_CLOCK_RATIO 1 #endif -#define LSU_WORD_SIZE (XLEN / 8) -#define LSU_CHANNELS NUM_LSU_LANES -#define LSU_NUM_REQS (NUM_LSU_BLOCKS * LSU_CHANNELS) +inline constexpr int LSU_WORD_SIZE = (XLEN / 8); +inline constexpr int LSU_CHANNELS = NUM_LSU_LANES; +inline constexpr int LSU_NUM_REQS = (NUM_LSU_BLOCKS * LSU_CHANNELS); -#define DCACHE_WORD_SIZE LSU_LINE_SIZE -#define DCACHE_CHANNELS UP((NUM_LSU_LANES * (XLEN / 8)) / DCACHE_WORD_SIZE) -#define DCACHE_NUM_REQS (NUM_LSU_BLOCKS * DCACHE_CHANNELS) +inline constexpr int DCACHE_WORD_SIZE = LSU_LINE_SIZE; +inline constexpr int DCACHE_CHANNELS = UP((NUM_LSU_LANES * (XLEN / 8)) / DCACHE_WORD_SIZE); +inline constexpr int DCACHE_NUM_REQS = (NUM_LSU_BLOCKS * DCACHE_CHANNELS); -#define NUM_SOCKETS UP(NUM_CORES / SOCKET_SIZE) +inline constexpr int NUM_SOCKETS = UP(NUM_CORES / SOCKET_SIZE); -#define PER_ISSUE_WARPS NUM_WARPS / ISSUE_WIDTH \ No newline at end of file +inline constexpr int PER_ISSUE_WARPS = NUM_WARPS / ISSUE_WIDTH; \ No newline at end of file diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp index 82af146a3..537230a80 100644 --- a/sim/simx/core.cpp +++ b/sim/simx/core.cpp @@ -129,12 +129,14 @@ Core::Core(const SimContext& ctx, dispatchers_.at((int)FUType::FPU) = SimPlatform::instance().create_object(arch, 2, NUM_FPU_BLOCKS, NUM_FPU_LANES); dispatchers_.at((int)FUType::LSU) = SimPlatform::instance().create_object(arch, 2, NUM_LSU_BLOCKS, NUM_LSU_LANES); dispatchers_.at((int)FUType::SFU) = SimPlatform::instance().create_object(arch, 2, NUM_SFU_BLOCKS, NUM_SFU_LANES); - + 
dispatchers_.at((int)FUType::TCU) = SimPlatform::instance().create_object(arch, 2, NUM_TCU_BLOCKS, NUM_TCU_LANES); + // initialize execute units func_units_.at((int)FUType::ALU) = SimPlatform::instance().create_object(this); func_units_.at((int)FUType::FPU) = SimPlatform::instance().create_object(this); func_units_.at((int)FUType::LSU) = SimPlatform::instance().create_object(this); func_units_.at((int)FUType::SFU) = SimPlatform::instance().create_object(this); + func_units_.at((int)FUType::TCU) = SimPlatform::instance().create_object(this); // bind commit arbiters for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) { diff --git a/sim/simx/core.h b/sim/simx/core.h index 339d76fb8..e538350dd 100644 --- a/sim/simx/core.h +++ b/sim/simx/core.h @@ -178,6 +178,7 @@ class Core : public SimObject { friend class AluUnit; friend class FpuUnit; friend class SfuUnit; + friend class TcuUnit; }; } // namespace vortex diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp index dba57c4ef..7a37e79e2 100644 --- a/sim/simx/decode.cpp +++ b/sim/simx/decode.cpp @@ -51,6 +51,7 @@ static const std::unordered_map sc_instTable = { {Opcode::EXT2, InstType::R4}, {Opcode::R_W, InstType::R}, {Opcode::I_W, InstType::I}, + {Opcode::TCU, InstType::I}, }; enum Constants { @@ -86,7 +87,7 @@ static const char* op_string(const Instr &instr) { auto func3 = instr.getFunc3(); auto func7 = instr.getFunc7(); auto rd = instr.getRDest(); - auto rs2 = instr.getRSrc(1); + auto rs1 = instr.getRSrc(1); auto imm = instr.getImm(); switch (opcode) { @@ -343,7 +344,7 @@ static const char* op_string(const Instr &instr) { std::abort(); } case 0x60: - switch (rs2) { + switch (rs1) { case 0: return "FCVT.W.S"; case 1: return "FCVT.WU.S"; case 2: return "FCVT.L.S"; @@ -352,7 +353,7 @@ static const char* op_string(const Instr &instr) { std::abort(); } case 0x61: - switch (rs2) { + switch (rs1) { case 0: return "FCVT.W.D"; case 1: return "FCVT.WU.D"; case 2: return "FCVT.L.D"; @@ -361,7 +362,7 @@ static const char* 
op_string(const Instr &instr) { std::abort(); } case 0x68: - switch (rs2) { + switch (rs1) { case 0: return "FCVT.S.W"; case 1: return "FCVT.S.WU"; case 2: return "FCVT.S.L"; @@ -370,7 +371,7 @@ static const char* op_string(const Instr &instr) { std::abort(); } case 0x69: - switch (rs2) { + switch (rs1) { case 0: return "FCVT.D.W"; case 1: return "FCVT.D.WU"; case 2: return "FCVT.D.L"; @@ -395,7 +396,7 @@ static const char* op_string(const Instr &instr) { switch (func3) { case 0: return "TMC"; case 1: return "WSPAWN"; - case 2: return rs2 ? "SPLIT.N" : "SPLIT"; + case 2: return rs1 ? "SPLIT.N" : "SPLIT"; case 3: return "JOIN"; case 4: return "BAR"; case 5: return rd ? "PRED.N" : "PRED"; @@ -405,6 +406,16 @@ static const char* op_string(const Instr &instr) { default: std::abort(); } + + case Opcode::TCU: + switch(func3) + { + case 0: return "ML"; // Matrix Load + case 1: return "MS"; // Matrix Store + case 2: return "MATMUL"; // Matrix Multiply + default: + std::abort(); + } default: std::abort(); } @@ -460,6 +471,11 @@ std::shared_ptr Emulator::decode(uint32_t code) const { switch (op) { case Opcode::FCI: switch (func7) { + case 0x20: // FCVT.S.D + case 0x21: // FCVT.D.S + instr->setDestReg(rd, RegType::Float); + instr->addSrcReg(rs1, RegType::Float); + break; case 0x2c: // FSQRT.S case 0x2d: // FSQRT.D instr->setDestReg(rd, RegType::Float); @@ -543,6 +559,14 @@ std::shared_ptr Emulator::decode(uint32_t code) const { case InstType::I: { switch (op) { + case Opcode::TCU: { + instr->setDestReg(rs1, RegType::Integer); + instr->addSrcReg(rs1, RegType::Integer); + instr->setFunc3(func3); + instr->setFunc7(func7); + auto imm = code >> shift_rs2; + instr->setImm(sext(imm, width_i_imm)); + } break; case Opcode::I: case Opcode::I_W: case Opcode::JALR: diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 8a95f4dbd..05b3497c4 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -30,17 +30,6 @@ using namespace vortex; 
-Emulator::ipdom_entry_t::ipdom_entry_t(const ThreadMask &tmask, Word PC) - : tmask(tmask) - , PC(PC) - , fallthrough(false) -{} - -Emulator::ipdom_entry_t::ipdom_entry_t(const ThreadMask &tmask) - : tmask(tmask) - , fallthrough(true) -{} - Emulator::warp_t::warp_t(const Arch& arch) : ireg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) , freg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) @@ -85,7 +74,11 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) , core_(core) , warps_(arch.num_warps(), arch) , barriers_(arch.num_barriers(), 0) - , ipdom_size_((arch.num_threads()-1) * 2) + , ipdom_size_(arch.num_threads()-1) + // [TBC] Currently, tradeoff between scratchpad size & performance has not been evaluated. Scratchpad is + // considered to be big enough to hold input tiles for one output tile. + // In future versions, scratchpad size should be fixed to an appropriate value. + , scratchpad(std::vector(32 * 32 * 32768)) { this->clear(); } @@ -122,6 +115,11 @@ void Emulator::clear() { active_warps_.set(0); warps_[0].tmask.set(0); wspawn_.valid = false; + + for (auto& reg : scratchpad) + { + reg = 0; + } } void Emulator::attach_ram(RAM* ram) { @@ -173,10 +171,8 @@ instr_trace_t* Emulator::step() { uint64_t uuid = 0; #endif - DPH(1, "Fetch: cid=" << core_->id() << ", wid=" << scheduled_warp << ", tmask="); - for (uint32_t i = 0, n = arch_.num_threads(); i < n; ++i) - DPN(1, warp.tmask.test(i)); - DPN(1, ", PC=0x" << std::hex << warp.PC << " (#" << std::dec << uuid << ")" << std::endl); + DP(1, "Fetch: cid=" << core_->id() << ", wid=" << scheduled_warp << ", tmask=" << ThreadMaskOS(warp.tmask, arch_.num_threads()) + << ", PC=0x" << std::hex << warp.PC << " (#" << std::dec << uuid << ")"); // Fetch uint32_t instr_code = 0; @@ -428,6 +424,21 @@ void Emulator::cout_flush() { case (addr + (VX_CSR_MPM_BASE_H-VX_CSR_MPM_BASE)) : return ((value >> 32) & 0xFFFFFFFF) #endif +Word Emulator::get_tiles() +{ + return mat_size; +} + +Word 
Emulator::get_tc_size() +{ + return tc_size; +} + +Word Emulator::get_tc_num() +{ + return tc_num; +} + Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { auto core_perf = core_->perf_stats(); switch (addr) { @@ -463,6 +474,10 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { case VX_CSR_NUM_CORES: return uint32_t(arch_.num_cores()) * arch_.num_clusters(); case VX_CSR_LOCAL_MEM_BASE: return arch_.local_mem_base(); case VX_CSR_MSCRATCH: return csr_mscratch_; + case VX_MAT_MUL_SIZE: return mat_size; + case VX_TC_NUM: return tc_num; + case VX_TC_SIZE: return tc_size; + CSR_READ_64(VX_CSR_MCYCLE, core_perf.cycles); CSR_READ_64(VX_CSR_MINSTRET, core_perf.instrs); default: @@ -581,6 +596,16 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) { case VX_CSR_MNSTATUS: case VX_CSR_MCAUSE: break; + case VX_MAT_MUL_SIZE: + mat_size = value; + break; + case VX_TC_NUM: + tc_num = value; + break; + case VX_TC_SIZE: + tc_size = value; + break; + default: { std::cout << "Error: invalid CSR write addr=0x" << std::hex << addr << ", value=0x" << value << std::dec << std::endl; std::abort(); @@ -599,4 +624,16 @@ void Emulator::update_fcrs(uint32_t fflags, uint32_t tid, uint32_t wid) { this->set_csr(VX_CSR_FCSR, this->get_csr(VX_CSR_FCSR, tid, wid) | fflags, tid, wid); this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, tid, wid) | fflags, tid, wid); } +} + +// For riscv-vector test functionality, ecall and ebreak must trap +// These instructions are used in the vector tests to stop execution of the test +// Therefore, without these instructions, undefined and incorrect behavior happens +// +// For now, we need these instructions to trap for testing the riscv-vector isa +void Emulator::trigger_ecall() { + active_warps_.reset(); +} +void Emulator::trigger_ebreak() { + active_warps_.reset(); } \ No newline at end of file diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 0b2d6ac03..5f1b91d5d 100644 --- 
a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -56,14 +56,23 @@ class Emulator { bool wspawn(uint32_t num_warps, Word nextPC); int get_exitcode() const; - + + Word get_tiles(); + Word get_tc_size(); + Word get_tc_num(); + private: struct ipdom_entry_t { - ipdom_entry_t(const ThreadMask &tmask, Word PC); - ipdom_entry_t(const ThreadMask &tmask); - - ThreadMask tmask; + ipdom_entry_t(const ThreadMask &orig_tmask, const ThreadMask &else_tmask, Word PC) + : orig_tmask (orig_tmask) + , else_tmask (else_tmask) + , PC (PC) + , fallthrough(false) + {} + + ThreadMask orig_tmask; + ThreadMask else_tmask; Word PC; bool fallthrough; }; @@ -113,6 +122,10 @@ class Emulator { void update_fcrs(uint32_t fflags, uint32_t tid, uint32_t wid); + void trigger_ecall(); // Re-added for riscv-vector test functionality + + void trigger_ebreak(); // Re-added for riscv-vector test functionality + const Arch& arch_; const DCRS& dcrs_; Core* core_; @@ -125,6 +138,10 @@ class Emulator { uint32_t ipdom_size_; Word csr_mscratch_; wspawn_t wspawn_; + std::vector scratchpad; + uint32_t mat_size; + uint32_t tc_size; + uint32_t tc_num; }; } diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index db098726b..dd8253571 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -25,6 +25,7 @@ #include "emulator.h" #include "instr.h" #include "core.h" +#include "VX_types.h" using namespace vortex; @@ -829,7 +830,11 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { trace->fetch_stall = true; switch (csr_addr) { case 0x000: // RV32I: ECALL + this->trigger_ecall(); // Re-added for riscv-vector test functionality + break; case 0x001: // RV32I: EBREAK + this->trigger_ebreak(); // Re-added for riscv-vector test functionality + break; case 0x002: // RV32I: URET case 0x102: // RV32I: SRET case 0x302: // RV32I: MRET @@ -1328,7 +1333,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { auto stack_size = warp.ipdom_stack.size(); 
ThreadMask then_tmask, else_tmask; - auto not_pred = rsrc2 & 0x1; + auto not_pred = (rsrc1 != 0); for (uint32_t t = 0; t < num_threads; ++t) { auto cond = (warp.ireg_file.at(t).at(rsrc0) & 0x1) ^ not_pred; then_tmask[t] = warp.tmask.test(t) && cond; @@ -1347,11 +1352,9 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } else { next_tmask = else_tmask; } - // push reconvergence thread mask onto the stack - warp.ipdom_stack.emplace(warp.tmask); - // push not taken thread mask onto the stack + // push reconvergence and not-taken thread mask onto the stack auto ntaken_tmask = ~next_tmask & warp.tmask; - warp.ipdom_stack.emplace(ntaken_tmask, next_pc); + warp.ipdom_stack.emplace(warp.tmask, ntaken_tmask, next_pc); } // return divergent state for (uint32_t t = thread_start; t < num_threads; ++t) { @@ -1372,11 +1375,14 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { std::cout << "IPDOM stack is empty!\n" << std::flush; std::abort(); } - next_tmask = warp.ipdom_stack.top().tmask; - if (!warp.ipdom_stack.top().fallthrough) { + if (warp.ipdom_stack.top().fallthrough) { + next_tmask = warp.ipdom_stack.top().orig_tmask; + warp.ipdom_stack.pop(); + } else { + next_tmask = warp.ipdom_stack.top().else_tmask; next_pc = warp.ipdom_stack.top().PC; + warp.ipdom_stack.top().fallthrough = true; } - warp.ipdom_stack.pop(); } } break; case 4: { @@ -1415,6 +1421,171 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { std::abort(); } } break; + case Opcode::TCU: + { //TODO - make it data-type flexible + uint32_t mem_bytes = 1; + DP(3, "mem_bytes=" << mem_bytes << std::endl); + uint16_t tc_size = this->get_csr(VX_TC_SIZE, 0, wid); + uint32_t TC_per_warp = this->get_csr(VX_TC_NUM, 0, wid); + + DP(3, "tc_size=" << tc_size << std::endl); + DP(3, "TC_per_warp=" << TC_per_warp << std::endl); + + //Number of loads - dependant on the thread config + uint32_t n_tiles = 
this->get_csr(VX_MAT_MUL_SIZE, 0, wid); //CSR instruction before MLOAD will ensure that this csr has value + int num_data_per_thread; + int num_data_per_thread_st; + uint32_t num_threads_actv; + uint32_t num_threads_actv_st; + uint32_t data_bytes_load; + uint32_t data_bytes_store; + uint32_t num_threads_per_tc = MAX (1, num_threads/TC_per_warp); + + //LOAD + if(num_threads > tc_size*tc_size*n_tiles*TC_per_warp) + { + num_threads_actv = tc_size*tc_size*n_tiles*TC_per_warp; + num_data_per_thread = 1; + } + else + { + num_threads_actv = num_threads; + num_data_per_thread = (tc_size*tc_size*n_tiles)/num_threads_per_tc; + } + data_bytes_load = mem_bytes*num_data_per_thread; + + //STORE + if(num_threads > tc_size*tc_size*TC_per_warp) + { + num_threads_actv_st = tc_size*tc_size*TC_per_warp; + num_data_per_thread_st = 1; + } + else + { + num_threads_actv_st = num_threads; + num_data_per_thread_st = (tc_size*tc_size)/num_threads_per_tc; + } + data_bytes_store = mem_bytes*num_data_per_thread_st; + + DP(3, "Num Tiles=" << n_tiles << std::endl); + + switch (func3) { + case 0: + { //Matrix Load + + DP (4, "TCU LOAD"); + trace->fu_type = FUType::LSU; + trace->lsu_type = LsuType::TCU_LOAD; + + trace->src_regs[0] = {RegType::Integer, rsrc0}; + auto trace_data = std::make_shared(num_threads); + trace->data = trace_data; + + for (uint32_t t = thread_start; t < num_threads_actv; ++t) + { + if (!warp.tmask.test(t)) + continue; + DP(3, "Thread ID" << t); + + uint32_t base_addr = rsdata[t][0].i ; + trace_data->mem_addrs.at(t) = {base_addr, data_bytes_load}; + + //Load A or B (depends on immsrc) + int loop_offset = 0; + DP(3, "n_tiles = " << n_tiles << "; num_data_per_thread = " << num_data_per_thread <dcache_read(temp_ref, (base_addr+(n*mem_bytes)+(loop_offset*mem_bytes)), mem_bytes); + + scratchpad[loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n] = *temp_ref; + DP(3, "Scratchpad Index: " << loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + 
(t*num_data_per_thread) + n << ", Value: " << scratchpad[loop_offset + (immsrc*(n_tiles)*tc_size*tc_size) + (t*num_data_per_thread) + n]); + } + } + rd_write = true; + } break; + case 1: + { + DP(4, "TCU STORE"); + trace->fu_type = FUType::LSU; + trace->lsu_type = LsuType::TCU_STORE; + + auto trace_data = std::make_shared(num_threads); + trace->data = trace_data; + + for (uint32_t t = thread_start; t < num_threads_actv_st; ++t) + { + if (!warp.tmask.test(t)) + continue; + + DP(3, "Thread ID" << t); + uint32_t base_addr = rsdata[t][0].i ; + + trace_data->mem_addrs.at(t) = {base_addr, data_bytes_store}; + + //Store C + for (int n=0; ndcache_write(temp_ref, base_addr+(n*mem_bytes), mem_bytes); + } + } + //Clear the scratchpad + for(long unsigned int i=0 ; i < scratchpad.size(); i++) + { + scratchpad[i] = 0; + } + } + break; + case 2: + { //Matrix Multiply + DP(4, "TCU MULTIPLY MAT"); + trace->fu_type = FUType::TCU; + trace->tcu_type = TCUType::TCU_MUL; + uint32_t threads_per_tc = MAX (1, num_threads/TC_per_warp); + for (uint32_t t = thread_start; t < num_threads_actv; ++t) + { + if (!warp.tmask.test(t)) + continue; + + DP(3, "Thread ID" << t); + //TC operation [only 1 thread in 1 warp needs to do this] + if (t%threads_per_tc == 0) + { + /* + // TODO : Fix needed for functional correctness + // TODO : change to systolic array implementation + uint32_t thread_offset = t*(tc_size*tc_size); + + int loop_offset = 0; + int offset_b = n_tiles*n_tiles*n_tiles*tc_size*tc_size; + uint32_t accu_offset = (n_tiles)*(n_tiles)*(n_tiles)*tc_size*tc_size*2; + for(int tiles = 0 ; tiles < n_tiles ; tiles++) //What's the HW implication of this?? A counter implementation? 
+ { + for (int i = 0; i < tc_size; i++) { //ROW-1 + for (int j = 0; j < tc_size; j++) { //COL-2 + int sum = 0; + for (int k = 0; k < tc_size; k++) + { //COL-1 + sum = sum + scratchpad[loop_offset + thread_offset*n_tiles + i * tc_size + k] *scratchpad[loop_offset + thread_offset*n_tiles + offset_b + (k * tc_size + j)]; + } + scratchpad[accu_offset + thread_offset +(i * tc_size + j)] += sum; //[i * col2 + j] = sum + DP(3, "Scratchpad Index: " << accu_offset + (i * tc_size + j) << " , Value=" << scratchpad[accu_offset + (i * tc_size + j)]); + } + } + loop_offset += tc_size*tc_size; //Move to the next tiled matmul fragment + } + */ + } + } + + }break; + default: + std::abort(); + } + } break; default: std::abort(); } @@ -1471,10 +1642,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { } if (warp.tmask != next_tmask) { - DPH(3, "*** New Tmask="); - for (uint32_t i = 0; i < num_threads; ++i) - DPN(3, next_tmask.test(i)); - DPN(3, std::endl); + DP(3, "*** New Tmask=" << ThreadMaskOS(next_tmask, num_threads)); warp.tmask = next_tmask; if (!next_tmask.any()) { active_warps_.reset(wid); diff --git a/sim/simx/func_unit.cpp b/sim/simx/func_unit.cpp index b03551e08..a182f6d8b 100644 --- a/sim/simx/func_unit.cpp +++ b/sim/simx/func_unit.cpp @@ -21,6 +21,7 @@ #include "core.h" #include "constants.h" #include "cache_sim.h" +#include "VX_types.h" using namespace vortex; @@ -165,7 +166,7 @@ void LsuUnit::tick() { continue; } - bool is_write = (trace->lsu_type == LsuType::STORE); + bool is_write = ((trace->lsu_type == LsuType::STORE) || (trace->lsu_type == LsuType::TCU_STORE)); // check pending queue capacity if (!is_write && state.pending_rd_reqs.full()) { @@ -191,6 +192,7 @@ void LsuUnit::tick() { } } uint32_t tag = 0; + if (!is_write) { tag = state.pending_rd_reqs.allocate({trace, lsu_req.mask}); } @@ -220,6 +222,96 @@ void LsuUnit::tick() { input.pop(); } } +/* TO BE FIXED:Tensor_core code + send_request is not used anymore. 
Need to be modified number of load +*/ +/* +int LsuUnit::send_requests(instr_trace_t* trace, int block_idx, int tag) { + int count = 0; + + auto trace_data = std::dynamic_pointer_cast(trace->data); + bool is_write = ((trace->lsu_type == LsuType::STORE) || (trace->lsu_type == LsuType::TCU_STORE)); + + uint16_t req_per_thread = 1; + if ((trace->lsu_type == LsuType::TCU_LOAD) || (trace->lsu_type == LsuType::TCU_STORE)) + { + req_per_thread= (1>(trace_data->mem_addrs.at(0).size)/4)? 1: ((trace_data->mem_addrs.at(0).size)/4); + } + + auto t0 = trace->pid * NUM_LSU_LANES; + + for (uint32_t i = 0; i < NUM_LSU_LANES; ++i) { + uint32_t t = t0 + i; + if (!trace->tmask.test(t)) + continue; + + int req_idx = block_idx * LSU_CHANNELS + (i % LSU_CHANNELS); + auto& dcache_req_port = core_->lsu_demux_.at(req_idx)->ReqIn; + + auto mem_addr = trace_data->mem_addrs.at(t); + auto type = get_addr_type(mem_addr.addr); + // DT(3, "addr_type = " << type << ", " << *trace); + uint32_t mem_bytes = 1; + for (int i = 0; i < req_per_thread; i++) + { + MemReq mem_req; + mem_req.addr = mem_addr.addr + (i*mem_bytes); + mem_req.write = is_write; + mem_req.type = type; + mem_req.tag = tag; + mem_req.cid = trace->cid; + mem_req.uuid = trace->uuid; + + dcache_req_port.push(mem_req, 1); + DT(3, "mem-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag + << ", lsu_type=" << trace->lsu_type << ", rid=" << req_idx << ", addr_type=" << mem_req.type << ", " << *trace); + + if (is_write) { + ++core_->perf_stats_.stores; + } else { + ++core_->perf_stats_.loads; + ++pending_loads_; + } + + ++count; + } + } + return count; +} +*/ + +/////////////////////////////////////////////////////////////////////////////// + +TcuUnit::TcuUnit(const SimContext& ctx, Core* core) + : FuncUnit(ctx, core, "TCU") + {} + +void TcuUnit::tick() { + + for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) { + auto& input = Inputs.at(i); + if (input.empty()) + continue; + auto& output = Outputs.at(i); + auto trace = input.front(); + 
uint32_t n_tiles = core_->emulator_.get_tiles(); + uint32_t tc_size = core_->emulator_.get_tc_size(); + + switch (trace->tcu_type) { + case TCUType::TCU_MUL: + { //mat size = n_tiles * tc_size + int matmul_latency = (n_tiles * tc_size) + tc_size + tc_size; + output.push(trace, matmul_latency); + DT(3, "matmul_latency = " << matmul_latency << ", " << *trace); + break; + } + default: + std::abort(); + } + DT(3, "pipeline-execute: op=" << trace->tcu_type << ", " << *trace); + input.pop(); + } +} /////////////////////////////////////////////////////////////////////////////// diff --git a/sim/simx/func_unit.h b/sim/simx/func_unit.h index 76dd16173..2250d70c5 100644 --- a/sim/simx/func_unit.h +++ b/sim/simx/func_unit.h @@ -98,6 +98,14 @@ class LsuUnit : public FuncUnit { /////////////////////////////////////////////////////////////////////////////// +class TcuUnit : public FuncUnit { +public: + TcuUnit(const SimContext& ctx, Core*); + void tick(); +}; + +/////////////////////////////////////////////////////////////////////////////// + class SfuUnit : public FuncUnit { public: SfuUnit(const SimContext& ctx, Core*); diff --git a/sim/simx/instr.h b/sim/simx/instr.h index f97a19eac..061b4deb0 100644 --- a/sim/simx/instr.h +++ b/sim/simx/instr.h @@ -46,7 +46,7 @@ enum class Opcode { EXT1 = 0x0b, EXT2 = 0x2b, EXT3 = 0x5b, - EXT4 = 0x7b + TCU = 0x7b }; enum class InstType { diff --git a/sim/simx/instr_trace.h b/sim/simx/instr_trace.h index bbf4eab59..5ed98d265 100644 --- a/sim/simx/instr_trace.h +++ b/sim/simx/instr_trace.h @@ -77,6 +77,7 @@ struct instr_trace_t { AluType alu_type; FpuType fpu_type; SfuType sfu_type; + TCUType tcu_type; }; ITraceData::Ptr data; diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp index cd375b516..797f6bb9d 100644 --- a/sim/simx/main.cpp +++ b/sim/simx/main.cpp @@ -40,7 +40,7 @@ const char* program = nullptr; static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "t:w:c:rsh?")) != -1) { + while ((c = getopt(argc, 
argv, "t:w:c:rsh")) != -1) { switch (c) { case 't': num_threads = atoi(optarg); @@ -55,13 +55,12 @@ static void parse_args(int argc, char **argv) { showStats = true; break; case 'h': - case '?': - show_usage(); - exit(0); + show_usage(); + exit(0); break; default: - show_usage(); - exit(-1); + show_usage(); + exit(-1); } } diff --git a/sim/simx/mem_sim.cpp b/sim/simx/mem_sim.cpp index a38f4c01c..37ea3bb88 100644 --- a/sim/simx/mem_sim.cpp +++ b/sim/simx/mem_sim.cpp @@ -77,7 +77,7 @@ class MemSim::Impl { if (!rsp_args->request.write) { MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid}; rsp_args->simobject->MemRspPorts.at(rsp_args->i).push(mem_rsp, 1); - DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp << " bank: " << rsp_args->i); + DT(3, rsp_args->simobject->name() << " mem-rsp: bank=" << rsp_args->i << ", " << mem_rsp); } delete rsp_args; }, @@ -90,7 +90,7 @@ class MemSim::Impl { continue; } - DT(3, simobject_->name() << " mem-req: " << mem_req << " bank: " << i); + DT(3, simobject_->name() << " mem-req: bank=" << i << ", " << mem_req); simobject_->MemReqPorts.at(i).pop(); counter++; diff --git a/sim/simx/types.h b/sim/simx/types.h index b452dd379..77b351150 100644 --- a/sim/simx/types.h +++ b/sim/simx/types.h @@ -25,6 +25,7 @@ #include #include #include "debug.h" +#include namespace vortex { @@ -58,6 +59,27 @@ typedef std::bitset WarpMask; /////////////////////////////////////////////////////////////////////////////// +class ThreadMaskOS { +public: + ThreadMaskOS(const ThreadMask& mask, int size) + : mask_(mask) + , size_(size) + {} + + friend std::ostream& operator<<(std::ostream& os, const ThreadMaskOS& wrapper) { + for (int i = 0; i < wrapper.size_; ++i) { + os << wrapper.mask_[i]; + } + return os; + } + +private: + const ThreadMask& mask_; + int size_; +}; + +/////////////////////////////////////////////////////////////////////////////// + enum class RegType { None, Integer, @@ -82,6 +104,7 @@ enum class FUType { 
LSU, FPU, SFU, + TCU, Count }; @@ -91,6 +114,7 @@ inline std::ostream &operator<<(std::ostream &os, const FUType& type) { case FUType::LSU: os << "LSU"; break; case FUType::FPU: os << "FPU"; break; case FUType::SFU: os << "SFU"; break; + case FUType::TCU: os << "TCU"; break; default: assert(false); } return os; @@ -122,14 +146,30 @@ inline std::ostream &operator<<(std::ostream &os, const AluType& type) { enum class LsuType { LOAD, + TCU_LOAD, STORE, + TCU_STORE, FENCE }; +enum class TCUType { + TCU_MUL +}; + +inline std::ostream &operator<<(std::ostream &os, const TCUType& type) { + switch (type) { + case TCUType::TCU_MUL: os << "TCU MUL"; break; + default: assert(false); + } + return os; +} + inline std::ostream &operator<<(std::ostream &os, const LsuType& type) { switch (type) { case LsuType::LOAD: os << "LOAD"; break; + case LsuType::TCU_LOAD: os << "TCU_LOAD"; break; case LsuType::STORE: os << "STORE"; break; + case LsuType::TCU_STORE: os << "TCU_STORE"; break; case LsuType::FENCE: os << "FENCE"; break; default: assert(false); } @@ -260,17 +300,18 @@ struct LsuReq { }; inline std::ostream &operator<<(std::ostream &os, const LsuReq& req) { - os << "rw=" << req.write << ", mask=" << req.mask << ", "; + os << "rw=" << req.write << ", mask=" << req.mask << ", addr={"; + bool first_addr = true; for (size_t i = 0; i < req.mask.size(); ++i) { - os << "addr" << i << "="; + if (!first_addr) os << ", "; + first_addr = false; if (req.mask.test(i)) { os << "0x" << std::hex << req.addrs.at(i) << std::dec; } else { os << "-"; } - os << ", "; } - os << "tag=0x" << std::hex << req.tag << std::dec << ", cid=" << req.cid; + os << "}, tag=0x" << std::hex << req.tag << std::dec << ", cid=" << req.cid; os << " (#" << req.uuid << ")"; return os; } @@ -443,7 +484,7 @@ class Mux : public SimObject> { , type_(type) , delay_(delay) , cursors_(num_outputs, 0) - , num_reqs_(num_inputs / num_outputs) + , num_reqs_(log2ceil(num_inputs / num_outputs)) { assert(delay != 0); assert(num_inputs 
<= 32); @@ -467,7 +508,7 @@ class Mux : public SimObject> { void tick() { uint32_t I = Inputs.size(); uint32_t O = Outputs.size(); - uint32_t R = num_reqs_; + uint32_t R = 1 << num_reqs_; // skip bypass mode if (I == O) diff --git a/sim/xrtsim/Makefile b/sim/xrtsim/Makefile index 765e3e268..83efa688f 100644 --- a/sim/xrtsim/Makefile +++ b/sim/xrtsim/Makefile @@ -30,35 +30,37 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU -DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED # AFU parameters -CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2 +ifeq (,$(findstring PLATFORM_MEMORY_BANKS,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_BANKS=2 endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26 +ifeq (,$(findstring PLATFORM_MEMORY_ADDR_WIDTH,$(CONFIGS))) + ifeq ($(XLEN),64) + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=47 + else + CONFIGS += -DPLATFORM_MEMORY_ADDR_WIDTH=31 + endif endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512 +ifeq (,$(findstring PLATFORM_MEMORY_DATA_WIDTH,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_DATA_WIDTH=512 endif -ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS))) - CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4 +ifeq (,$(findstring PLATFORM_MEMORY_OFFSET,$(CONFIGS))) + CONFIGS += -DPLATFORM_MEMORY_OFFSET=0 endif DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS) SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp -SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/xrt_sim.cpp +SRCS += $(SRC_DIR)/xrt.cpp $(SRC_DIR)/xrt_sim.cpp -RTL_PKGS 
+= $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv $(RTL_DIR)/core/VX_trace_pkg.sv +RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv FPU_INCLUDE = -I$(RTL_DIR)/fpu ifneq (,$(findstring FPU_FPNEW,$(CONFIGS))) - RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv - FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src + RTL_PKGS += $(THIRD_PARTY_DIR)/cvfpu/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv + FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -I$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -I$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/cvfpu/src endif RTL_INCLUDE = -I$(SRC_DIR) -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE) RTL_INCLUDE += -I$(AFU_DIR) @@ -66,19 +68,19 @@ RTL_INCLUDE += -I$(AFU_DIR) TOP = vortex_afu_shim VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic -VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-GENUNNAMED +VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO VL_FLAGS += --x-initial unique --x-assign unique VL_FLAGS += -DSIMULATION -DSV_DPI VL_FLAGS += -DXLEN_$(XLEN) VL_FLAGS += $(CONFIGS) -VL_FLAGS += $(SRC_DIR)/verilator.vlt +VL_FLAGS += verilator.vlt VL_FLAGS += $(RTL_INCLUDE) VL_FLAGS += $(RTL_PKGS) CXXFLAGS += $(CONFIGS) # Enable Verilator multithreaded simulation -THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())') +THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(mp.cpu_count())') VL_FLAGS += -j $(THREADS) #VL_FLAGS += --threads $(THREADS) diff --git 
a/sim/xrtsim/verilator.vlt b/sim/xrtsim/verilator.vlt deleted file mode 100644 index 9cfccbeb4..000000000 --- a/sim/xrtsim/verilator.vlt +++ /dev/null @@ -1,5 +0,0 @@ -`verilator_config - -lint_off -rule BLKANDNBLK -file "*/fpnew/src/*" -lint_off -rule UNOPTFLAT -file "*/fpnew/src/*" -lint_off -file "*/fpnew/src/*" diff --git a/sim/xrtsim/verilator.vlt.in b/sim/xrtsim/verilator.vlt.in new file mode 100644 index 000000000..56de6b2cf --- /dev/null +++ b/sim/xrtsim/verilator.vlt.in @@ -0,0 +1,5 @@ +`verilator_config + +lint_off -rule BLKANDNBLK -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -rule UNOPTFLAT -file "@VORTEX_HOME@/third_party/cvfpu/*" +lint_off -file "@VORTEX_HOME@/third_party/cvfpu/*" diff --git a/sim/xrtsim/vortex_afu_shim.sv b/sim/xrtsim/vortex_afu_shim.sv index 648e25e7a..f94617f1e 100644 --- a/sim/xrtsim/vortex_afu_shim.sv +++ b/sim/xrtsim/vortex_afu_shim.sv @@ -11,22 +11,22 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-`include "VX_platform.vh" `include "vortex_afu.vh" module vortex_afu_shim #( - parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, + parameter C_S_AXI_CTRL_ADDR_WIDTH = 8, parameter C_S_AXI_CTRL_DATA_WIDTH = 32, - parameter C_M_AXI_MEM_ID_WIDTH = `M_AXI_MEM_ID_WIDTH, + parameter C_M_AXI_MEM_ID_WIDTH = `PLATFORM_MEMORY_ID_WIDTH, + parameter C_M_AXI_MEM_DATA_WIDTH = `PLATFORM_MEMORY_DATA_WIDTH, parameter C_M_AXI_MEM_ADDR_WIDTH = 64, - parameter C_M_AXI_MEM_DATA_WIDTH = `VX_MEM_DATA_WIDTH + parameter C_M_AXI_MEM_NUM_BANKS = `PLATFORM_MEMORY_BANKS ) ( // System signals input wire ap_clk, input wire ap_rst_n, // AXI4 master interface - `REPEAT (`M_AXI_MEM_NUM_BANKS, GEN_AXI_MEM, REPEAT_COMMA), + `REPEAT (`PLATFORM_MEMORY_BANKS, GEN_AXI_MEM, REPEAT_COMMA), // AXI4-Lite slave interface input wire s_axi_ctrl_awvalid, @@ -50,35 +50,38 @@ module vortex_afu_shim #( output wire interrupt `IGNORE_WARNINGS_END ); - vortex_afu #( - .C_S_AXI_CTRL_ADDR_WIDTH(C_S_AXI_CTRL_ADDR_WIDTH), - .C_S_AXI_CTRL_DATA_WIDTH(C_S_AXI_CTRL_DATA_WIDTH), - .C_M_AXI_MEM_ID_WIDTH(C_M_AXI_MEM_ID_WIDTH), - .C_M_AXI_MEM_ADDR_WIDTH(C_M_AXI_MEM_ADDR_WIDTH), - .C_M_AXI_MEM_DATA_WIDTH(C_M_AXI_MEM_DATA_WIDTH) - ) afu ( - .ap_clk(ap_clk), - .ap_rst_n(ap_rst_n), - // AXI4 master interface - `REPEAT (`M_AXI_MEM_NUM_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), - .s_axi_ctrl_awvalid(s_axi_ctrl_awvalid), - .s_axi_ctrl_awready(s_axi_ctrl_awready), - .s_axi_ctrl_awaddr(s_axi_ctrl_awaddr), - .s_axi_ctrl_wvalid(s_axi_ctrl_wvalid), - .s_axi_ctrl_wready(s_axi_ctrl_wready), - .s_axi_ctrl_wdata(s_axi_ctrl_wdata), - .s_axi_ctrl_wstrb(s_axi_ctrl_wstrb), - .s_axi_ctrl_arvalid(s_axi_ctrl_arvalid), - .s_axi_ctrl_arready(s_axi_ctrl_arready), - .s_axi_ctrl_araddr(s_axi_ctrl_araddr), - .s_axi_ctrl_rvalid(s_axi_ctrl_rvalid), - .s_axi_ctrl_rready(s_axi_ctrl_rready), - .s_axi_ctrl_rdata(s_axi_ctrl_rdata), - .s_axi_ctrl_rresp(s_axi_ctrl_rresp), - .s_axi_ctrl_bvalid(s_axi_ctrl_bvalid), - .s_axi_ctrl_bready(s_axi_ctrl_bready), - 
.s_axi_ctrl_bresp(s_axi_ctrl_bresp), - .interrupt(interrupt) - ); + VX_afu_wrap #( + .C_S_AXI_CTRL_ADDR_WIDTH (C_S_AXI_CTRL_ADDR_WIDTH), + .C_S_AXI_CTRL_DATA_WIDTH (C_S_AXI_CTRL_DATA_WIDTH), + .C_M_AXI_MEM_ID_WIDTH (C_M_AXI_MEM_ID_WIDTH), + .C_M_AXI_MEM_DATA_WIDTH (C_M_AXI_MEM_DATA_WIDTH), + .C_M_AXI_MEM_ADDR_WIDTH (C_M_AXI_MEM_ADDR_WIDTH), + .C_M_AXI_MEM_NUM_BANKS (C_M_AXI_MEM_NUM_BANKS) + ) afu_wrap ( + .clk (ap_clk), + .reset (~ap_rst_n), + + `REPEAT (`PLATFORM_MEMORY_BANKS, AXI_MEM_ARGS, REPEAT_COMMA), + + .s_axi_ctrl_awvalid (s_axi_ctrl_awvalid), + .s_axi_ctrl_awready (s_axi_ctrl_awready), + .s_axi_ctrl_awaddr (s_axi_ctrl_awaddr), + .s_axi_ctrl_wvalid (s_axi_ctrl_wvalid), + .s_axi_ctrl_wready (s_axi_ctrl_wready), + .s_axi_ctrl_wdata (s_axi_ctrl_wdata), + .s_axi_ctrl_wstrb (s_axi_ctrl_wstrb), + .s_axi_ctrl_arvalid (s_axi_ctrl_arvalid), + .s_axi_ctrl_arready (s_axi_ctrl_arready), + .s_axi_ctrl_araddr (s_axi_ctrl_araddr), + .s_axi_ctrl_rvalid (s_axi_ctrl_rvalid), + .s_axi_ctrl_rready (s_axi_ctrl_rready), + .s_axi_ctrl_rdata (s_axi_ctrl_rdata), + .s_axi_ctrl_rresp (s_axi_ctrl_rresp), + .s_axi_ctrl_bvalid (s_axi_ctrl_bvalid), + .s_axi_ctrl_bready (s_axi_ctrl_bready), + .s_axi_ctrl_bresp (s_axi_ctrl_bresp), + + .interrupt (interrupt) + ); endmodule diff --git a/sim/xrtsim/fpga.cpp b/sim/xrtsim/xrt.cpp similarity index 62% rename from sim/xrtsim/fpga.cpp rename to sim/xrtsim/xrt.cpp index bc1f0cb07..2123358a0 100644 --- a/sim/xrtsim/fpga.cpp +++ b/sim/xrtsim/xrt.cpp @@ -19,7 +19,7 @@ #include #include #include -#include "fpga.h" +#include "xrt.h" #include "xrt_sim.h" #include #include @@ -30,6 +30,13 @@ using namespace vortex; extern "C" { #endif +typedef struct { + size_t size; + xrt_sim* sim; + uint32_t bank; + uint64_t addr; +} buffer_t; + extern xrtDeviceHandle xrtDeviceOpen(unsigned int index) { if (index != 0) return nullptr; @@ -45,6 +52,8 @@ extern xrtDeviceHandle xrtDeviceOpen(unsigned int index) { extern int xrtXclbinGetXSAName(xrtDeviceHandle /*dhdl*/, 
char* name, int size, int* ret_size) { static const char* deviceName = "vortex_xrtsim"; if (name) { + if (size < strlen(deviceName) + 1) + return -1; memcpy(name, deviceName, size); } if (ret_size) { @@ -54,6 +63,8 @@ extern int xrtXclbinGetXSAName(xrtDeviceHandle /*dhdl*/, char* name, int size, i } extern int xrtDeviceClose(xrtDeviceHandle dhdl) { + if (dhdl == nullptr) + return -1; auto sim = reinterpret_cast(dhdl); delete sim; return 0; @@ -64,19 +75,38 @@ extern int xrtKernelClose(xrtKernelHandle /*kernelHandle*/) { } extern xrtBufferHandle xrtBOAlloc(xrtDeviceHandle dhdl, size_t size, xrtBufferFlags flags, xrtMemoryGroup grp) { - return 0; + auto sim = reinterpret_cast(dhdl); + uint64_t addr; + int err = sim->mem_alloc(size, grp, &addr); + if (err != 0) + return nullptr; + auto buffer = new buffer_t(); + buffer->size = size; + buffer->bank = grp; + buffer->sim = sim; + buffer->addr = addr; + return buffer; } extern int xrtBOFree(xrtBufferHandle bhdl) { - return 0; + if (bhdl == nullptr) + return -1; + auto buffer = reinterpret_cast(bhdl); + return buffer->sim->mem_free(buffer->bank, buffer->addr); } -extern int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t seek) { - return 0; +extern int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t offset) { + if (bhdl == nullptr) + return -1; + auto buffer = reinterpret_cast(bhdl); + return buffer->sim->mem_write(buffer->bank, buffer->addr + offset, size, src); } -extern int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t skip) { - return 0; +extern int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t offset) { + if (bhdl == nullptr) + return -1; + auto buffer = reinterpret_cast(bhdl); + return buffer->sim->mem_read(buffer->bank, buffer->addr + offset, size, dst); } extern int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t size, size_t offset) { @@ -84,11 +114,17 @@ extern int xrtBOSync(xrtBufferHandle bhdl, enum 
xclBOSyncDirection dir, size_t s } extern int xrtKernelWriteRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t data) { - return 0; + if (kernelHandle == nullptr) + return -1; + auto sim = reinterpret_cast(kernelHandle); + return sim->register_write(offset, data); } -extern int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* datap) { - return 0; +extern int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* data) { + if (kernelHandle == nullptr) + return -1; + auto sim = reinterpret_cast(kernelHandle); + return sim->register_read(offset, data); } extern int xrtErrorGetString(xrtDeviceHandle, xrtErrorCode error, char* out, size_t len, size_t* out_len) { diff --git a/sim/xrtsim/fpga.h b/sim/xrtsim/xrt.h similarity index 98% rename from sim/xrtsim/fpga.h rename to sim/xrtsim/xrt.h index f36bbadab..0dbd5cf42 100644 --- a/sim/xrtsim/fpga.h +++ b/sim/xrtsim/xrt.h @@ -94,15 +94,15 @@ xrtBufferHandle xrtBOAlloc(xrtDeviceHandle dhdl, size_t size, xrtBufferFlags fla int xrtBOFree(xrtBufferHandle bhdl); -int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t seek); +int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t offset); -int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t skip); +int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t offset); int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t size, size_t offset); int xrtKernelWriteRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t data); -int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* datap); +int xrtKernelReadRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t* data); int xrtErrorGetString(xrtDeviceHandle, xrtErrorCode error, char* out, size_t len, size_t* out_len); diff --git a/sim/xrtsim/xrt_sim.cpp b/sim/xrtsim/xrt_sim.cpp index 880983bf1..d572b9479 100644 --- a/sim/xrtsim/xrt_sim.cpp +++ 
b/sim/xrtsim/xrt_sim.cpp @@ -32,22 +32,17 @@ #include #include #include +#include +#include -#ifndef MEMORY_BANKS - #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS - #else - #define MEMORY_BANKS 2 - #endif -#endif +#include + +#define PLATFORM_MEMORY_DATA_SIZE (PLATFORM_MEMORY_DATA_WIDTH/8) #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 #endif -#undef MEM_BLOCK_SIZE -#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8) - #define CACHE_BLOCK_SIZE 64 #ifndef TRACE_START_TIME @@ -66,6 +61,16 @@ #define CPU_GPU_LATENCY 200 +#if PLATFORM_MEMORY_DATA_WIDTH > 64 + typedef VlWide<(PLATFORM_MEMORY_DATA_WIDTH/32)> Vl_m_data_t; +#else +#if PLATFORM_MEMORY_DATA_WIDTH > 32 + typedef QData Vl_m_data_t; +#else + typedef IData Vl_m_data_t; +#endif +#endif + using namespace vortex; static uint64_t timestamp = 0; @@ -91,6 +96,35 @@ void sim_trace_enable(bool enable) { /////////////////////////////////////////////////////////////////////////////// +#define MP_M_AXI_MEM_EACH(i) \ + m_axi_mem_[i].awvalid = &device_->m_axi_mem_##i##_awvalid; \ + m_axi_mem_[i].awready = &device_->m_axi_mem_##i##_awready; \ + m_axi_mem_[i].awaddr = &device_->m_axi_mem_##i##_awaddr; \ + m_axi_mem_[i].awid = &device_->m_axi_mem_##i##_awid; \ + m_axi_mem_[i].awlen = &device_->m_axi_mem_##i##_awlen; \ + m_axi_mem_[i].wvalid = &device_->m_axi_mem_##i##_wvalid; \ + m_axi_mem_[i].wready = &device_->m_axi_mem_##i##_wready; \ + m_axi_mem_[i].wdata = &device_->m_axi_mem_##i##_wdata; \ + m_axi_mem_[i].wstrb = &device_->m_axi_mem_##i##_wstrb; \ + m_axi_mem_[i].wlast = &device_->m_axi_mem_##i##_wlast; \ + m_axi_mem_[i].arvalid = &device_->m_axi_mem_##i##_arvalid; \ + m_axi_mem_[i].arready = &device_->m_axi_mem_##i##_arready; \ + m_axi_mem_[i].araddr = &device_->m_axi_mem_##i##_araddr; \ + m_axi_mem_[i].arid = &device_->m_axi_mem_##i##_arid; \ + m_axi_mem_[i].arlen = &device_->m_axi_mem_##i##_arlen; \ + m_axi_mem_[i].rvalid = 
&device_->m_axi_mem_##i##_rvalid; \ + m_axi_mem_[i].rready = &device_->m_axi_mem_##i##_rready; \ + m_axi_mem_[i].rdata = &device_->m_axi_mem_##i##_rdata; \ + m_axi_mem_[i].rlast = &device_->m_axi_mem_##i##_rlast; \ + m_axi_mem_[i].rid = &device_->m_axi_mem_##i##_rid; \ + m_axi_mem_[i].rresp = &device_->m_axi_mem_##i##_rresp; \ + m_axi_mem_[i].bvalid = &device_->m_axi_mem_##i##_bvalid; \ + m_axi_mem_[i].bready = &device_->m_axi_mem_##i##_bready; \ + m_axi_mem_[i].bresp = &device_->m_axi_mem_##i##_bresp; \ + m_axi_mem_[i].bid = &device_->m_axi_mem_##i##_bid; + +#define MP_M_AXI_MEM(n) MP_REPEAT(n, MP_M_AXI_MEM_EACH, ;) + class xrt_sim::Impl { public: Impl() @@ -108,6 +142,12 @@ class xrt_sim::Impl { if (future_.valid()) { future_.wait(); } + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { + delete mem_alloc_[i]; + } + if (ram_) { + delete ram_; + } #ifdef VCD_OUTPUT if (tfp_) { tfp_->close(); @@ -117,13 +157,10 @@ class xrt_sim::Impl { if (device_) { delete device_; } - if (ram_) { - delete ram_; - } } int init() { - // force random values for unitialized signals + // force random values for uninitialized signals Verilated::randReset(VERILATOR_RESET_VALUE); Verilated::randSeed(50); @@ -140,34 +177,137 @@ class xrt_sim::Impl { tfp_->open("trace.vcd"); #endif + // calculate memory bank size + mem_bank_size_ = 1ull << PLATFORM_MEMORY_ADDR_WIDTH; + + // allocate RAM ram_ = new RAM(0, RAM_PAGE_SIZE); - #ifndef NDEBUG - // dump device configuration - std::cout << "CONFIGS:" - << " num_threads=" << NUM_THREADS - << ", num_warps=" << NUM_WARPS - << ", num_cores=" << NUM_CORES - << ", num_clusters=" << NUM_CLUSTERS - << ", socket_size=" << SOCKET_SIZE - << ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec - << ", num_barriers=" << NUM_BARRIERS - << std::endl; - #endif + // initialize AXI memory interfaces + MP_M_AXI_MEM(PLATFORM_MEMORY_BANKS); + + // initialize memory allocator + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { + mem_alloc_[i] = new 
MemoryAllocator(0, mem_bank_size_, 4096, 64); + } + // reset the device this->reset(); + // Turn on assertion after reset + Verilated::assertOn(true); + // launch execution thread future_ = std::async(std::launch::async, [&]{ - while (!stop_) { - std::lock_guard guard(mutex_); - this->tick(); - } + while (!stop_) { + std::lock_guard guard(mutex_); + this->tick(); + } }); return 0; } + int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) { + if (bank_id >= PLATFORM_MEMORY_BANKS) + return -1; + return mem_alloc_[bank_id]->allocate(size, addr); + } + + int mem_free(uint32_t bank_id, uint64_t addr) { + if (bank_id >= PLATFORM_MEMORY_BANKS) + return -1; + return mem_alloc_[bank_id]->release(addr); + } + + int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) { + std::lock_guard guard(mutex_); + + if (bank_id >= PLATFORM_MEMORY_BANKS) + return -1; + uint64_t base_addr = bank_id * mem_bank_size_ + addr; + ram_->write(data, base_addr, size); + /*printf("%0ld: [sim] xrt-mem-write: bank_id=%0d, addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size); + for (int i = size-1; i >= 0; --i) { + printf("%02x", ((const uint8_t*)data)[i]); + } + printf(")\n");*/ + return 0; + } + + int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data) { + std::lock_guard guard(mutex_); + + if (bank_id >= PLATFORM_MEMORY_BANKS) + return -1; + uint64_t base_addr = bank_id * mem_bank_size_ + addr; + ram_->read(data, base_addr, size); + /*printf("%0ld: [sim] xrt-mem-read: bank_id=%0d, addr=0x%lx, size=%ld, data=0x", timestamp, bank_id, base_addr, size); + for (int i = size-1; i >= 0; --i) { + printf("%02x", ((uint8_t*)data)[i]); + } + printf(")\n");*/ + return 0; + } + + int register_write(uint32_t offset, uint32_t value) { + std::lock_guard guard(mutex_); + + // write address + //printf("%0ld: [sim] register_write: address=0x%x\n", timestamp, offset); + device_->s_axi_ctrl_awvalid = 1; + device_->s_axi_ctrl_awaddr = offset; + while 
(!device_->s_axi_ctrl_awready) + this->tick(); + this->tick(); + device_->s_axi_ctrl_awvalid = 0; + + // write data + //printf("%0ld: [sim] register_write: data=0x%x\n", timestamp, value); + device_->s_axi_ctrl_wvalid = 1; + device_->s_axi_ctrl_wdata = value; + device_->s_axi_ctrl_wstrb = 0xf; + while (!device_->s_axi_ctrl_wready) + this->tick(); + this->tick(); + device_->s_axi_ctrl_wvalid = 0; + + // write response + //printf("%0ld: [sim] register_write: response\n", timestamp); + do { + this->tick(); + } while (!device_->s_axi_ctrl_bvalid); + device_->s_axi_ctrl_bready = 1; + this->tick(); + device_->s_axi_ctrl_bready = 0; + //printf("%0ld: [sim] register_write: done\n", timestamp); + return 0; + } + + int register_read(uint32_t offset, uint32_t* value) { + std::lock_guard guard(mutex_); + // read address + //printf("%0ld: [sim] register_read: address=0x%x\n", timestamp, offset); + device_->s_axi_ctrl_arvalid = 1; + device_->s_axi_ctrl_araddr = offset; + while (!device_->s_axi_ctrl_arready) + this->tick(); + this->tick(); + device_->s_axi_ctrl_arvalid = 0; + + // read response + //printf("%0ld: [sim] register_read: response\n", timestamp); + do { + this->tick(); + } while (!device_->s_axi_ctrl_rvalid); + *value = device_->s_axi_ctrl_rdata; + device_->s_axi_ctrl_rready = 1; + this->tick(); + device_->s_axi_ctrl_rready = 0; + //printf("%0ld: [sim] register_read: done (value=0x%x)\n", timestamp, *value); + return 0; + } + private: void reset() { @@ -178,9 +318,9 @@ class xrt_sim::Impl { reqs.clear(); } - { + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { std::queue empty; - std::swap(dram_queue_, empty); + std::swap(dram_queues_[i], empty); } device_->ap_rst_n = 0; @@ -193,43 +333,38 @@ class xrt_sim::Impl { } device_->ap_rst_n = 1; - - for (int i = 0; i < RESET_DELAY; ++i) { - device_->ap_clk = 0; - this->eval(); - device_->ap_clk = 1; - this->eval(); + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { + *m_axi_mem_[i].arready = 1; + *m_axi_mem_[i].awready = 1; 
} - - // Turn on assertion after reset - Verilated::assertOn(true); } void tick() { - this->axi_ctrl_bus_eval(); this->axi_mem_bus_eval(); - if (!dram_queue_.empty()) { - auto mem_req = dram_queue_.front(); - if (dram_sim_.send_request(mem_req->write, mem_req->addr, 0, [](void* arg) { - auto orig_req = reinterpret_cast(arg); - if (orig_req->ready) { - delete orig_req; - } else { - orig_req->ready = true; + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { + if (!dram_queues_[i].empty()) { + auto mem_req = dram_queues_[i].front(); + if (dram_sim_.send_request(mem_req->write, mem_req->addr, i, [](void* arg) { + auto orig_req = reinterpret_cast(arg); + if (orig_req->ready) { + delete orig_req; + } else { + orig_req->ready = true; + } + }, mem_req)) { + dram_queues_[i].pop(); } - }, mem_req)) { - dram_queue_.pop(); } } + dram_sim_.tick(); + device_->ap_clk = 0; this->eval(); device_->ap_clk = 1; this->eval(); - dram_sim_.tick(); - #ifndef NDEBUG fflush(stdout); #endif @@ -246,76 +381,219 @@ class xrt_sim::Impl { } void axi_ctrl_bus_reset() { - // address write request - device_->s_axi_ctrl_awvalid = 0; - //device_->s_axi_ctrl_awaddr = 0; - - // data write request - device_->s_axi_ctrl_wvalid = 0; - //device_->s_axi_ctrl_wdata = 0; - //device_->s_axi_ctrl_wstrb = 0; - // address read request device_->s_axi_ctrl_arvalid = 0; - //device_->s_axi_ctrl_araddr = 0; + device_->s_axi_ctrl_araddr = 0; // data read response device_->s_axi_ctrl_rready = 0; + // address write request + device_->s_axi_ctrl_awvalid = 0; + device_->s_axi_ctrl_awaddr = 0; + + // data write request + device_->s_axi_ctrl_wvalid = 0; + device_->s_axi_ctrl_wdata = 0; + device_->s_axi_ctrl_wstrb = 0; + // data write response device_->s_axi_ctrl_bready = 0; } - void axi_ctrl_bus_eval() { - //-- - } - void axi_mem_bus_reset() { - // address write request - device_->m_axi_mem_0_awready = 0; + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { + // address read request + *m_axi_mem_[i].arready = 0; - // data 
write request - device_->m_axi_mem_0_wready = 0; + // address write request + *m_axi_mem_[i].awready = 0; - // address read request - device_->m_axi_mem_0_arready = 0; + // data write request + *m_axi_mem_[i].wready = 0; - // data read response - device_->m_axi_mem_0_rvalid = 0; - //device_->m_axi_mem_0_rdata = 0; - //device_->m_axi_mem_0_rlast = 0; - //device_->m_axi_mem_0_rid = 0; - //device_->m_axi_mem_0_rresp = 0; + // data read response + *m_axi_mem_[i].rvalid = 0; - // data write response - device_->m_axi_mem_0_bvalid = 0; - //device_->m_axi_mem_0_bresp = 0; - //device_->m_axi_mem_0_bid = 0; + // data write response + *m_axi_mem_[i].bvalid = 0; + + // states + m_axi_states_[i].write_req_pending = false; + } } void axi_mem_bus_eval() { - //-- + for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) { + // handle read responses + if (*m_axi_mem_[i].rvalid && *m_axi_mem_[i].rready) { + *m_axi_mem_[i].rvalid = 0; + } + if (!*m_axi_mem_[i].rvalid) { + if (!pending_mem_reqs_[i].empty() + && (*pending_mem_reqs_[i].begin())->ready + && !(*pending_mem_reqs_[i].begin())->write) { + auto mem_rsp_it = pending_mem_reqs_[i].begin(); + auto mem_rsp = *mem_rsp_it; + *m_axi_mem_[i].rvalid = 1; + *m_axi_mem_[i].rid = mem_rsp->tag; + *m_axi_mem_[i].rresp = 0; + *m_axi_mem_[i].rlast = 1; + memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE); + pending_mem_reqs_[i].erase(mem_rsp_it); + delete mem_rsp; + } + } + + // handle write responses + if (*m_axi_mem_[i].bvalid && *m_axi_mem_[i].bready) { + *m_axi_mem_[i].bvalid = 0; + } + if (!*m_axi_mem_[i].bvalid) { + if (!pending_mem_reqs_[i].empty() + && (*pending_mem_reqs_[i].begin())->ready + && (*pending_mem_reqs_[i].begin())->write) { + auto mem_rsp_it = pending_mem_reqs_[i].begin(); + auto mem_rsp = *mem_rsp_it; + *m_axi_mem_[i].bvalid = 1; + *m_axi_mem_[i].bid = mem_rsp->tag; + *m_axi_mem_[i].bresp = 0; + pending_mem_reqs_[i].erase(mem_rsp_it); + delete mem_rsp; + } + } + + // handle read requests + if 
(*m_axi_mem_[i].arvalid && *m_axi_mem_[i].arready) { + auto mem_req = new mem_req_t(); + mem_req->tag = *m_axi_mem_[i].arid; + mem_req->addr = uint64_t(*m_axi_mem_[i].araddr); + ram_->read(mem_req->data.data(), mem_req->addr, PLATFORM_MEMORY_DATA_SIZE); + mem_req->write = false; + mem_req->ready = false; + pending_mem_reqs_[i].emplace_back(mem_req); + + /*printf("%0ld: [sim] axi-mem-read: bank=%d, addr=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, mem_req->tag); + for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { + printf("%02x", mem_req->data[i]); + } + printf("\n");*/ + + // send dram request + dram_queues_[i].push(mem_req); + } + + if (*m_axi_mem_[i].wready && !m_axi_states_[i].write_req_pending) { + *m_axi_mem_[i].wready = 0; + } + + // handle address write requestsls + if (*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !*m_axi_mem_[i].wready) { + m_axi_states_[i].write_req_addr = *m_axi_mem_[i].awaddr; + m_axi_states_[i].write_req_tag = *m_axi_mem_[i].awid; + // activate data channel + *m_axi_mem_[i].wready = 1; + m_axi_states_[i].write_req_pending = !*m_axi_mem_[i].wvalid; + } + + // handle data write requests + if (*m_axi_mem_[i].wvalid && *m_axi_mem_[i].wready) { + auto byteen = *m_axi_mem_[i].wstrb; + auto data = (uint8_t*)m_axi_mem_[i].wdata->data(); + auto byte_addr = m_axi_states_[i].write_req_addr; + + for (int i = 0; i < PLATFORM_MEMORY_DATA_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[byte_addr + i] = data[i]; + } + } + + auto mem_req = new mem_req_t(); + mem_req->tag = m_axi_states_[i].write_req_tag; + mem_req->addr = byte_addr; + mem_req->write = true; + mem_req->ready = false; + pending_mem_reqs_[i].emplace_back(mem_req); + + /*printf("%0ld: [sim] axi-mem-write: bank=%d, addr=0x%lx, byteen=0x%lx, tag=0x%x, data=0x", timestamp, i, mem_req->addr, byteen, mem_req->tag); + for (int i = PLATFORM_MEMORY_DATA_SIZE-1; i >= 0; --i) { + printf("%02x", data[i]); + } + printf("\n");*/ + + // send dram request + 
dram_queues_[i].push(mem_req); + + // deactivate data channel + if (m_axi_states_[i].write_req_pending) { + *m_axi_mem_[i].wready = 0; + m_axi_states_[i].write_req_pending = false; + } + } + } } typedef struct { - std::array data; - uint32_t addr; + uint64_t write_req_addr; + uint32_t write_req_tag; + bool write_req_pending; + } m_axi_state_t; + + typedef struct { + std::array data; + uint32_t tag; + uint64_t addr; bool write; bool ready; } mem_req_t; - Vvortex_afu_shim *device_; + typedef struct { + CData* awvalid; + CData* awready; + QData* awaddr; + IData* awid; + CData* awlen; + CData* wvalid; + CData* wready; + Vl_m_data_t* wdata; + QData* wstrb; + CData* wlast; + CData* arvalid; + CData* arready; + QData* araddr; + IData* arid; + CData* arlen; + CData* rvalid; + CData* rready; + Vl_m_data_t* rdata; + CData* rlast; + IData* rid; + CData* rresp; + CData* bvalid; + CData* bready; + CData* bresp; + IData* bid; + } m_axi_mem_t; + + Vvortex_afu_shim* device_; RAM* ram_; DramSim dram_sim_; + uint64_t mem_bank_size_; std::future future_; bool stop_; std::mutex mutex_; - std::list pending_mem_reqs_[MEMORY_BANKS]; + std::list pending_mem_reqs_[PLATFORM_MEMORY_BANKS]; + + m_axi_mem_t m_axi_mem_[PLATFORM_MEMORY_BANKS]; + + MemoryAllocator* mem_alloc_[PLATFORM_MEMORY_BANKS]; + + m_axi_state_t m_axi_states_[PLATFORM_MEMORY_BANKS]; - std::queue dram_queue_; + std::queue dram_queues_[PLATFORM_MEMORY_BANKS]; #ifdef VCD_OUTPUT VerilatedVcdC* tfp_; @@ -334,4 +612,28 @@ xrt_sim::~xrt_sim() { int xrt_sim::init() { return impl_->init(); +} + +int xrt_sim::mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr) { + return impl_->mem_alloc(size, bank_id, addr); +} + +int xrt_sim::mem_free(uint32_t bank_id, uint64_t addr) { + return impl_->mem_free(bank_id, addr); +} + +int xrt_sim::mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* data) { + return impl_->mem_write(bank_id, addr, size, data); +} + +int xrt_sim::mem_read(uint32_t bank_id, uint64_t addr, uint64_t 
size, void* data) { + return impl_->mem_read(bank_id, addr, size, data); +} + +int xrt_sim::register_write(uint32_t offset, uint32_t value) { + return impl_->register_write(offset, value); +} + +int xrt_sim::register_read(uint32_t offset, uint32_t* value) { + return impl_->register_read(offset, value); } \ No newline at end of file diff --git a/sim/xrtsim/xrt_sim.h b/sim/xrtsim/xrt_sim.h index e399c33de..6a2d5d7da 100644 --- a/sim/xrtsim/xrt_sim.h +++ b/sim/xrtsim/xrt_sim.h @@ -25,6 +25,18 @@ class xrt_sim { int init(); + int mem_alloc(uint64_t size, uint32_t bank_id, uint64_t* addr); + + int mem_free(uint32_t bank_id, uint64_t addr); + + int mem_write(uint32_t bank_id, uint64_t addr, uint64_t size, const void* value); + + int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* value); + + int register_write(uint32_t offset, uint32_t value); + + int register_read(uint32_t offset, uint32_t* value); + private: class Impl; diff --git a/tests/Makefile b/tests/Makefile index b141fd41d..1068da2ab 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -13,9 +13,7 @@ regression: $(MAKE) -C regression opencl: -ifneq ($(XLEN),64) $(MAKE) -C opencl -endif riscv: $(MAKE) -C riscv diff --git a/tests/kernel/common.mk b/tests/kernel/common.mk index e3f6b472b..7829ffb14 100644 --- a/tests/kernel/common.mk +++ b/tests/kernel/common.mk @@ -5,6 +5,9 @@ CFLAGS += -march=rv64imafd -mabi=lp64d else CFLAGS += -march=rv32imaf -mabi=ilp32f endif +STARTUP_ADDR ?= 0x80000000 + +VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT) LLVM_CFLAGS += --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) @@ -23,13 +26,13 @@ DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy CFLAGS += -O3 -mcmodel=medany -fno-exceptions -nostartfiles -nostdlib -fdata-sections -ffunction-sections -CFLAGS += -I$(VORTEX_KN_PATH)/include -I$(ROOT_DIR)/hw +CFLAGS += -I$(VORTEX_HOME)/kernel/include -I$(ROOT_DIR)/hw CFLAGS += -DXLEN_$(XLEN) 
-DNDEBUG LIBC_LIB += -L$(LIBC_VORTEX)/lib -lm -lc LIBC_LIB += $(LIBCRT_VORTEX)/lib/baremetal/libclang_rt.builtins-riscv$(XLEN).a -LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 $(ROOT_DIR)/kernel/libvortex.a $(LIBC_LIB) +LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortex.a $(LIBC_LIB) all: $(PROJECT).elf $(PROJECT).bin $(PROJECT).dump diff --git a/tests/opencl/bfs/CLHelper.h b/tests/opencl/bfs/CLHelper.h index d485cbc36..e4106845b 100755 --- a/tests/opencl/bfs/CLHelper.h +++ b/tests/opencl/bfs/CLHelper.h @@ -431,7 +431,7 @@ void _clRelease() { } //-------------------------------------------------------- //--cambine:create buffer and then copy data from host to device -cl_mem _clCreateAndCpyMem(int size, void *h_mem_source) throw(string) { +cl_mem _clCreateAndCpyMem(int size, void *h_mem_source) { cl_mem d_mem; d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size, @@ -445,7 +445,7 @@ cl_mem _clCreateAndCpyMem(int size, void *h_mem_source) throw(string) { //------------------------------------------------------- //--cambine: create read only buffer for devices //--date: 17/01/2011 -cl_mem _clMallocRW(int size, void *h_mem_ptr) throw(string) { +cl_mem _clMallocRW(int size, void *h_mem_ptr) { cl_mem d_mem; d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size, @@ -459,7 +459,7 @@ cl_mem _clMallocRW(int size, void *h_mem_ptr) throw(string) { //------------------------------------------------------- //--cambine: create read and write buffer for devices //--date: 17/01/2011 -cl_mem _clMalloc(int size, void *h_mem_ptr) throw(string) { +cl_mem _clMalloc(int size, void *h_mem_ptr) { cl_mem d_mem; d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size, @@ -474,7 +474,7 @@ cl_mem _clMalloc(int size, void 
*h_mem_ptr) throw(string) { //------------------------------------------------------- //--cambine: transfer data from host to device //--date: 17/01/2011 -void _clMemcpyH2D(cl_mem d_mem, int size, const void *h_mem_ptr) throw(string) { +void _clMemcpyH2D(cl_mem d_mem, int size, const void *h_mem_ptr) { oclHandles.cl_status = clEnqueueWriteBuffer( oclHandles.queue, d_mem, CL_TRUE, 0, size, h_mem_ptr, 0, NULL, NULL); #ifdef ERRMSG @@ -485,7 +485,7 @@ void _clMemcpyH2D(cl_mem d_mem, int size, const void *h_mem_ptr) throw(string) { //-------------------------------------------------------- //--cambine:create buffer and then copy data from host to device with pinned // memory -cl_mem _clCreateAndCpyPinnedMem(int size, float *h_mem_source) throw(string) { +cl_mem _clCreateAndCpyPinnedMem(int size, float *h_mem_source) { cl_mem d_mem, d_mem_pinned; float *h_mem_pinned = NULL; d_mem_pinned = clCreateBuffer(oclHandles.context, @@ -528,7 +528,7 @@ cl_mem _clCreateAndCpyPinnedMem(int size, float *h_mem_source) throw(string) { //-------------------------------------------------------- //--cambine:create write only buffer on device -cl_mem _clMallocWO(int size) throw(string) { +cl_mem _clMallocWO(int size) { cl_mem d_mem; d_mem = clCreateBuffer(oclHandles.context, CL_MEM_WRITE_ONLY, size, 0, &oclHandles.cl_status); @@ -541,7 +541,7 @@ cl_mem _clMallocWO(int size) throw(string) { //-------------------------------------------------------- // transfer data from device to host -void _clMemcpyD2H(cl_mem d_mem, int size, void *h_mem) throw(string) { +void _clMemcpyD2H(cl_mem d_mem, int size, void *h_mem) { oclHandles.cl_status = clEnqueueReadBuffer(oclHandles.queue, d_mem, CL_TRUE, 0, size, h_mem, 0, 0, 0); #ifdef ERRMSG @@ -580,7 +580,7 @@ void _clMemcpyD2H(cl_mem d_mem, int size, void *h_mem) throw(string) { //-------------------------------------------------------- // set kernel arguments void _clSetArgs(int kernel_id, int arg_idx, void *d_mem, - int size = 0) throw(string) { + 
int size = 0) { if (!size) { oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, sizeof(d_mem), &d_mem); @@ -657,7 +657,7 @@ void _clSetArgs(int kernel_id, int arg_idx, void *d_mem, #endif } } -void _clFinish() throw(string) { +void _clFinish() { oclHandles.cl_status = clFinish(oclHandles.queue); #ifdef ERRMSG oclHandles.error_str = "excpetion in _clFinish"; @@ -683,7 +683,7 @@ void _clFinish() throw(string) { //-------------------------------------------------------- //--cambine:enqueue kernel void _clInvokeKernel(int kernel_id, int work_items, - int work_group_size) throw(string) { + int work_group_size) { cl_uint work_dim = WORK_DIM; //cl_event e[1]; if (work_items % work_group_size != 0) // process situations that work_items @@ -755,7 +755,7 @@ void _clInvokeKernel(int kernel_id, int work_items, // #endif } void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, - int group_y) throw(string) { + int group_y) { cl_uint work_dim = WORK_DIM; size_t local_work_size[] = {group_x, group_y}; size_t global_work_size[] = {range_x, range_y}; @@ -832,7 +832,7 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, //-------------------------------------------------------- // release OpenCL objects -void _clFree(cl_mem ob) throw(string) { +void _clFree(cl_mem ob) { if (ob != NULL) oclHandles.cl_status = clReleaseMemObject(ob); #ifdef ERRMSG diff --git a/tests/opencl/bfs/main.cc b/tests/opencl/bfs/main.cc index 537950603..cd55f5b3f 100755 --- a/tests/opencl/bfs/main.cc +++ b/tests/opencl/bfs/main.cc @@ -72,7 +72,7 @@ void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, char *h_graph_visited, - int *h_cost) throw(std::string) { + int *h_cost) { // int number_elements = height*width; char h_over; diff --git a/tests/opencl/common.mk 
b/tests/opencl/common.mk index 2e287a944..bb7b1e0d6 100644 --- a/tests/opencl/common.mk +++ b/tests/opencl/common.mk @@ -5,16 +5,18 @@ TARGET ?= opaesim XRT_SYN_DIR ?= $(VORTEX_HOME)/hw/syn/xilinx/xrt XRT_DEVICE_INDEX ?= 0 +STARTUP_ADDR ?= 0x80000000 ifeq ($(XLEN),64) VX_CFLAGS += -march=rv64imafd -mabi=lp64d -STARTUP_ADDR ?= 0x180000000 POCL_CC_FLAGS += POCL_VORTEX_XLEN=64 else VX_CFLAGS += -march=rv32imaf -mabi=ilp32f -STARTUP_ADDR ?= 0x80000000 POCL_CC_FLAGS += POCL_VORTEX_XLEN=32 endif +VORTEX_RT_PATH ?= $(ROOT_DIR)/runtime +VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel + POCL_PATH ?= $(TOOLDIR)/pocl LLVM_POCL ?= $(TOOLDIR)/llvm-vortex @@ -26,18 +28,18 @@ VX_LIBS += $(LIBCRT_VORTEX)/lib/baremetal/libclang_rt.builtins-riscv$(XLEN).a VX_CFLAGS += -O3 -mcmodel=medany --sysroot=$(RISCV_SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) VX_CFLAGS += -fno-rtti -fno-exceptions -nostartfiles -nostdlib -fdata-sections -ffunction-sections -VX_CFLAGS += -I$(ROOT_DIR)/hw -I$(VORTEX_KN_PATH)/include -DXLEN_$(XLEN) -DNDEBUG +VX_CFLAGS += -I$(ROOT_DIR)/hw -I$(VORTEX_HOME)/kernel/include -DXLEN_$(XLEN) -DNDEBUG VX_CFLAGS += -Xclang -target-feature -Xclang +vortex VX_CFLAGS += -Xclang -target-feature -Xclang +zicond VX_CFLAGS += -mllvm -disable-loop-idiom-all #VX_CFLAGS += -mllvm -vortex-branch-divergence=0 #VX_CFLAGS += -mllvm -print-after-all -VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T$(VORTEX_KN_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(ROOT_DIR)/kernel/libvortex.a $(VX_LIBS) +VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortex.a $(VX_LIBS) VX_BINTOOL += OBJCOPY=$(LLVM_VORTEX)/bin/llvm-objcopy $(VORTEX_HOME)/kernel/scripts/vxbin.py -CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors +CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors CXXFLAGS += -Wno-deprecated-declarations -Wno-unused-parameter -Wno-narrowing CXXFLAGS += -pthread CXXFLAGS += 
-I$(POCL_PATH)/include @@ -80,7 +82,7 @@ all: $(PROJECT) $(CC) $(CXXFLAGS) -c $< -o $@ $(PROJECT): $(OBJS) - $(CXX) $(CXXFLAGS) $(OBJS) $(LDFLAGS) -L$(ROOT_DIR)/runtime -lvortex -L$(POCL_PATH)/lib -lOpenCL -o $@ + $(CXX) $(CXXFLAGS) $(OBJS) $(LDFLAGS) -L$(VORTEX_RT_PATH) -lvortex -L$(POCL_PATH)/lib -lOpenCL -o $@ $(PROJECT).host: $(OBJS) $(CXX) $(CXXFLAGS) $(OBJS) $(LDFLAGS) -lOpenCL -o $@ @@ -89,19 +91,21 @@ run-gpu: $(PROJECT).host $(KERNEL_SRCS) ./$(PROJECT).host $(OPTS) run-simx: $(PROJECT) $(KERNEL_SRCS) - LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=simx ./$(PROJECT) $(OPTS) + LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=simx ./$(PROJECT) $(OPTS) run-rtlsim: $(PROJECT) $(KERNEL_SRCS) - LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=rtlsim ./$(PROJECT) $(OPTS) + LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=rtlsim ./$(PROJECT) $(OPTS) run-opae: $(PROJECT) $(KERNEL_SRCS) - SCOPE_JSON_PATH=$(ROOT_DIR)/runtime/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=opae ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=opae ./$(PROJECT) $(OPTS) run-xrt: $(PROJECT) $(KERNEL_SRCS) ifeq ($(TARGET), hw) - XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt 
./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) +else ifeq ($(TARGET), hw_emu) + SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(ROOT_DIR)/runtime:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_PATH)/lib:$(VORTEX_RT_PATH):$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) $(POCL_CC_FLAGS) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) diff --git a/tests/opencl/conv3/main.cc b/tests/opencl/conv3/main.cc index cda8e74ac..1220dabdb 100644 --- a/tests/opencl/conv3/main.cc +++ b/tests/opencl/conv3/main.cc @@ -116,16 +116,15 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/oclprintf/main.cc 
b/tests/opencl/oclprintf/main.cc index ef82a33e5..c23e6dec0 100644 --- a/tests/opencl/oclprintf/main.cc +++ b/tests/opencl/oclprintf/main.cc @@ -81,16 +81,15 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/psort/main.cc b/tests/opencl/psort/main.cc index e0bd49b8e..8ecfdc523 100644 --- a/tests/opencl/psort/main.cc +++ b/tests/opencl/psort/main.cc @@ -87,7 +87,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "fn:h?")) != -1) { + while ((c = getopt(argc, argv, "fn:h")) != -1) { switch (c) { case 'f': float_enable = 1; @@ -96,10 +96,9 @@ static void parse_args(int argc, char **argv) { size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/psum/main.cc b/tests/opencl/psum/main.cc index 749d40619..5606de8c5 100644 --- a/tests/opencl/psum/main.cc +++ b/tests/opencl/psum/main.cc @@ -104,7 +104,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:l:h?")) != -1) { + while ((c = getopt(argc, argv, "n:l:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -113,10 +113,9 @@ static void parse_args(int argc, char **argv) { local_size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/saxpy/main.cc b/tests/opencl/saxpy/main.cc index 2d896e6a9..2397c720e 100644 --- a/tests/opencl/saxpy/main.cc +++ b/tests/opencl/saxpy/main.cc @@ -126,13 +126,12 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - 
while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; - case 'h': - case '?': { + case 'h':{ show_usage(); exit(0); } break; diff --git a/tests/opencl/sfilter/main.cc b/tests/opencl/sfilter/main.cc index b9d2356b2..97cfb689e 100644 --- a/tests/opencl/sfilter/main.cc +++ b/tests/opencl/sfilter/main.cc @@ -124,16 +124,15 @@ int size = 16; static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/sgemm/main.cc b/tests/opencl/sgemm/main.cc index 41c1bc5e8..31f99d2e4 100644 --- a/tests/opencl/sgemm/main.cc +++ b/tests/opencl/sgemm/main.cc @@ -147,16 +147,15 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/sgemm2/main.cc b/tests/opencl/sgemm2/main.cc index 595a9fc51..c4ca06fdb 100644 --- a/tests/opencl/sgemm2/main.cc +++ b/tests/opencl/sgemm2/main.cc @@ -147,16 +147,15 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/sgemm3/main.cc b/tests/opencl/sgemm3/main.cc index 570cee9ae..24dd39752 100644 --- a/tests/opencl/sgemm3/main.cc +++ b/tests/opencl/sgemm3/main.cc @@ -148,7 +148,7 @@ static 
void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:h?")) != -1) { + while ((c = getopt(argc, argv, "n:t:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -157,10 +157,9 @@ static void parse_args(int argc, char **argv) { tile_size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/opencl/vecadd/main.cc b/tests/opencl/vecadd/main.cc index e1316ad3f..190d29450 100644 --- a/tests/opencl/vecadd/main.cc +++ b/tests/opencl/vecadd/main.cc @@ -141,16 +141,15 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:h?")) != -1) { + while ((c = getopt(argc, argv, "n:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/basic/Makefile b/tests/regression/basic/Makefile index a8e86cc17..5940ca65c 100644 --- a/tests/regression/basic/Makefile +++ b/tests/regression/basic/Makefile @@ -13,7 +13,7 @@ OPTS ?= -n256 include ../common.mk -VX_LDFLAGS = -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) +VX_LDFLAGS = -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-g++ diff --git a/tests/regression/basic/main.cpp b/tests/regression/basic/main.cpp index 73f3e29a2..575333c4b 100755 --- a/tests/regression/basic/main.cpp +++ b/tests/regression/basic/main.cpp @@ -38,7 +38,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:t:k:h")) != -1) { switch (c) { case 'n': count = 
atoi(optarg); @@ -50,10 +50,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/common.mk b/tests/regression/common.mk index 12b45e848..2cba5ef9a 100644 --- a/tests/regression/common.mk +++ b/tests/regression/common.mk @@ -5,12 +5,14 @@ TARGET ?= opaesim XRT_SYN_DIR ?= $(VORTEX_HOME)/hw/syn/xilinx/xrt XRT_DEVICE_INDEX ?= 0 +VORTEX_RT_PATH ?= $(ROOT_DIR)/runtime +VORTEX_KN_PATH ?= $(ROOT_DIR)/kernel + +STARTUP_ADDR ?= 0x80000000 ifeq ($(XLEN),64) VX_CFLAGS += -march=rv64imafd -mabi=lp64d -STARTUP_ADDR ?= 0x180000000 else VX_CFLAGS += -march=rv32imaf -mabi=ilp32f -STARTUP_ADDR ?= 0x80000000 endif LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT) @@ -36,7 +38,7 @@ VX_CP = $(LLVM_VORTEX)/bin/llvm-objcopy #VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy VX_CFLAGS += -O3 -mcmodel=medany -fno-rtti -fno-exceptions -nostartfiles -nostdlib -fdata-sections -ffunction-sections -VX_CFLAGS += -I$(VORTEX_KN_PATH)/include -I$(ROOT_DIR)/hw +VX_CFLAGS += -I$(VORTEX_HOME)/kernel/include -I$(ROOT_DIR)/hw VX_CFLAGS += -DXLEN_$(XLEN) VX_CFLAGS += -DNDEBUG @@ -45,12 +47,12 @@ VX_LIBS += -L$(LIBC_VORTEX)/lib -lm -lc VX_LIBS += $(LIBCRT_VORTEX)/lib/baremetal/libclang_rt.builtins-riscv$(XLEN).a #VX_LIBS += -lgcc -VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(ROOT_DIR)/kernel/libvortex.a $(VX_LIBS) +VX_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_HOME)/kernel/scripts/link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortex.a $(VX_LIBS) -CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors -CXXFLAGS += -I$(VORTEX_RT_PATH)/include -I$(ROOT_DIR)/hw +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -I$(VORTEX_HOME)/runtime/include -I$(ROOT_DIR)/hw -LDFLAGS += -L$(ROOT_DIR)/runtime -lvortex 
+LDFLAGS += -L$(VORTEX_RT_PATH) -lvortex # Debugging ifdef DEBUG @@ -86,19 +88,21 @@ $(PROJECT): $(SRCS) $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ run-simx: $(PROJECT) kernel.vxbin - LD_LIBRARY_PATH=$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=simx ./$(PROJECT) $(OPTS) + LD_LIBRARY_PATH=$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=simx ./$(PROJECT) $(OPTS) run-rtlsim: $(PROJECT) kernel.vxbin - LD_LIBRARY_PATH=$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=rtlsim ./$(PROJECT) $(OPTS) + LD_LIBRARY_PATH=$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=rtlsim ./$(PROJECT) $(OPTS) run-opae: $(PROJECT) kernel.vxbin - SCOPE_JSON_PATH=$(ROOT_DIR)/runtime/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=opae ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=opae ./$(PROJECT) $(OPTS) run-xrt: $(PROJECT) kernel.vxbin ifeq ($(TARGET), hw) - XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) +else ifeq ($(TARGET), hw_emu) + SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(VORTEX_RT_PATH)/xrt/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) 
else - XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(ROOT_DIR)/runtime:$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) + SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/scope.json LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=xrt ./$(PROJECT) $(OPTS) endif .depend: $(SRCS) diff --git a/tests/regression/conv3x/main.cpp b/tests/regression/conv3x/main.cpp index d5f8b4e81..3a0e192fb 100644 --- a/tests/regression/conv3x/main.cpp +++ b/tests/regression/conv3x/main.cpp @@ -109,7 +109,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:lh?")) != -1) { + while ((c = getopt(argc, argv, "n:k:lh")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -121,10 +121,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/demo/common.h b/tests/regression/demo/common.h index 98b8ff587..be200ec04 100644 --- a/tests/regression/demo/common.h +++ b/tests/regression/demo/common.h @@ -2,7 +2,7 @@ #define _COMMON_H_ #ifndef TYPE -#define TYPE float +#define TYPE int #endif typedef struct { @@ -10,7 +10,7 @@ typedef struct { uint32_t task_size; uint64_t src0_addr; uint64_t src1_addr; - uint64_t dst_addr; + uint64_t dst_addr; } kernel_arg_t; #endif diff --git a/tests/regression/demo/main.cpp b/tests/regression/demo/main.cpp index 4947cb64f..3fdd03601 100644 --- a/tests/regression/demo/main.cpp +++ b/tests/regression/demo/main.cpp @@ -87,7 +87,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count 
= atoi(optarg); @@ -95,8 +95,7 @@ static void parse_args(int argc, char **argv) { case 'k': kernel_file = optarg; break; - case 'h': - case '?': { + case 'h':{ show_usage(); exit(0); } break; diff --git a/tests/regression/diverge/main.cpp b/tests/regression/diverge/main.cpp index fc4384610..d858b1729 100644 --- a/tests/regression/diverge/main.cpp +++ b/tests/regression/diverge/main.cpp @@ -35,7 +35,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -44,10 +44,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/dogfood/main.cpp b/tests/regression/dogfood/main.cpp index 1fcf9d511..f2922c632 100644 --- a/tests/regression/dogfood/main.cpp +++ b/tests/regression/dogfood/main.cpp @@ -12,7 +12,7 @@ TestSuite* testSuite = nullptr; const char* kernel_file = "kernel.vxbin"; -int count = 1; +int count = 64; std::unordered_set selected; std::unordered_set excluded; int testid_s = 0; @@ -35,7 +35,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:x:s:e:k:ch?")) != -1) { + while ((c = getopt(argc, argv, "n:t:x:s:e:k:ch")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -59,10 +59,9 @@ static void parse_args(int argc, char **argv) { stop_on_error = false; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/fence/main.cpp b/tests/regression/fence/main.cpp index ead4ad551..716036b11 100644 --- a/tests/regression/fence/main.cpp +++ b/tests/regression/fence/main.cpp @@ -35,7 +35,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c 
= getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -44,10 +44,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/io_addr/main.cpp b/tests/regression/io_addr/main.cpp index 602064ffe..78d7cf56f 100644 --- a/tests/regression/io_addr/main.cpp +++ b/tests/regression/io_addr/main.cpp @@ -42,7 +42,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -51,10 +51,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/matmul/Makefile b/tests/regression/matmul/Makefile new file mode 100644 index 000000000..7f1c48523 --- /dev/null +++ b/tests/regression/matmul/Makefile @@ -0,0 +1,14 @@ +ROOT_DIR := $(realpath ../../..) 
+include $(ROOT_DIR)/config.mk + +PROJECT := matmul + +SRC_DIR := $(VORTEX_HOME)/tests/regression/$(PROJECT) + +SRCS := $(SRC_DIR)/main.cpp + +VX_SRCS := $(SRC_DIR)/kernel.cpp + +OPTS ?= -n128 -d1 + +include ../common.mk diff --git a/tests/regression/matmul/common.h b/tests/regression/matmul/common.h new file mode 100644 index 000000000..a9aa5de6c --- /dev/null +++ b/tests/regression/matmul/common.h @@ -0,0 +1,17 @@ +#ifndef _COMMON_H_ +#define _COMMON_H_ + +typedef struct { + uint32_t num_tasks; + uint32_t num_warps; + uint32_t num_threads; + uint32_t TC_per_warp; + uint32_t matrix_size; + uint32_t data_size; + uint64_t tc_size; + uint64_t src0_addr; + uint64_t src1_addr; + uint64_t dst_addr; +} kernel_arg_t; + +#endif \ No newline at end of file diff --git a/tests/regression/matmul/kernel.cpp b/tests/regression/matmul/kernel.cpp new file mode 100644 index 000000000..5fa976df4 --- /dev/null +++ b/tests/regression/matmul/kernel.cpp @@ -0,0 +1,127 @@ +#include +#include +#include +#include "common.h" + +void kernel_body(kernel_arg_t* __UNIFORM__ arg) { + uint32_t task_id = blockIdx.x; + int32_t* src0_ptr = (int32_t*)arg->src0_addr; + int32_t* src1_ptr = (int32_t*)arg->src1_addr; + int32_t* dst_ptr = (int32_t*)arg->dst_addr; + uint64_t a_addr = reinterpret_cast(src0_ptr); + uint64_t b_addr = reinterpret_cast(src1_ptr); + uint64_t c_addr = reinterpret_cast(dst_ptr); + + uint32_t tc_size = arg->tc_size; + uint32_t TC_per_warp = arg->TC_per_warp; + unsigned num_threads = arg->num_threads; + int num_warps = arg->num_warps; + uint32_t matrix_size = arg->matrix_size; + + int n_tiles = matrix_size/tc_size; + int num_output_tiles = (matrix_size*matrix_size)/(tc_size*tc_size); + + int num_tasks = arg->num_tasks; + + //Assuming matrix size always > tensor core size + int warps_actual; + if (TC_per_warp > num_output_tiles) + warps_actual = 1; + else + warps_actual = num_output_tiles/TC_per_warp; + + int num_warps_actual = (warps_actual < num_warps)? 
warps_actual: num_warps; + int num_threads_per_tc = (1> num_threads/TC_per_warp)? 1: num_threads/TC_per_warp; + + int num_tasks_per_thread = (1> (num_tasks/(num_threads*num_warps_actual)))? 1: (num_tasks/(num_threads*num_warps_actual)); + int num_tasks_per_warp = (1 > num_tasks/num_warps_actual)? 1:num_tasks/num_warps_actual; + int task_id_first_warp = task_id%num_tasks_per_warp; + + //A&B + int num_data_per_op_tile = tc_size*tc_size*n_tiles; + int num_data_per_warp = num_data_per_op_tile*((1> (num_output_tiles/num_warps_actual))?1:(num_output_tiles/num_warps_actual)); + + int addr_shift; + if (((tc_size*tc_size*n_tiles)/(num_threads)) > 1) + addr_shift = (tc_size*tc_size*n_tiles)/(num_threads); + else + addr_shift = 1; + //Offset for 1st warp + int offset = ((task_id_first_warp/num_tasks_per_thread)*addr_shift) + ((task_id_first_warp%num_tasks_per_thread)*num_data_per_op_tile); + offset = offset + (num_data_per_warp*(task_id/num_tasks_per_warp)); + + //C + int num_data_per_op_tile_c = tc_size*tc_size; + int num_data_per_warp_c = num_data_per_warp/n_tiles; + + int addr_shift_c; + if (((tc_size*tc_size)/(num_threads)) > 1) + addr_shift_c = tc_size; + else + addr_shift_c = 1; + //Offset for 1st warp + int offset_c = ((task_id_first_warp/num_tasks_per_thread)*addr_shift_c) + ((task_id_first_warp%num_tasks_per_thread)*num_data_per_op_tile_c); + offset_c = offset_c + (num_data_per_warp_c*(task_id/num_tasks_per_warp)); + + int thread_limit = (num_threads < tc_size*tc_size*n_tiles*TC_per_warp)? num_threads : tc_size*tc_size*n_tiles*TC_per_warp; + int thread_limit_c = (num_threads 64 tasks => 32 tasks/warp => 8 tasks/thread + /*task0->thread0, warp0 + task1->thread0 , warp0 + task2->thread0 , warp0 + . + task7->thread0 + task8->thread1 + task9->thread1 + . + . + ------ + task32 -> thread0, warp1 + task33 -> thread1, warp1 + . 
+ */ + + //NEW TASK DISTRIBUTION // For 8x8 matrix, 2x2 tc_size, 1 tc_num, 4threads, 2warps => 64 tasks => 32 tasks/warp => 8 tasks/thread + /*task0->thread0, warp0 + task1->thread1 , warp0 + task2->thread2 , warp0 + task3->thread3 ,... + task4->thread0 + task5->thread1 + . + . + ------ + task32 -> thread0, warp1 + task33 -> thread1, warp1 + . + .*/ + + //TODO :: change this for new task->thread distribution + if (((task_id%num_tasks_per_warp)/num_tasks_per_thread) < thread_limit) + { + uint64_t a_addr_base = a_addr + offset*arg->data_size; + uint64_t b_addr_base = b_addr + offset*arg->data_size; + uint64_t c_addr_base = c_addr + offset_c*arg->data_size; + csr_write(VX_MAT_MUL_SIZE,n_tiles); + csr_write(VX_TC_NUM,TC_per_warp); + csr_write(VX_TC_SIZE,tc_size); + + vx_matrix_load (0, a_addr_base); + vx_matrix_load (1, b_addr_base); + //In case of multiple threads - sync load + vx_fence(); + + vx_matrix_mul(); //Assuming padding to ensure matrix size is a multiple of tc_size + vx_fence(); + if (((task_id%num_tasks_per_warp)/num_tasks_per_thread) < thread_limit_c) + vx_matrix_store(c_addr_base); + //In case of multiple threads - sync store + vx_fence(); + } +} + +int main() { + kernel_arg_t* arg = (kernel_arg_t*)csr_read(VX_CSR_MSCRATCH); + return vx_spawn_threads(1, &arg->num_tasks, nullptr, (vx_kernel_func_cb)kernel_body, arg); +} diff --git a/tests/regression/matmul/main.cpp b/tests/regression/matmul/main.cpp new file mode 100644 index 000000000..9b3465c52 --- /dev/null +++ b/tests/regression/matmul/main.cpp @@ -0,0 +1,348 @@ +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +#define RT_CHECK(_expr) \ + do { \ + int _ret = _expr; \ + if (0 == _ret) \ + break; \ + printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ + cleanup(); \ + exit(-1); \ + } while (false) + +/////////////////////////////////////////////////////////////////////////////// + +const char* kernel_file = "kernel.vxbin"; +uint32_t matrix_size = 0; + 
+vx_device_h device = nullptr; +vx_buffer_h A_buffer = nullptr; +vx_buffer_h B_buffer = nullptr; +vx_buffer_h C_buffer = nullptr; +vx_buffer_h krnl_buffer = nullptr; +vx_buffer_h args_buffer = nullptr; + +std::vector staging_buf; +kernel_arg_t kernel_arg = {}; + +static void show_usage() { // print the command-line synopsis to stdout + std::cout << "Vortex Test." << std::endl; + std::cout << "Usage: [-k: kernel] [-n matrix_size] [-d data_size] [-h: help]" << std::endl; +} + +static void parse_args(int argc, char **argv, uint32_t &data_size) { // -n sets matrix_size, -k sets kernel_file, -d sets data_size + int c; + while ((c = getopt(argc, argv, "n:k:d:h")) != -1) { // no '?' in the option string: '?' is what getopt returns for an invalid option + switch (c) { + case 'n': + matrix_size = atoi(optarg); + break; + case 'k': + kernel_file = optarg; + break; + case 'd': + data_size = atoi(optarg); + break; + case 'h': + show_usage(); + exit(0); + break; + case '?': // invalid option or missing argument: fail like the default case + default: + show_usage(); + exit(-1); + } + } +} + +void cleanup() { + if (device) { + vx_mem_free(A_buffer); + vx_mem_free(B_buffer); + vx_mem_free(C_buffer); + vx_mem_free(krnl_buffer); + vx_mem_free(args_buffer); + vx_dev_close(device); + } +} + +template +class mainVariables +{ + public: + // Constructor + mainVariables(uint32_t bufSize, uint32_t dataSize, uint32_t matrixSize) + : buf_size(bufSize), data_size(dataSize), matrix_size(matrixSize) + { + // Resize vectors to specified sizes + src_A.resize(buf_size/data_size); + src_B.resize(buf_size/data_size); + refs.resize(buf_size/data_size); + } + + void init_inputs () + { + std::cout << "inside init" << std::endl; + for (uint32_t i = 0; i < matrix_size*matrix_size; ++i) + { + auto a = static_cast(std::rand()) / RAND_MAX; + auto b = static_cast(std::rand()) / RAND_MAX; + src_A[i] = static_cast(a * matrix_size); + src_B[i] = static_cast(b * matrix_size); + } + } + + void matmul_cpu() + { + for (uint32_t row = 0; row < matrix_size; ++row) + { + for (uint32_t col = 0; col < matrix_size; ++col) + { + TYPE sum(0); + for (uint32_t e = 0; e < matrix_size; ++e) { + sum += src_A[row * matrix_size + e] * src_B[e * matrix_size + col]; + } + refs[row * matrix_size + 
col] = sum; + } + } + } + + //Public variables + std::vector src_A; + std::vector src_B; + std::vector refs; + + std::vector A_mat; + std::vector B_mat; + + private: + uint32_t buf_size; + uint32_t data_size; + uint32_t matrix_size; +}; + + + +int main(int argc, char *argv[]) { + // parse command arguments + uint32_t data_size = 1; // bytes per element when -d is omitted (same as the -d1 in the Makefile OPTS); 0 would zero buf_size and divide by zero in mainVariables + parse_args(argc, argv, data_size); + if (matrix_size == 0) { + matrix_size = 2; + } + + // open device connection + std::cout << "open device connection" << std::endl; + RT_CHECK(vx_dev_open(&device)); + + uint64_t num_cores, num_warps, num_threads; + uint64_t tc_size, TC_per_warp; + + RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_CORES, &num_cores)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_WARPS, &num_warps)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_NUM_THREADS, &num_threads)); + + //Add assert/knob + RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_SIZE, &tc_size)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_TC_NUM, &TC_per_warp)); + + std::cout << "Debug :: tc_size = " << tc_size << std::endl; + std::cout << "Debug :: tc_num = " << TC_per_warp << std::endl; + + int threads_per_tc; + //TODO - can be changed + //Number of output tiles * number of threads + if (TC_per_warp > num_threads) + threads_per_tc = 1; + else + threads_per_tc = num_threads/TC_per_warp; + + uint32_t num_tasks = ((matrix_size*matrix_size)/(tc_size*tc_size))*threads_per_tc; + + //size of each operand + uint32_t buf_size = ((matrix_size*matrix_size)/(tc_size*tc_size))*(matrix_size/(tc_size))*(tc_size*tc_size)*data_size; + + //256 + std::cout << "Debug :: buf_size: " << buf_size << " bytes" << std::endl; + + // allocate device memory + std::cout << "allocate device memory" << std::endl; + + RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_READ, &A_buffer)); + RT_CHECK(vx_mem_address(A_buffer, &kernel_arg.src0_addr)); + RT_CHECK(vx_mem_alloc(device, buf_size, VX_MEM_READ, &B_buffer)); + RT_CHECK(vx_mem_address(B_buffer, &kernel_arg.src1_addr)); + RT_CHECK(vx_mem_alloc(device, buf_size, 
VX_MEM_WRITE, &C_buffer)); + RT_CHECK(vx_mem_address(C_buffer, &kernel_arg.dst_addr)); + + std::cout << "A_addr=0x" << std::hex << kernel_arg.src0_addr << std::endl; + std::cout << "B_addr=0x" << std::hex << kernel_arg.src1_addr << std::endl; + std::cout << "C_addr=0x" << std::hex << kernel_arg.dst_addr << std::endl; + + mainVariables variables (buf_size, data_size, matrix_size); + variables.init_inputs(); + + ////////////////////////////////////////////////// + // generate source data + ////////////////////////////////////////////////// + variables.matmul_cpu(); + + uint32_t tc_size_f = tc_size*tc_size; + uint32_t n_tiles = matrix_size/tc_size; + + variables.A_mat.resize(buf_size); + variables.B_mat.resize(buf_size); + + //Demand matrix creation for A / traverse through the rows + for(uint32_t k=0; k(time_end - time_start).count(); + printf("Elapsed time: %lg ms\n", elapsed); + + // download destination buffer + std::cout << "download destination buffer" << std::endl; + RT_CHECK(vx_copy_from_dev((int8_t*)variables.B_mat.data(), C_buffer, 0, buf_size)); + + // verify result (TODO : needs to be fixed for for functional correctness) + /* + std::cout << "verify result" << std::endl; + { + int errors = 0; + auto buf_ptr = (int8_t*)staging_buf.data(); + uint64_t tc_size = kernel_arg.tc_size; + std::cout << "tc_size = " << tc_size << std::endl; + int Result[matrix_size*matrix_size]; + int n_tiles = (matrix_size/tc_size); + int tc_size_f = tc_size*tc_size; + + //converting buf ptr (tile by tile) to CPU style linear (row by row) + for(int k = 0; k < matrix_size/tc_size; k+= 1) + { + for(int j = 0; j < matrix_size; j+= tc_size) + { + for(int i =0; i < tc_size*tc_size; i++) + { + Result[ tc_size*matrix_size*k +j+ (i/tc_size)*matrix_size +i%(tc_size)] = buf_ptr[matrix_size*tc_size*k+tc_size*j+i]; + } + } + } + + for (uint32_t i = 0; i < matrix_size*matrix_size; ++i) { + //int ref = i + i; + int cur = Result[i]; + if (cur != refs[i]) { + ++errors; + } + } + if (errors != 0) { 
+ std::cout << "Found " << std::dec << errors << " errors!" << std::endl; + std::cout << "FAILED!" << std::endl; + return 1; + } + else + { + std::cout << "CONDITIONALLY PASSED!" << std::endl; + } + } + */ + + // cleanup + std::cout << "cleanup" << std::endl; + cleanup(); + + std::cout << "PASSED!" << std::endl; + + return 0; +} \ No newline at end of file diff --git a/tests/regression/matmul/matmul_regression.sh b/tests/regression/matmul/matmul_regression.sh new file mode 100755 index 000000000..8d35fcfd3 --- /dev/null +++ b/tests/regression/matmul/matmul_regression.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# README: +# This script launches a sweep of TC_SIZE, TC_NUM and MATRIX SIZES +# default values of NUM_WARPS=32, NUM_THREADS=32, NUM_CORES=4, DATA_SIZE=1 +# Edit matrix_sizes, tcsizes & tcnums variables to vary the sweep limits + +# Define arrays for tc_size,tc_num and matrix sizes +matrix_sizes=(16 32 64 128 256 512) +tcsizes=(8 16 32) +tcnums=(4 8 16 32) + +cd "$(dirname "$0")/../../../build/" || exit 1 # resolve relative to this script, and stop if the build tree is missing + +# Loop through each combination of above configs; each run logs to sim_final/mat<size>/ +for size in "${matrix_sizes[@]}"; do + for tcsize in "${tcsizes[@]}"; do + for tcnum in "${tcnums[@]}"; do + mkdir -p "sim_final/mat${size}" + log_name="sim_final/mat${size}/tcsize${tcsize}_tcnum${tcnum}_32w32t" + cmd="CONFIGS=\"-DTC_NUM=${tcnum} -DTC_SIZE=${tcsize}\" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args=\"-n${size} -d1\" --rebuild=1 --perf=1 > ${log_name} 2>&1" + echo "$cmd" + eval "$cmd" + done + done +done diff --git a/tests/regression/mstress/main.cpp b/tests/regression/mstress/main.cpp index 7bf0dbe0e..5a1f0d300 100644 --- a/tests/regression/mstress/main.cpp +++ b/tests/regression/mstress/main.cpp @@ -83,7 +83,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -92,10 +92,9 @@ static void parse_args(int argc, 
char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/printf/main.cpp b/tests/regression/printf/main.cpp index 18d778c4b..eefa32592 100644 --- a/tests/regression/printf/main.cpp +++ b/tests/regression/printf/main.cpp @@ -33,7 +33,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -42,10 +42,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/sgemm2x/main.cpp b/tests/regression/sgemm2x/main.cpp index 3da359ee5..f10f8fcd1 100644 --- a/tests/regression/sgemm2x/main.cpp +++ b/tests/regression/sgemm2x/main.cpp @@ -103,7 +103,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:t:k:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -115,10 +115,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/sgemmx/main.cpp b/tests/regression/sgemmx/main.cpp index 4c2b18c30..b31af9b04 100644 --- a/tests/regression/sgemmx/main.cpp +++ b/tests/regression/sgemmx/main.cpp @@ -99,7 +99,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -108,10 +108,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { 
show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/sort/main.cpp b/tests/regression/sort/main.cpp index 19e9aee50..032ce18df 100644 --- a/tests/regression/sort/main.cpp +++ b/tests/regression/sort/main.cpp @@ -34,7 +34,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': count = atoi(optarg); @@ -43,10 +43,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/stencil3d/main.cpp b/tests/regression/stencil3d/main.cpp index 0536effc0..5a5fcc716 100644 --- a/tests/regression/stencil3d/main.cpp +++ b/tests/regression/stencil3d/main.cpp @@ -128,7 +128,7 @@ static void stencil_cpu(TYPE *out, const TYPE *in, uint32_t width, uint32_t heig {ny = 0;} else if (ny >= (int)height) {ny = height - 1;} - + if (nz < 0) {nz = 0;} else if (nz >= (int)depth) @@ -168,7 +168,7 @@ static void show_usage() static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:t:k:h?")) != -1) + while ((c = getopt(argc, argv, "n:t:k:h")) != -1) { switch (c) { @@ -182,12 +182,9 @@ static void parse_args(int argc, char **argv) kernel_file = optarg; break; case 'h': - case '?': - { show_usage(); exit(0); - } - break; + break; default: show_usage(); exit(-1); diff --git a/tests/regression/vecaddx/main.cpp b/tests/regression/vecaddx/main.cpp index d80e2fdc1..4a79861d3 100644 --- a/tests/regression/vecaddx/main.cpp +++ b/tests/regression/vecaddx/main.cpp @@ -87,7 +87,7 @@ static void show_usage() { static void parse_args(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + while ((c = getopt(argc, argv, "n:k:h")) != -1) { switch (c) { case 'n': size = atoi(optarg); @@ -96,10 
+96,9 @@ static void parse_args(int argc, char **argv) { kernel_file = optarg; break; case 'h': - case '?': { show_usage(); exit(0); - } break; + break; default: show_usage(); exit(-1); diff --git a/tests/unittest/common.mk b/tests/unittest/common.mk index a6f6b2794..c04db4d11 100644 --- a/tests/unittest/common.mk +++ b/tests/unittest/common.mk @@ -1,6 +1,8 @@ -CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors -CXXFLAGS += -I$(VORTEX_RT_PATH)/common +ROOT_DIR := $(realpath ../../..) + +CXXFLAGS += -std=c++17 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -I$(VORTEX_HOME)/sim/common # Debugging ifdef DEBUG diff --git a/tests/unittest/vx_malloc/main.cpp b/tests/unittest/vx_malloc/main.cpp index f10f986ca..d7e20b439 100644 --- a/tests/unittest/vx_malloc/main.cpp +++ b/tests/unittest/vx_malloc/main.cpp @@ -1,4 +1,4 @@ -#include +#include #include #define RT_CHECK(_expr) \ @@ -12,7 +12,7 @@ static uint64_t minAddress = 0; static uint64_t maxAddress = 0xffffffff; -static uint32_t pageAlign = 4096; +static uint32_t pageAlign = 4096; static uint32_t blockAlign = 64; int main() { diff --git a/third_party/Makefile b/third_party/Makefile index a2f74264e..24905e58c 100644 --- a/third_party/Makefile +++ b/third_party/Makefile @@ -1,6 +1,6 @@ -all: fpnew softfloat ramulator +all: cvfpu softfloat ramulator -fpnew: +cvfpu: softfloat: SPECIALIZE_TYPE=RISCV SOFTFLOAT_OPTS="-fPIC -DSOFTFLOAT_ROUND_ODD -DINLINE_LEVEL=5 -DSOFTFLOAT_FAST_DIV32TO16 -DSOFTFLOAT_FAST_DIV64TO32" $(MAKE) -C softfloat/build/Linux-x86_64-GCC @@ -13,4 +13,4 @@ clean: $(MAKE) -C softfloat/build/Linux-x86_64-GCC clean rm -rf ramulator/build ramulator/libramulator.so -.PHONY: all fpnew softfloat ramulator \ No newline at end of file +.PHONY: all cvfpu softfloat ramulator \ No newline at end of file diff --git a/third_party/cvfpu b/third_party/cvfpu new file mode 160000 index 000000000..a6af69155 --- /dev/null +++ b/third_party/cvfpu @@ -0,0 +1 @@ +Subproject commit 
a6af691551ffbd76d5d9cf30774d3295a41615e4 diff --git a/third_party/fpnew b/third_party/fpnew deleted file mode 160000 index 79e453139..000000000 --- a/third_party/fpnew +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 79e453139072df42c9ec8f697132ba485d74e23d