From 1cc7e17b44fcca15e18d653c9bb1fb8855b0b558 Mon Sep 17 00:00:00 2001 From: Lux Date: Sat, 14 Dec 2024 18:53:49 -0800 Subject: [PATCH 01/10] ADD: minor fixes --- CMakeLists.txt | 13 ++++++++----- examples/embench/CMakeLists.txt | 5 +++++ examples/embench/common/inc/trigger.h | 13 +++++++------ 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ed5422..1d3f637 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,8 @@ option(CHIP "Build for a specific platform" OF option(PROF_COV "Build with profiling and coverage" OFF ) option(GCNO_ONLY "Only build gcno files" OFF ) option(USE_PGO "Build with profile guided optimization" OFF ) - +# Define the variable with a default empty value +set(AFDO_PATH "" CACHE STRING "Path to autofdo profile data") ################################# # Toolchain Targets ################################# @@ -92,6 +93,12 @@ if (USE_PGO) endif() add_compile_options(-fno-builtin) +# Check if the variable is not empty and add compiler options accordingly +if(NOT "${AFDO_PATH}" STREQUAL "") + message(STATUS "Using AutoFDO profile from: ${AFDO_PATH}") + add_compile_options(-fauto-profile=${AFDO_PATH}) +endif() + # add_compile_options(-ffunction-sections -fdata-sections -fno-common -fno-builtin-printf -fno-pie) # add_compile_options(-Wall -Wextra -Warray-bounds -Wno-unused-parameter -Wcast-qual) add_compile_options(${ARCH_FLAGS}) @@ -110,9 +117,6 @@ if (PROF_COV) add_link_options(-lgcov) endif() - - - ################################# # Build ################################# @@ -127,7 +131,6 @@ target_include_directories(app PUBLIC app/include) include_directories(/scratch/iansseijelly/chipyard/.conda-env/riscv-tools/riscv64-unknown-elf/include) link_directories(/scratch/iansseijelly/chipyard/.conda-env/riscv-tools/riscv64-unknown-elf/lib) - ################################# # Dependencies ################################# diff --git a/examples/embench/CMakeLists.txt 
b/examples/embench/CMakeLists.txt index ebc37ad..1b33af2 100644 --- a/examples/embench/CMakeLists.txt +++ b/examples/embench/CMakeLists.txt @@ -12,6 +12,11 @@ if (EMBENCH_ENABLE_TRACE_DMA) add_definitions(-DUSE_L_TRACE_DMA) endif() +if (EMBENCH_ENABLE_LBR) + add_definitions(-DTIMER_INTERRUPT) + add_definitions(-DUSE_LBR) +endif() + set(BENCHMARKS dummy wikisort diff --git a/examples/embench/common/inc/trigger.h b/examples/embench/common/inc/trigger.h index ee58ff9..5434600 100644 --- a/examples/embench/common/inc/trigger.h +++ b/examples/embench/common/inc/trigger.h @@ -24,7 +24,7 @@ /* timer interrupt interval in milliseconds only used when TIMER_INTERRUPT is defined */ -#define TIMER_INTERRUPT_INTERVAL 100 +#define TIMER_INTERRUPT_INTERVAL 25 #ifdef USE_L_TRACE_DMA static uint8_t dma_buffer[512 * 1024]; @@ -96,19 +96,20 @@ void machine_timer_interrupt_callback() { #endif static inline void stop_trigger(void) { + #ifdef REPORT_TOTAL_TIME + int64_t curr_time = clint_get_time(CLINT); + printf("stop trigger at %lld\n", curr_time); + #endif + #ifdef USE_LBR lbr_dump_records(); #endif + #ifdef USE_L_TRACE LTraceEncoderType *encoder = l_trace_encoder_get(get_hart_id()); l_trace_encoder_stop(encoder); #endif - #ifdef REPORT_TOTAL_TIME - int64_t curr_time = clint_get_time(CLINT); - printf("stop trigger at %lld\n", curr_time); - #endif - #ifdef USE_L_TRACE_DMA LTraceSinkDmaType *sink_dma = l_trace_sink_dma_get(get_hart_id()); l_trace_sink_dma_read(sink_dma, dma_buffer); From fc0c4d624d1aba2b9ede9fc0aa3f634f7c4fa8e9 Mon Sep 17 00:00:00 2001 From: Lux Date: Sat, 14 Dec 2024 18:54:05 -0800 Subject: [PATCH 02/10] ADD: restructure test scripts --- scripts/gcov/test.sh | 17 ---- scripts/test_all/test_all.sh | 99 ++++++++++++++++++++++++ scripts/test_all/test_instrumentation.sh | 61 +++++++++++++++ scripts/test_all/test_sampling.sh | 58 ++++++++++++++ 4 files changed, 218 insertions(+), 17 deletions(-) delete mode 100755 scripts/gcov/test.sh create mode 100755 
scripts/test_all/test_all.sh create mode 100755 scripts/test_all/test_instrumentation.sh create mode 100755 scripts/test_all/test_sampling.sh diff --git a/scripts/gcov/test.sh b/scripts/gcov/test.sh deleted file mode 100755 index d985e39..0000000 --- a/scripts/gcov/test.sh +++ /dev/null @@ -1,17 +0,0 @@ -# must be run from the root directory -# generate the baseline -cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=OFF -cmake --build ./build/ --target nettle-sha256 -spike build/examples/embench/nettle-sha256.elf > build/test.txt -# generate the coverage profile -cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=ON -D USE_PGO=OFF -cmake --build ./build/ --target nettle-sha256 -spike build/examples/embench/nettle-sha256.elf > build/test_cov.txt -# write the coverage files -python3 /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/gcov/dump_gcda.py /scratch/iansseijelly/chipyard/software/baremetal-ide/build/test_cov.txt -# perform PGO -cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=ON -cmake --build ./build/ --target nettle-sha256 -spike build/examples/embench/nettle-sha256.elf > build/test_after.txt -# diff -diff build/test.txt build/test_after.txt \ No newline at end of file diff --git a/scripts/test_all/test_all.sh b/scripts/test_all/test_all.sh new file mode 100755 index 0000000..4b05382 --- /dev/null +++ b/scripts/test_all/test_all.sh @@ -0,0 +1,99 @@ +#!/bin/bash +set -e +# here should be where the script itself is +cd /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all +HERE=$(pwd) +mkdir -p $HERE/test +cd /scratch/iansseijelly/chipyard/software/baremetal-ide + +echo "--- Generating the vanilla baseline ---" +rm -rf build +# generate the vanilla baseline +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D 
CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=OFF +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/vanilla.log +popd + +echo "--- Generating the instrumented coverage profile ---" +rm -rf build +# generate the instrumented coverage profile +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=ON -D USE_PGO=OFF +cmake --build ./build/ --target wikisort +spike build/examples/embench/wikisort.elf > build/test_cov.txt +# write the coverage files +python3 /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/gcov/dump_gcda.py /scratch/iansseijelly/chipyard/software/baremetal-ide/build/test_cov.txt +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/instrumented.log +popd +# perform PGO +echo "--- Performing PGO ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/instrumentation_pgo.log +popd + +rm -rf build +# generate a trace 
+echo "--- Generating tacit trace ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D GCNO_ONLY=ON -D USE_PGO=OFF -D EMBENCH_ENABLE_TRACE_PRINT=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/trace.log +popd +pushd /scratch/iansseijelly/ltrace_decoder +cargo run -- --encoded-trace /scratch/iansseijelly/chipyard/sims/vcs/byte_printer.txt --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --gcno /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/CMakeFiles/wikisort_lib.dir/wikisort/libwikisort.c.gcno --to-gcda --to-afdo +# save the trace afdo file for later use +cp ./trace_afdo.txt $HERE/test/trace_afdo.txt +/scratch/iansseijelly/autofdo/build/create_gcov --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --profile $HERE/test/trace_afdo.txt --profiler text --gcov_version=2 --gcov trace_afdo_fbdata.afdo +popd + +# must not remove build directory, as .gcda files are needed +# perform trace PGO +echo "--- Performing trace PGO ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/trace-pgo.log +popd + +rm -rf build +# 
perform LBR samping +echo "--- Performing LBR sampling ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D EMBENCH_ENABLE_LBR=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/sample-lbr-vanilla.log +popd +# post-process the sampled LBR record +pushd /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test +python3 /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/perf/dump_lbr.py /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test/sample-lbr-vanilla.log +/scratch/iansseijelly/autofdo/build/create_gcov --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --profile /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/perf/lbr_branch.txt --profiler text --gcov_version=2 +popd + +# perform LBR AutoFDO +echo "--- Performing LBR AutoFDO ---" +rm -rf build +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D EMBENCH_ENABLE_LBR=OFF -D AFDO_PATH=/scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test/fbdata.afdo +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/sample-lbr-afdo.log +popd + +# perform trace LBR AutoFDO +echo "--- Performing trace LBR AutoFDO ---" +rm -rf build +cmake -S ./ -B 
./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D EMBENCH_ENABLE_LBR=OFF -D AFDO_PATH=/scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test/trace_afdo_fbdata.afdo +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/trace-lbr-afdo.log +popd \ No newline at end of file diff --git a/scripts/test_all/test_instrumentation.sh b/scripts/test_all/test_instrumentation.sh new file mode 100755 index 0000000..f0027c1 --- /dev/null +++ b/scripts/test_all/test_instrumentation.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -e +# here should be where the script itself is +cd /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all +HERE=$(pwd) +mkdir -p $HERE/test +cd /scratch/iansseijelly/chipyard/software/baremetal-ide + +echo "--- Generating the vanilla baseline ---" +rm -rf build +# generate the vanilla baseline +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=OFF +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/vanilla.log +popd + +echo "--- Generating the instrumented coverage profile ---" +rm -rf build +# generate the instrumented coverage profile +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=ON -D USE_PGO=OFF +cmake --build ./build/ 
--target wikisort +spike build/examples/embench/wikisort.elf > build/test_cov.txt +# write the coverage files +python3 /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/gcov/dump_gcda.py /scratch/iansseijelly/chipyard/software/baremetal-ide/build/test_cov.txt +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/instrumented.log +popd +# perform PGO +echo "--- Performing PGO ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/instrumentation_pgo.log +popd + +rm -rf build +# generate a trace +echo "--- Generating tacit trace ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D GCNO_ONLY=ON -D USE_PGO=OFF -D EMBENCH_ENABLE_TRACE_PRINT=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/trace.log +popd +pushd /scratch/iansseijelly/ltrace_decoder +cargo run -- --encoded-trace /scratch/iansseijelly/chipyard/sims/vcs/byte_printer.txt --binary 
/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --gcno /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/CMakeFiles/wikisort_lib.dir/wikisort/libwikisort.c.gcno --to-gcda --to-afdo +popd + +# must not remove build directory, as .gcda files are needed +# perform trace PGO +echo "--- Performing trace PGO ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/trace-pgo.log +popd \ No newline at end of file diff --git a/scripts/test_all/test_sampling.sh b/scripts/test_all/test_sampling.sh new file mode 100755 index 0000000..91663a2 --- /dev/null +++ b/scripts/test_all/test_sampling.sh @@ -0,0 +1,58 @@ +#!/bin/bash +set -e +# here should be where the script itself is +cd /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all +HERE=$(pwd) +mkdir -p $HERE/test +cd /scratch/iansseijelly/chipyard/software/baremetal-ide + +rm -rf build +# perform LBR samping +echo "--- Performing LBR sampling ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D EMBENCH_ENABLE_LBR=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/sample-lbr-vanilla.log +popd +# post-process the 
sampled LBR record +pushd /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test +python3 /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/perf/dump_lbr.py /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test/sample-lbr-vanilla.log +mv ./lbr_branch.txt $HERE/test/lbr_sampling_branch.txt +/scratch/iansseijelly/autofdo/build/create_gcov --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --profile $HERE/test/lbr_sampling_branch.txt --profiler text --gcov_version=2 +popd + +# perform LBR AutoFDO +echo "--- Performing LBR AutoFDO ---" +rm -rf build +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D EMBENCH_ENABLE_LBR=OFF -D AFDO_PATH=/scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test/fbdata.afdo +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/sample-lbr-afdo.log +popd + +rm -rf build +# generate a trace +echo "--- Generating tacit trace ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D GCNO_ONLY=ON -D USE_PGO=OFF -D EMBENCH_ENABLE_TRACE_PRINT=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/trace.log +popd +pushd /scratch/iansseijelly/ltrace_decoder +cargo run -- --encoded-trace 
/scratch/iansseijelly/chipyard/sims/vcs/byte_printer.txt --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --gcno /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/CMakeFiles/wikisort_lib.dir/wikisort/libwikisort.c.gcno --to-afdo +# save the trace afdo file for later use +cp ./trace_afdo.txt $HERE/test/trace_afdo.txt +popd +/scratch/iansseijelly/autofdo/build/create_gcov --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --profile $HERE/test/trace_afdo.txt --profiler text --gcov_version=2 --gcov $HERE/test/trace_afdo_fbdata.afdo +# perform trace LBR AutoFDO +echo "--- Performing trace LBR AutoFDO ---" +rm -rf build +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D EMBENCH_ENABLE_LBR=OFF -D AFDO_PATH=/scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test/trace_afdo_fbdata.afdo +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/trace-lbr-afdo.log +popd \ No newline at end of file From efb913aa9bab5b031261a66a37f5bacd9c0578e4 Mon Sep 17 00:00:00 2001 From: Lux Date: Sat, 14 Dec 2024 18:57:29 -0800 Subject: [PATCH 03/10] FIX: sync the test_all script --- scripts/test_all/test_all.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/test_all/test_all.sh b/scripts/test_all/test_all.sh index 4b05382..c94c7c2 100755 --- a/scripts/test_all/test_all.sh +++ b/scripts/test_all/test_all.sh @@ -75,7 +75,8 @@ popd # post-process the sampled LBR record pushd /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test python3 
/scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/perf/dump_lbr.py /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test/sample-lbr-vanilla.log -/scratch/iansseijelly/autofdo/build/create_gcov --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --profile /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/perf/lbr_branch.txt --profiler text --gcov_version=2 +mv ./lbr_branch.txt $HERE/test/lbr_sampling_branch.txt +/scratch/iansseijelly/autofdo/build/create_gcov --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --profile $HERE/test/lbr_sampling_branch.txt --profiler text --gcov_version=2 popd # perform LBR AutoFDO From 2a51f40cb31a1b5cc903f991003d9c0083af408a Mon Sep 17 00:00:00 2001 From: Lux Date: Mon, 30 Dec 2024 09:50:23 -0800 Subject: [PATCH 04/10] ADD: minor updates --- CMakeLists.txt | 9 +++++++-- scripts/test_all/test_all.sh | 2 +- scripts/test_all/test_instrumentation.sh | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d3f637..bfa0caa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,6 +34,7 @@ option(CHIP "Build for a specific platform" OF option(PROF_COV "Build with profiling and coverage" OFF ) option(GCNO_ONLY "Only build gcno files" OFF ) option(USE_PGO "Build with profile guided optimization" OFF ) +option(OPT_INFO "Build with optimization information" OFF ) # Define the variable with a default empty value set(AFDO_PATH "" CACHE STRING "Path to autofdo profile data") ################################# @@ -75,8 +76,9 @@ endif() add_subdirectory(${CMAKE_SOURCE_DIR}/platform/${CHIP}) -add_compile_options(-O1) +add_compile_options(-O2) add_compile_options(-Wall -Wextra) +add_compile_options(-Wno-error=coverage-mismatch) if (PROF_COV) message(STATUS "Building with profiling and coverage") @@ -91,7 +93,10 @@ endif() if (USE_PGO) 
add_compile_options(-fprofile-use) endif() -add_compile_options(-fno-builtin) + +if (OPT_INFO) + add_compile_options(-fopt-info) +endif() # Check if the variable is not empty and add compiler options accordingly if(NOT "${AFDO_PATH}" STREQUAL "") diff --git a/scripts/test_all/test_all.sh b/scripts/test_all/test_all.sh index c94c7c2..b3f0519 100755 --- a/scripts/test_all/test_all.sh +++ b/scripts/test_all/test_all.sh @@ -56,7 +56,7 @@ popd # must not remove build directory, as .gcda files are needed # perform trace PGO echo "--- Performing trace PGO ---" -cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=ON +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D PROF_COV=OFF -D USE_PGO=ON -D EMBENCH_ENABLE_TRACE_PRINT=ON -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake cmake --build ./build/ --target wikisort pushd /scratch/iansseijelly/chipyard/sims/vcs make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 diff --git a/scripts/test_all/test_instrumentation.sh b/scripts/test_all/test_instrumentation.sh index f0027c1..67ca4ad 100755 --- a/scripts/test_all/test_instrumentation.sh +++ b/scripts/test_all/test_instrumentation.sh @@ -53,7 +53,7 @@ popd # must not remove build directory, as .gcda files are needed # perform trace PGO echo "--- Performing trace PGO ---" -cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=ON +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D GCNO_ONLY=OFF -D USE_PGO=ON -D EMBENCH_ENABLE_TRACE_PRINT=ON cmake --build ./build/ --target wikisort pushd /scratch/iansseijelly/chipyard/sims/vcs make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 From 
fa40c2f2a640c208fcadc696083cc9732eef4464 Mon Sep 17 00:00:00 2001 From: Lux Date: Mon, 30 Dec 2024 09:50:48 -0800 Subject: [PATCH 05/10] ADD: sin example --- examples/pmu-tests/CMakeLists.txt | 2 ++ examples/pmu-tests/src/trig-sin.c | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 examples/pmu-tests/src/trig-sin.c diff --git a/examples/pmu-tests/CMakeLists.txt b/examples/pmu-tests/CMakeLists.txt index 2404d0d..379f1de 100644 --- a/examples/pmu-tests/CMakeLists.txt +++ b/examples/pmu-tests/CMakeLists.txt @@ -4,6 +4,7 @@ set (TESTS pmu-test-inhibit lbr-test ltrace-dma-test + trig-sin sort ) @@ -12,5 +13,6 @@ foreach(test ${TESTS}) target_link_libraries(${test} PUBLIC l_trace_encoder PRIVATE -L${CMAKE_BINARY_DIR}/glossy -Wl,--whole-archive glossy -Wl,--no-whole-archive + PUBLIC m ) endforeach() diff --git a/examples/pmu-tests/src/trig-sin.c b/examples/pmu-tests/src/trig-sin.c new file mode 100644 index 0000000..c8c58a7 --- /dev/null +++ b/examples/pmu-tests/src/trig-sin.c @@ -0,0 +1,21 @@ +#include +#include "riscv.h" +#include "riscv_encoding.h" +#include "l_trace_encoder.h" +#include "math.h" + +#define NUM_ITERS 10 // 0 to 2pi, 10 steps + +int main(int argc, char **argv) { + LTraceEncoderType *encoder = l_trace_encoder_get(get_hart_id()); + l_trace_encoder_configure_target(encoder, TARGET_PRINT); + l_trace_encoder_start(encoder); + + for (int i = 0; i < NUM_ITERS; i++) { + volatile float x = i * (2 * M_PI / NUM_ITERS); + volatile float y = sin(x); + printf("x: %f, y: %f\n", x, y); + } + + l_trace_encoder_stop(encoder); +} From b174835c7a1751a10db9e232abaecee2c744d4e9 Mon Sep 17 00:00:00 2001 From: Lux Date: Mon, 30 Dec 2024 22:49:41 -0800 Subject: [PATCH 06/10] FIX: inline trace start and end --- driver/rocket-chip/l_trace_encoder/l_trace_encoder.c | 9 --------- driver/rocket-chip/l_trace_encoder/l_trace_encoder.h | 10 ++++++++-- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git 
a/driver/rocket-chip/l_trace_encoder/l_trace_encoder.c b/driver/rocket-chip/l_trace_encoder/l_trace_encoder.c index 6791cd3..3093cf8 100644 --- a/driver/rocket-chip/l_trace_encoder/l_trace_encoder.c +++ b/driver/rocket-chip/l_trace_encoder/l_trace_encoder.c @@ -18,15 +18,6 @@ void l_trace_sink_dma_read(LTraceSinkDmaType *sink_dma, uint8_t *buffer) { printf("\n"); } -void l_trace_encoder_start(LTraceEncoderType *encoder) { - SET_BITS(encoder->TR_TE_CTRL, 0x1 << 1); -} - void l_trace_encoder_configure_target(LTraceEncoderType *encoder, uint64_t target) { encoder->TR_TE_TARGET = target; } - -void l_trace_encoder_stop(LTraceEncoderType *encoder) { - CLEAR_BITS(encoder->TR_TE_CTRL, 0x1 << 1); -} - diff --git a/driver/rocket-chip/l_trace_encoder/l_trace_encoder.h b/driver/rocket-chip/l_trace_encoder/l_trace_encoder.h index 98d0783..1597f4b 100644 --- a/driver/rocket-chip/l_trace_encoder/l_trace_encoder.h +++ b/driver/rocket-chip/l_trace_encoder/l_trace_encoder.h @@ -41,8 +41,14 @@ static inline LTraceSinkDmaType *l_trace_sink_dma_get(uint32_t hart_id) { return (LTraceSinkDmaType *)(L_TRACE_SINK_DMA_BASE_ADDRESS + hart_id * 0x1000); } -void l_trace_encoder_start(LTraceEncoderType *encoder); -void l_trace_encoder_stop(LTraceEncoderType *encoder); +static inline void l_trace_encoder_start(LTraceEncoderType *encoder) { + SET_BITS(encoder->TR_TE_CTRL, 0x1 << 1); +} + +static inline void l_trace_encoder_stop(LTraceEncoderType *encoder) { + CLEAR_BITS(encoder->TR_TE_CTRL, 0x1 << 1); +} + void l_trace_encoder_configure_target(LTraceEncoderType *encoder, uint64_t target); void l_trace_sink_dma_configure_addr(LTraceSinkDmaType *sink_dma, uint64_t dma_addr); void l_trace_sink_dma_read(LTraceSinkDmaType *sink_dma, uint8_t *buffer); From 833d7597df5d57bfc7d028402d9d36ddbbc6caf1 Mon Sep 17 00:00:00 2001 From: Lux Date: Mon, 30 Dec 2024 22:52:39 -0800 Subject: [PATCH 07/10] RENAME: speedscope-test --- CMakeLists.txt | 2 +- examples/pmu-tests/CMakeLists.txt | 2 +- 
examples/pmu-tests/src/speedscope-test.c | 45 ++++++++++++++++++++++++ examples/pmu-tests/src/trig-sin.c | 21 ----------- 4 files changed, 47 insertions(+), 23 deletions(-) create mode 100644 examples/pmu-tests/src/speedscope-test.c delete mode 100644 examples/pmu-tests/src/trig-sin.c diff --git a/CMakeLists.txt b/CMakeLists.txt index bfa0caa..6f570c0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,7 +76,7 @@ endif() add_subdirectory(${CMAKE_SOURCE_DIR}/platform/${CHIP}) -add_compile_options(-O2) +add_compile_options(-O0) add_compile_options(-Wall -Wextra) add_compile_options(-Wno-error=coverage-mismatch) diff --git a/examples/pmu-tests/CMakeLists.txt b/examples/pmu-tests/CMakeLists.txt index 379f1de..c5c82f4 100644 --- a/examples/pmu-tests/CMakeLists.txt +++ b/examples/pmu-tests/CMakeLists.txt @@ -4,7 +4,7 @@ set (TESTS pmu-test-inhibit lbr-test ltrace-dma-test - trig-sin + speedscope-test sort ) diff --git a/examples/pmu-tests/src/speedscope-test.c b/examples/pmu-tests/src/speedscope-test.c new file mode 100644 index 0000000..ab8d5b7 --- /dev/null +++ b/examples/pmu-tests/src/speedscope-test.c @@ -0,0 +1,45 @@ +#include +#include "riscv.h" +#include "riscv_encoding.h" +#include "l_trace_encoder.h" +#include "math.h" + +#define NUM_ITERS 10 // 0 to 2pi, 10 steps + +int add_kernel(int a, int b) { + if (b == 0) { + return a; + } + return add_kernel(a, b - 1) + 1; +} + +int workload(int a, int b) { + return add_kernel(a, b); +} + +int main(int argc, char **argv) { + LTraceEncoderType *encoder = l_trace_encoder_get(get_hart_id()); + l_trace_encoder_configure_target(encoder, TARGET_PRINT); + l_trace_encoder_start(encoder); + + for (int i = 0; i < 10; i++) { + __asm__("nop"); + } + + for (int i = 0; i < 10; i++) { + volatile int x = workload(1, i); + } + + for (int i = 0; i < 10; i++) { + volatile float x = sin(i); + } + + printf("Hello, world! 
%d\n", 0); + + l_trace_encoder_stop(encoder); + + // spin for a bit, to make sure the trace buffer is flushed + for (int i = 0; i < 10; i++) { + __asm__("nop"); + } +} diff --git a/examples/pmu-tests/src/trig-sin.c b/examples/pmu-tests/src/trig-sin.c deleted file mode 100644 index c8c58a7..0000000 --- a/examples/pmu-tests/src/trig-sin.c +++ /dev/null @@ -1,21 +0,0 @@ -#include -#include "riscv.h" -#include "riscv_encoding.h" -#include "l_trace_encoder.h" -#include "math.h" - -#define NUM_ITERS 10 // 0 to 2pi, 10 steps - -int main(int argc, char **argv) { - LTraceEncoderType *encoder = l_trace_encoder_get(get_hart_id()); - l_trace_encoder_configure_target(encoder, TARGET_PRINT); - l_trace_encoder_start(encoder); - - for (int i = 0; i < NUM_ITERS; i++) { - volatile float x = i * (2 * M_PI / NUM_ITERS); - volatile float y = sin(x); - printf("x: %f, y: %f\n", x, y); - } - - l_trace_encoder_stop(encoder); -} From fa5b0244b56ab73b5c500ace4a12eb26027018f9 Mon Sep 17 00:00:00 2001 From: Lux Date: Sat, 11 Jan 2025 17:23:51 -0800 Subject: [PATCH 08/10] ADD: useit driver, more tests --- .../l_trace_encoder/l_trace_encoder.c | 8 ++ .../l_trace_encoder/l_trace_encoder.h | 4 + examples/pmu-tests/CMakeLists.txt | 2 + examples/pmu-tests/src/ltrace-hpm-test.c | 40 +++++++ examples/pmu-tests/src/vpp-test.c | 106 ++++++++++++++++++ 5 files changed, 160 insertions(+) create mode 100644 examples/pmu-tests/src/ltrace-hpm-test.c create mode 100644 examples/pmu-tests/src/vpp-test.c diff --git a/driver/rocket-chip/l_trace_encoder/l_trace_encoder.c b/driver/rocket-chip/l_trace_encoder/l_trace_encoder.c index 3093cf8..8dc2294 100644 --- a/driver/rocket-chip/l_trace_encoder/l_trace_encoder.c +++ b/driver/rocket-chip/l_trace_encoder/l_trace_encoder.c @@ -21,3 +21,11 @@ void l_trace_sink_dma_read(LTraceSinkDmaType *sink_dma, uint8_t *buffer) { void l_trace_encoder_configure_target(LTraceEncoderType *encoder, uint64_t target) { encoder->TR_TE_TARGET = target; } + +void 
l_trace_encoder_configure_hpm_counter_en(LTraceEncoderType *encoder, uint32_t hpm_counter) { + encoder->TR_TE_HPM_COUNTER = hpm_counter; +} + +void l_trace_encoder_configure_hpm_counter_time(LTraceEncoderType *encoder, uint32_t time) { + encoder->TR_TE_HPM_TIME = time; +} diff --git a/driver/rocket-chip/l_trace_encoder/l_trace_encoder.h b/driver/rocket-chip/l_trace_encoder/l_trace_encoder.h index 1597f4b..f4a07f4 100644 --- a/driver/rocket-chip/l_trace_encoder/l_trace_encoder.h +++ b/driver/rocket-chip/l_trace_encoder/l_trace_encoder.h @@ -9,6 +9,8 @@ typedef struct { uint32_t TR_TE_CTRL; uint32_t TR_TE_TARGET; + uint32_t TR_TE_HPM_COUNTER; + uint32_t TR_TE_HPM_TIME; } LTraceEncoderType; typedef struct { @@ -52,4 +54,6 @@ static inline void l_trace_encoder_stop(LTraceEncoderType *encoder) { void l_trace_encoder_configure_target(LTraceEncoderType *encoder, uint64_t target); void l_trace_sink_dma_configure_addr(LTraceSinkDmaType *sink_dma, uint64_t dma_addr); void l_trace_sink_dma_read(LTraceSinkDmaType *sink_dma, uint8_t *buffer); +void l_trace_encoder_configure_hpm_counter_en(LTraceEncoderType *encoder, uint32_t hpm_counter); +void l_trace_encoder_configure_hpm_counter_time(LTraceEncoderType *encoder, uint32_t time); #endif /* __L_TRACE_ENCODER_H */ diff --git a/examples/pmu-tests/CMakeLists.txt b/examples/pmu-tests/CMakeLists.txt index c5c82f4..fcc219e 100644 --- a/examples/pmu-tests/CMakeLists.txt +++ b/examples/pmu-tests/CMakeLists.txt @@ -4,7 +4,9 @@ set (TESTS pmu-test-inhibit lbr-test ltrace-dma-test + ltrace-hpm-test speedscope-test + vpp-test sort ) diff --git a/examples/pmu-tests/src/ltrace-hpm-test.c b/examples/pmu-tests/src/ltrace-hpm-test.c new file mode 100644 index 0000000..c703f0f --- /dev/null +++ b/examples/pmu-tests/src/ltrace-hpm-test.c @@ -0,0 +1,40 @@ +#include +#include "riscv.h" +#include "pmu.h" +#include "riscv_encoding.h" +#include "l_trace_encoder.h" + +#define NUM_ITERS 100 + +// #define USE_L_TRACE_DMA + +static volatile uint32_t 
array[NUM_ITERS]; + +int main(int argc, char **argv) { + // enable counter enable + PMU_COUNTER_ENABLE(3); + // configure event3 and reset counter3 + printf("---configure event3---\n"); + PMU_EVENT_ENABLE(PMU_EVENT(0, LOAD), 3); + LTraceEncoderType *encoder = l_trace_encoder_get(get_hart_id()); + l_trace_encoder_configure_target(encoder, TARGET_PRINT); + l_trace_encoder_configure_hpm_counter_en(encoder, 1 << 3); + l_trace_encoder_configure_hpm_counter_time(encoder, 1000); // report every 1000 cycles + PMU_COUNTER_RESET(3); + l_trace_encoder_start(encoder); + // do some dummy loop with load + for (int i = 0; i < NUM_ITERS; i++) { + array[i] = i; + } + // do some different loops + for (int i = 0; i < NUM_ITERS; i++) { + array[i] -= i; + } + l_trace_encoder_stop(encoder); + PMU_INHIBIT_ENABLE(3); + + // read counter + uint32_t counter = PMU_COUNTER_READ(3); + printf("counter: %d\n", counter); + +} diff --git a/examples/pmu-tests/src/vpp-test.c b/examples/pmu-tests/src/vpp-test.c new file mode 100644 index 0000000..afc766f --- /dev/null +++ b/examples/pmu-tests/src/vpp-test.c @@ -0,0 +1,106 @@ +#include +#include +#include "riscv.h" +#include "l_trace_encoder.h" + +#define ARRAY_SIZE 4096 +#define CHUNK_SIZE 64 +#define NUM_ITERATIONS 10 + +typedef struct { + uint32_t value; + uint32_t next_index; +} Node; + +static volatile Node array[ARRAY_SIZE]; + +// Fisher-Yates shuffle to randomize array access pattern +static void shuffle_indices(void) { + for (int i = ARRAY_SIZE - 1; i > 0; i--) { + // Use a simple PRNG for reproducibility + uint32_t j = (i * 1103515245 + 12345) % (i + 1); + array[i].next_index ^= array[j].next_index; + array[j].next_index ^= array[i].next_index; + array[i].next_index ^= array[j].next_index; + } +} + +// Sort a chunk using different algorithms based on data properties +static void sort_chunk(uint32_t start_idx, uint32_t size) { + // Count number of unique values to determine algorithm + uint32_t unique = 0; + uint32_t prev = 0; + + for 
(uint32_t i = 0; i < size; i++) { + uint32_t curr_idx = start_idx; + for (uint32_t j = 0; j < i; j++) { + curr_idx = array[curr_idx].next_index; + } + if (array[curr_idx].value != prev) { + unique++; + prev = array[curr_idx].value; + } + } + + // Choose algorithm based on data properties + if (unique < size/4) { + // Counting sort for low uniqueness + uint32_t max = 0; + uint32_t curr_idx = start_idx; + for (uint32_t i = 0; i < size; i++) { + if (array[curr_idx].value > max) max = array[curr_idx].value; + curr_idx = array[curr_idx].next_index; + } + + uint32_t counts[256] = {0}; // Limited range for simplicity + curr_idx = start_idx; + for (uint32_t i = 0; i < size; i++) { + counts[array[curr_idx].value % 256]++; + curr_idx = array[curr_idx].next_index; + } + } else { + // Insertion sort for high uniqueness + for (uint32_t i = 1; i < size; i++) { + uint32_t curr_idx = start_idx; + for (uint32_t j = 0; j < i; j++) { + curr_idx = array[curr_idx].next_index; + } + uint32_t key = array[curr_idx].value; + + uint32_t j = i - 1; + uint32_t prev_idx = start_idx; + while (j >= 0 && array[prev_idx].value > key) { + uint32_t next_idx = array[prev_idx].next_index; + array[next_idx].value = array[prev_idx].value; + j--; + prev_idx = array[prev_idx].next_index; + } + array[curr_idx].value = key; + } + } +} + +int main() { + // Initialize with pseudo-random data and linked indices + for (uint32_t i = 0; i < ARRAY_SIZE; i++) { + array[i].value = (i * 1103515245 + 12345) & 0xFFFFFFFF; + array[i].next_index = (i + 1) % ARRAY_SIZE; + } + + LTraceEncoderType *encoder = l_trace_encoder_get(get_hart_id()); + l_trace_encoder_configure_target(encoder, TARGET_PRINT); + l_trace_encoder_start(encoder); + + for (int iter = 0; iter < NUM_ITERATIONS; iter++) { + // Randomize access pattern + shuffle_indices(); + + // Sort chunks in different orders + for (uint32_t i = 0; i < ARRAY_SIZE; i += CHUNK_SIZE) { + sort_chunk(i, CHUNK_SIZE); + } + } + + l_trace_encoder_stop(encoder); + return 0; +} 
From f8896212d43356b37c7c34adf7a5db186c0c0afa Mon Sep 17 00:00:00 2001 From: iansseijelly Date: Sun, 12 Jan 2025 00:09:28 -0800 Subject: [PATCH 09/10] ADD: mada timer driver --- driver/mada/timer/CMakeLists.txt | 7 +++++ driver/mada/timer/mada_timer.c | 1 + driver/mada/timer/mada_timer.h | 54 ++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 driver/mada/timer/CMakeLists.txt create mode 100644 driver/mada/timer/mada_timer.c create mode 100644 driver/mada/timer/mada_timer.h diff --git a/driver/mada/timer/CMakeLists.txt b/driver/mada/timer/CMakeLists.txt new file mode 100644 index 0000000..123ab8e --- /dev/null +++ b/driver/mada/timer/CMakeLists.txt @@ -0,0 +1,7 @@ + +add_library(mada_timer STATIC mada_timer.c) + +target_include_directories(mada_timer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +target_link_libraries(mada_timer PUBLIC rocketcore) +target_link_libraries(mada_timer PUBLIC metal) diff --git a/driver/mada/timer/mada_timer.c b/driver/mada/timer/mada_timer.c new file mode 100644 index 0000000..91e028e --- /dev/null +++ b/driver/mada/timer/mada_timer.c @@ -0,0 +1 @@ +#include "mada_timer.h" \ No newline at end of file diff --git a/driver/mada/timer/mada_timer.h b/driver/mada/timer/mada_timer.h new file mode 100644 index 0000000..57db9d9 --- /dev/null +++ b/driver/mada/timer/mada_timer.h @@ -0,0 +1,54 @@ +#ifndef __MADA_TIMER_H +#define __MADA_TIMER_H + +#include "metal.h" + +/* +register map +0x00: control and status +0x04: counter value (r) +0x08: auto-reload value (rw) +0x0c: prescaler value (rw) +0x10: capture and control 0 (rw) +0x14: capture and control 1 (rw) +0x18: capture and control 2 (rw) +0x1c: capture and control 3 (rw) +*/ + +typedef struct { + uint32_t MD_TM_CSR; + uint32_t MD_TM_COUNTER_VAL; + uint32_t MD_TM_AUTO_RELAOD; + uint32_t MD_TM_PRESCALER; + uint32_t MD_TM_CCR0; + uint32_t MD_TM_CCR1; + uint32_t MD_TM_CCR2; +} MadaTimerType; + +#define MADA_ADDR 0x11000000 +#define MADA_ENCODER0 
((MadaTimerType*)MADA_ADDR) + +static inline void mada_timer_enable(MadaTimerType *timer){ + timer->MD_TM_CSR = 0x1; +} + +static inline void mada_timer_disable(MadaTimerType *timer){ + timer->MD_TM_CSR = 0x0; +} + +static inline uint32_t mada_timer_read_raw_counter(MadaTimerType *timer){ + return timer->MD_TM_COUNTER_VAL; +} + +static inline void mada_timer_configure(MadaTimerType* timer, uint32_t auto_reload, uint32_t prescaler){ + timer->MD_TM_AUTO_RELAOD = auto_reload; + timer->MD_TM_PRESCALER = prescaler; +} + +static inline void mada_timer_pwm_set(MadaTimerType* timer, uint32_t ccr0, uint32_t ccr1, uint32_t ccr2){ + timer->MD_TM_CCR0 = ccr0; + timer->MD_TM_CCR1 = ccr1; + timer->MD_TM_CCR2 = ccr2; +} + +#endif From 2163f805e4c79899aac45b1aea367006a3b44f18 Mon Sep 17 00:00:00 2001 From: Lux Date: Mon, 13 Jan 2025 00:12:09 -0800 Subject: [PATCH 10/10] ADD: foc test --- driver/mada/timer/mada_timer.h | 1 - examples/pmu-tests/CMakeLists.txt | 1 + examples/pmu-tests/src/foc-driver.c | 82 +++++++++++++++++ examples/pmu-tests/src/vpp-test.c | 137 ++++++++++------------------ 4 files changed, 129 insertions(+), 92 deletions(-) create mode 100644 examples/pmu-tests/src/foc-driver.c diff --git a/driver/mada/timer/mada_timer.h b/driver/mada/timer/mada_timer.h index 57db9d9..74cb2fd 100644 --- a/driver/mada/timer/mada_timer.h +++ b/driver/mada/timer/mada_timer.h @@ -14,7 +14,6 @@ register map 0x18: capture and control 2 (rw) 0x1c: capture and control 3 (rw) */ - typedef struct { uint32_t MD_TM_CSR; uint32_t MD_TM_COUNTER_VAL; diff --git a/examples/pmu-tests/CMakeLists.txt b/examples/pmu-tests/CMakeLists.txt index fcc219e..3795cee 100644 --- a/examples/pmu-tests/CMakeLists.txt +++ b/examples/pmu-tests/CMakeLists.txt @@ -14,6 +14,7 @@ foreach(test ${TESTS}) add_executable(${test} src/${test}.c) target_link_libraries(${test} PUBLIC l_trace_encoder + PUBLIC mada_timer PRIVATE -L${CMAKE_BINARY_DIR}/glossy -Wl,--whole-archive glossy -Wl,--no-whole-archive PUBLIC m ) diff --git 
a/examples/pmu-tests/src/foc-driver.c b/examples/pmu-tests/src/foc-driver.c new file mode 100644 index 0000000..4665274 --- /dev/null +++ b/examples/pmu-tests/src/foc-driver.c @@ -0,0 +1,82 @@ +#include +#include "riscv.h" +#include "riscv_encoding.h" +#include "l_trace_encoder.h" +#include "math.h" +#include "mada_timer.h" + +#define NUM_ITERS 100 // loop iterations +#define NUM_SAMPLES 60 // 0 to 2pi, 60 steps + +#define BUS_VOLTAGE 3.3f // 3V3 +#define SOC_FREQ 25 // 25 MHz +#define TIMER_PRESCALER 1000 // x cycle is 1 timer tick +#define TIMER_AUTORELOAD 1000 // x timer ticks up + x timer ticks down = 1 pwm cycle + +void FOC_invParkTransform(float *v_alpha, float *v_beta, float v_q, float v_d, float sin_theta, float cos_theta) { + *v_alpha = -(sin_theta * v_q) + (cos_theta * v_d); + *v_beta = (cos_theta * v_q) + (sin_theta * v_d); +} + +void FOC_invClarkSVPWM(float *v_a, float *v_b, float *v_c, float v_alpha, float v_beta) { + float v_a_phase = v_alpha; + float v_b_phase = (-.5f * v_alpha) + ((sqrtf(3.f)/2.f) * v_beta); + float v_c_phase = (-.5f * v_alpha) - ((sqrtf(3.f)/2.f) * v_beta); + + float v_neutral = .5f * (fmaxf(fmaxf(v_a_phase, v_b_phase), v_c_phase) + fminf(fminf(v_a_phase, v_b_phase), v_c_phase)); + + *v_a = v_a_phase - v_neutral; + *v_b = v_b_phase - v_neutral; + *v_c = v_c_phase - v_neutral; +} + +void FOC_pwmUpdate(MadaTimerType *timer, float *v_a, float *v_b, float *v_c) { + // scale the phase voltages read through v_a/v_b/v_c into CCR compare values; NOTE(review): a negative voltage makes the float-to-uint32_t cast undefined - confirm the intended offset or clamp + uint32_t ccr0 = (uint32_t)(BUS_VOLTAGE * (*v_a) * TIMER_AUTORELOAD / 2.f); + uint32_t ccr1 = (uint32_t)(BUS_VOLTAGE * (*v_b) * TIMER_AUTORELOAD / 2.f); + uint32_t ccr2 = (uint32_t)(BUS_VOLTAGE * (*v_c) * TIMER_AUTORELOAD / 2.f); + mada_timer_pwm_set(timer, ccr0, ccr1, ccr2); +} + +void FOC_update(float vq, float vd) { + float v_alpha, v_beta; + float v_a, v_b, v_c; + FOC_invParkTransform(&v_alpha, &v_beta, vq, vd, sin(vq), cos(vq)); + FOC_invClarkSVPWM(&v_a, &v_b, &v_c, v_alpha, v_beta); + FOC_pwmUpdate(MADA_ENCODER0, &v_a, &v_b, &v_c); +} + +
+int main(int argc, char **argv) { + LTraceEncoderType *encoder = l_trace_encoder_get(get_hart_id()); + l_trace_encoder_configure_target(encoder, TARGET_PRINT); + l_trace_encoder_start(encoder); + + for (int i = 0; i < 10; i++) { + __asm__("nop"); + } + + // configure timer + mada_timer_configure(MADA_ENCODER0, TIMER_AUTORELOAD, TIMER_PRESCALER); + + // warmup (disabled) + // FOC_update() takes only vq and vd and keeps the phase voltages internally + // FOC_update(0, 0); + for (int iter = 0; iter < NUM_ITERS; iter++) { + for (int i = 0; i < NUM_SAMPLES; i++) { + float vq = 2 * M_PI * i / NUM_SAMPLES; + float vd = 0; + // issue the update twice for every sample point + for (int j = 0; j < 2; j++) { + FOC_update(vq, vd); + } + } + } + + l_trace_encoder_stop(encoder); + + // spin for a bit, to make sure the trace buffer is flushed + for (int i = 0; i < 10; i++) { + __asm__("nop"); + } +} diff --git a/examples/pmu-tests/src/vpp-test.c b/examples/pmu-tests/src/vpp-test.c index afc766f..f1424f0 100644 --- a/examples/pmu-tests/src/vpp-test.c +++ b/examples/pmu-tests/src/vpp-test.c @@ -1,106 +1,61 @@ #include -#include #include "riscv.h" +#include "riscv_encoding.h" #include "l_trace_encoder.h" +#include "math.h" +#include "mada_timer.h" -#define ARRAY_SIZE 4096 -#define CHUNK_SIZE 64 -#define NUM_ITERATIONS 10 +#define NUM_ITERS 60 // 0 to 2pi, 60 steps -typedef struct { - uint32_t value; - uint32_t next_index; -} Node; +void FOC_invParkTransform(float *v_alpha, float *v_beta, float v_q, float v_d, float sin_theta, float cos_theta) { + *v_alpha = -(sin_theta * v_q) + (cos_theta * v_d); + *v_beta = (cos_theta * v_q) + (sin_theta * v_d); +} -static volatile Node array[ARRAY_SIZE]; +void FOC_invClarkSVPWM(float *v_a, float *v_b, float *v_c, float v_alpha, float v_beta) { + float v_a_phase = v_alpha; + float v_b_phase = (-.5f * v_alpha) + ((sqrtf(3.f)/2.f) * v_beta); + float v_c_phase = (-.5f * v_alpha) - ((sqrtf(3.f)/2.f) * v_beta); -// Fisher-Yates shuffle to randomize array access pattern -static 
void shuffle_indices(void) { - for (int i = ARRAY_SIZE - 1; i > 0; i--) { - // Use a simple PRNG for reproducibility - uint32_t j = (i * 1103515245 + 12345) % (i + 1); - array[i].next_index ^= array[j].next_index; - array[j].next_index ^= array[i].next_index; - array[i].next_index ^= array[j].next_index; - } -} + float v_neutral = .5f * (fmaxf(fmaxf(v_a_phase, v_b_phase), v_c_phase) + fminf(fminf(v_a_phase, v_b_phase), v_c_phase)); -// Sort a chunk using different algorithms based on data properties -static void sort_chunk(uint32_t start_idx, uint32_t size) { - // Count number of unique values to determine algorithm - uint32_t unique = 0; - uint32_t prev = 0; - - for (uint32_t i = 0; i < size; i++) { - uint32_t curr_idx = start_idx; - for (uint32_t j = 0; j < i; j++) { - curr_idx = array[curr_idx].next_index; - } - if (array[curr_idx].value != prev) { - unique++; - prev = array[curr_idx].value; - } - } + *v_a = v_a_phase - v_neutral; + *v_b = v_b_phase - v_neutral; + *v_c = v_c_phase - v_neutral; +} - // Choose algorithm based on data properties - if (unique < size/4) { - // Counting sort for low uniqueness - uint32_t max = 0; - uint32_t curr_idx = start_idx; - for (uint32_t i = 0; i < size; i++) { - if (array[curr_idx].value > max) max = array[curr_idx].value; - curr_idx = array[curr_idx].next_index; - } - - uint32_t counts[256] = {0}; // Limited range for simplicity - curr_idx = start_idx; - for (uint32_t i = 0; i < size; i++) { - counts[array[curr_idx].value % 256]++; - curr_idx = array[curr_idx].next_index; - } - } else { - // Insertion sort for high uniqueness - for (uint32_t i = 1; i < size; i++) { - uint32_t curr_idx = start_idx; - for (uint32_t j = 0; j < i; j++) { - curr_idx = array[curr_idx].next_index; - } - uint32_t key = array[curr_idx].value; - - uint32_t j = i - 1; - uint32_t prev_idx = start_idx; - while (j >= 0 && array[prev_idx].value > key) { - uint32_t next_idx = array[prev_idx].next_index; - array[next_idx].value = array[prev_idx].value; - j--; 
- prev_idx = array[prev_idx].next_index; - } - array[curr_idx].value = key; - } - } +void FOC_update(float *v_a, float *v_b, float *v_c, float vq, float vd) { + float v_alpha, v_beta; + FOC_invParkTransform(&v_alpha, &v_beta, vq, vd, sin(vq), cos(vq)); + FOC_invClarkSVPWM(v_a, v_b, v_c, v_alpha, v_beta); } -int main() { - // Initialize with pseudo-random data and linked indices - for (uint32_t i = 0; i < ARRAY_SIZE; i++) { - array[i].value = (i * 1103515245 + 12345) & 0xFFFFFFFF; - array[i].next_index = (i + 1) % ARRAY_SIZE; +int main(int argc, char **argv) { + LTraceEncoderType *encoder = l_trace_encoder_get(get_hart_id()); + l_trace_encoder_configure_target(encoder, TARGET_PRINT); + l_trace_encoder_start(encoder); + + for (int i = 0; i < 10; i++) { + __asm__("nop"); + } + + // warmup + // float v_a = 0, v_b = 0, v_c = 0; + // FOC_update(&v_a, &v_b, &v_c, 0, 0); + + for (int i = 0; i < NUM_ITERS; i++) { + float vq = 2 * M_PI * i / NUM_ITERS; + float vd = 0; + float v_alpha, v_beta, v_a, v_b, v_c; + for (int j = 0; j < 2; j++) { + FOC_update(&v_a, &v_b, &v_c, vq, vd); } + } - LTraceEncoderType *encoder = l_trace_encoder_get(get_hart_id()); - l_trace_encoder_configure_target(encoder, TARGET_PRINT); - l_trace_encoder_start(encoder); - - for (int iter = 0; iter < NUM_ITERATIONS; iter++) { - // Randomize access pattern - shuffle_indices(); - - // Sort chunks in different orders - for (uint32_t i = 0; i < ARRAY_SIZE; i += CHUNK_SIZE) { - sort_chunk(i, CHUNK_SIZE); - } - } + l_trace_encoder_stop(encoder); - l_trace_encoder_stop(encoder); - return 0; + // spin for a bit, to make sure the trace buffer is flushed + for (int i = 0; i < 10; i++) { + __asm__("nop"); + } }