diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ed5422..6f570c0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,9 @@ option(CHIP "Build for a specific platform" OF option(PROF_COV "Build with profiling and coverage" OFF ) option(GCNO_ONLY "Only build gcno files" OFF ) option(USE_PGO "Build with profile guided optimization" OFF ) - +option(OPT_INFO "Build with optimization information" OFF ) +# Define the variable with a default empty value +set(AFDO_PATH "" CACHE STRING "Path to autofdo profile data") ################################# # Toolchain Targets ################################# @@ -74,8 +76,9 @@ endif() add_subdirectory(${CMAKE_SOURCE_DIR}/platform/${CHIP}) -add_compile_options(-O1) +add_compile_options(-O0) add_compile_options(-Wall -Wextra) +add_compile_options(-Wno-error=coverage-mismatch) if (PROF_COV) message(STATUS "Building with profiling and coverage") @@ -90,7 +93,16 @@ endif() if (USE_PGO) add_compile_options(-fprofile-use) endif() -add_compile_options(-fno-builtin) + +if (OPT_INFO) + add_compile_options(-fopt-info) +endif() + +# Check if the variable is not empty and add compiler options accordingly +if(NOT "${AFDO_PATH}" STREQUAL "") + message(STATUS "Using AutoFDO profile from: ${AFDO_PATH}") + add_compile_options(-fauto-profile=${AFDO_PATH}) +endif() # add_compile_options(-ffunction-sections -fdata-sections -fno-common -fno-builtin-printf -fno-pie) # add_compile_options(-Wall -Wextra -Warray-bounds -Wno-unused-parameter -Wcast-qual) @@ -110,9 +122,6 @@ if (PROF_COV) add_link_options(-lgcov) endif() - - - ################################# # Build ################################# @@ -127,7 +136,6 @@ target_include_directories(app PUBLIC app/include) include_directories(/scratch/iansseijelly/chipyard/.conda-env/riscv-tools/riscv64-unknown-elf/include) link_directories(/scratch/iansseijelly/chipyard/.conda-env/riscv-tools/riscv64-unknown-elf/lib) - ################################# # Dependencies 
################################# diff --git a/driver/mada/timer/CMakeLists.txt b/driver/mada/timer/CMakeLists.txt new file mode 100644 index 0000000..123ab8e --- /dev/null +++ b/driver/mada/timer/CMakeLists.txt @@ -0,0 +1,7 @@ + +add_library(mada_timer STATIC mada_timer.c) + +target_include_directories(mada_timer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +target_link_libraries(mada_timer PUBLIC rocketcore) +target_link_libraries(mada_timer PUBLIC metal) diff --git a/driver/mada/timer/mada_timer.c b/driver/mada/timer/mada_timer.c new file mode 100644 index 0000000..91e028e --- /dev/null +++ b/driver/mada/timer/mada_timer.c @@ -0,0 +1 @@ +#include "mada_timer.h" \ No newline at end of file diff --git a/driver/mada/timer/mada_timer.h b/driver/mada/timer/mada_timer.h new file mode 100644 index 0000000..74cb2fd --- /dev/null +++ b/driver/mada/timer/mada_timer.h @@ -0,0 +1,53 @@ +#ifndef __MADA_TIMER_H +#define __MADA_TIMER_H + +#include "metal.h" + +/* +register map +0x00: control and status +0x04: counter value (r) +0x08: auto-reload value (rw) +0x0c: prescaler value (rw) +0x10: capture and control 0 (rw) +0x14: capture and control 1 (rw) +0x18: capture and control 2 (rw) +0x1c: capture and control 3 (rw; not declared in MadaTimerType below) +*/ +typedef struct { /* fields are volatile: the struct is overlaid on a fixed address, so no access may be elided or merged */ + volatile uint32_t MD_TM_CSR; + volatile uint32_t MD_TM_COUNTER_VAL; + volatile uint32_t MD_TM_AUTO_RELAOD; + volatile uint32_t MD_TM_PRESCALER; + volatile uint32_t MD_TM_CCR0; + volatile uint32_t MD_TM_CCR1; + volatile uint32_t MD_TM_CCR2; +} MadaTimerType; + +#define MADA_ADDR 0x11000000 +#define MADA_ENCODER0 ((MadaTimerType*)MADA_ADDR) + +static inline void mada_timer_enable(MadaTimerType *timer){ + timer->MD_TM_CSR = 0x1; +} + +static inline void mada_timer_disable(MadaTimerType *timer){ + timer->MD_TM_CSR = 0x0; +} + +static inline uint32_t mada_timer_read_raw_counter(MadaTimerType *timer){ + return timer->MD_TM_COUNTER_VAL; +} + +static inline void mada_timer_configure(MadaTimerType* timer, uint32_t auto_reload, uint32_t prescaler){ + timer->MD_TM_AUTO_RELAOD = auto_reload; + timer->MD_TM_PRESCALER = 
prescaler; +} + +static inline void mada_timer_pwm_set(MadaTimerType* timer, uint32_t ccr0, uint32_t ccr1, uint32_t ccr2){ + timer->MD_TM_CCR0 = ccr0; + timer->MD_TM_CCR1 = ccr1; + timer->MD_TM_CCR2 = ccr2; +} + +#endif diff --git a/driver/rocket-chip/l_trace_encoder/l_trace_encoder.c b/driver/rocket-chip/l_trace_encoder/l_trace_encoder.c index 6791cd3..8dc2294 100644 --- a/driver/rocket-chip/l_trace_encoder/l_trace_encoder.c +++ b/driver/rocket-chip/l_trace_encoder/l_trace_encoder.c @@ -18,15 +18,14 @@ void l_trace_sink_dma_read(LTraceSinkDmaType *sink_dma, uint8_t *buffer) { printf("\n"); } -void l_trace_encoder_start(LTraceEncoderType *encoder) { - SET_BITS(encoder->TR_TE_CTRL, 0x1 << 1); -} - void l_trace_encoder_configure_target(LTraceEncoderType *encoder, uint64_t target) { encoder->TR_TE_TARGET = target; } -void l_trace_encoder_stop(LTraceEncoderType *encoder) { - CLEAR_BITS(encoder->TR_TE_CTRL, 0x1 << 1); +void l_trace_encoder_configure_hpm_counter_en(LTraceEncoderType *encoder, uint32_t hpm_counter) { + encoder->TR_TE_HPM_COUNTER = hpm_counter; } +void l_trace_encoder_configure_hpm_counter_time(LTraceEncoderType *encoder, uint32_t time) { + encoder->TR_TE_HPM_TIME = time; +} diff --git a/driver/rocket-chip/l_trace_encoder/l_trace_encoder.h b/driver/rocket-chip/l_trace_encoder/l_trace_encoder.h index 98d0783..f4a07f4 100644 --- a/driver/rocket-chip/l_trace_encoder/l_trace_encoder.h +++ b/driver/rocket-chip/l_trace_encoder/l_trace_encoder.h @@ -9,6 +9,8 @@ typedef struct { uint32_t TR_TE_CTRL; uint32_t TR_TE_TARGET; + uint32_t TR_TE_HPM_COUNTER; + uint32_t TR_TE_HPM_TIME; } LTraceEncoderType; typedef struct { @@ -41,9 +43,17 @@ static inline LTraceSinkDmaType *l_trace_sink_dma_get(uint32_t hart_id) { return (LTraceSinkDmaType *)(L_TRACE_SINK_DMA_BASE_ADDRESS + hart_id * 0x1000); } -void l_trace_encoder_start(LTraceEncoderType *encoder); -void l_trace_encoder_stop(LTraceEncoderType *encoder); +static inline void l_trace_encoder_start(LTraceEncoderType 
*encoder) { + SET_BITS(encoder->TR_TE_CTRL, 0x1 << 1); +} + +static inline void l_trace_encoder_stop(LTraceEncoderType *encoder) { + CLEAR_BITS(encoder->TR_TE_CTRL, 0x1 << 1); +} + void l_trace_encoder_configure_target(LTraceEncoderType *encoder, uint64_t target); void l_trace_sink_dma_configure_addr(LTraceSinkDmaType *sink_dma, uint64_t dma_addr); void l_trace_sink_dma_read(LTraceSinkDmaType *sink_dma, uint8_t *buffer); +void l_trace_encoder_configure_hpm_counter_en(LTraceEncoderType *encoder, uint32_t hpm_counter); +void l_trace_encoder_configure_hpm_counter_time(LTraceEncoderType *encoder, uint32_t time); #endif /* __L_TRACE_ENCODER_H */ diff --git a/examples/embench/CMakeLists.txt b/examples/embench/CMakeLists.txt index ebc37ad..1b33af2 100644 --- a/examples/embench/CMakeLists.txt +++ b/examples/embench/CMakeLists.txt @@ -12,6 +12,11 @@ if (EMBENCH_ENABLE_TRACE_DMA) add_definitions(-DUSE_L_TRACE_DMA) endif() +if (EMBENCH_ENABLE_LBR) + add_definitions(-DTIMER_INTERRUPT) + add_definitions(-DUSE_LBR) +endif() + set(BENCHMARKS dummy wikisort diff --git a/examples/embench/common/inc/trigger.h b/examples/embench/common/inc/trigger.h index ee58ff9..5434600 100644 --- a/examples/embench/common/inc/trigger.h +++ b/examples/embench/common/inc/trigger.h @@ -24,7 +24,7 @@ /* timer interrupt interval in milliseconds only used when TIMER_INTERRUPT is defined */ -#define TIMER_INTERRUPT_INTERVAL 100 +#define TIMER_INTERRUPT_INTERVAL 25 #ifdef USE_L_TRACE_DMA static uint8_t dma_buffer[512 * 1024]; @@ -96,19 +96,20 @@ void machine_timer_interrupt_callback() { #endif static inline void stop_trigger(void) { + #ifdef REPORT_TOTAL_TIME + int64_t curr_time = clint_get_time(CLINT); + printf("stop trigger at %lld\n", curr_time); + #endif + #ifdef USE_LBR lbr_dump_records(); #endif + #ifdef USE_L_TRACE LTraceEncoderType *encoder = l_trace_encoder_get(get_hart_id()); l_trace_encoder_stop(encoder); #endif - #ifdef REPORT_TOTAL_TIME - int64_t curr_time = clint_get_time(CLINT); - 
printf("stop trigger at %lld\n", curr_time); - #endif - #ifdef USE_L_TRACE_DMA LTraceSinkDmaType *sink_dma = l_trace_sink_dma_get(get_hart_id()); l_trace_sink_dma_read(sink_dma, dma_buffer); diff --git a/examples/pmu-tests/CMakeLists.txt b/examples/pmu-tests/CMakeLists.txt index 2404d0d..3795cee 100644 --- a/examples/pmu-tests/CMakeLists.txt +++ b/examples/pmu-tests/CMakeLists.txt @@ -4,6 +4,9 @@ set (TESTS pmu-test-inhibit lbr-test ltrace-dma-test + ltrace-hpm-test + speedscope-test + vpp-test sort ) @@ -11,6 +14,8 @@ foreach(test ${TESTS}) add_executable(${test} src/${test}.c) target_link_libraries(${test} PUBLIC l_trace_encoder + PUBLIC mada_timer PRIVATE -L${CMAKE_BINARY_DIR}/glossy -Wl,--whole-archive glossy -Wl,--no-whole-archive + PUBLIC m ) endforeach() diff --git a/examples/pmu-tests/src/foc-driver.c b/examples/pmu-tests/src/foc-driver.c new file mode 100644 index 0000000..4665274 --- /dev/null +++ b/examples/pmu-tests/src/foc-driver.c @@ -0,0 +1,82 @@ +#include +#include "riscv.h" +#include "riscv_encoding.h" +#include "l_trace_encoder.h" +#include "math.h" +#include "mada_timer.h" + +#define NUM_ITERS 100 // loop iterations +#define NUM_SAMPLES 60 // 0 to 2pi, 60 steps + +#define BUS_VOLTAGE 3.3f // 3V3 +#define SOC_FREQ 25 // 25 MHz +#define TIMER_PRESCALER 1000 // x cycle is 1 timer tick +#define TIMER_AUTORELOAD 1000 // x timer ticks up + x timer ticks down = 1 pwm cycle + +void FOC_invParkTransform(float *v_alpha, float *v_beta, float v_q, float v_d, float sin_theta, float cos_theta) { + *v_alpha = -(sin_theta * v_q) + (cos_theta * v_d); + *v_beta = (cos_theta * v_q) + (sin_theta * v_d); +} + +void FOC_invClarkSVPWM(float *v_a, float *v_b, float *v_c, float v_alpha, float v_beta) { + float v_a_phase = v_alpha; + float v_b_phase = (-.5f * v_alpha) + ((sqrtf(3.f)/2.f) * v_beta); + float v_c_phase = (-.5f * v_alpha) - ((sqrtf(3.f)/2.f) * v_beta); + + float v_neutral = .5f * (fmaxf(fmaxf(v_a_phase, v_b_phase), v_c_phase) + fminf(fminf(v_a_phase, 
v_b_phase), v_c_phase)); + + *v_a = v_a_phase - v_neutral; + *v_b = v_b_phase - v_neutral; + *v_c = v_c_phase - v_neutral; +} + +void FOC_pwmUpdate(MadaTimerType *timer, float v_a, float v_b, float v_c) { + // convert v to ccr values to set pwm; NOTE(review): a negative phase voltage makes the float -> uint32_t cast undefined, confirm the intended scaling/offset + uint32_t ccr0 = (uint32_t)(BUS_VOLTAGE * v_a * TIMER_AUTORELOAD / 2.f); + uint32_t ccr1 = (uint32_t)(BUS_VOLTAGE * v_b * TIMER_AUTORELOAD / 2.f); + uint32_t ccr2 = (uint32_t)(BUS_VOLTAGE * v_c * TIMER_AUTORELOAD / 2.f); + mada_timer_pwm_set(timer, ccr0, ccr1, ccr2); +} + +void FOC_update(float vq, float vd) { + float v_alpha, v_beta; + float v_a, v_b, v_c; + FOC_invParkTransform(&v_alpha, &v_beta, vq, vd, sin(vq), cos(vq)); + FOC_invClarkSVPWM(&v_a, &v_b, &v_c, v_alpha, v_beta); + FOC_pwmUpdate(MADA_ENCODER0, v_a, v_b, v_c); +} + + +int main(int argc, char **argv) { + LTraceEncoderType *encoder = l_trace_encoder_get(get_hart_id()); + l_trace_encoder_configure_target(encoder, TARGET_PRINT); + l_trace_encoder_start(encoder); + + for (int i = 0; i < 10; i++) { + __asm__("nop"); + } + + // configure timer + mada_timer_configure(MADA_ENCODER0, TIMER_AUTORELOAD, TIMER_PRESCALER); + + // warmup (disabled) + // FOC_update keeps the phase voltages in locals, so no buffers are needed here + // FOC_update(0, 0); + for (int iter = 0; iter < NUM_ITERS; iter++) { + for (int i = 0; i < NUM_SAMPLES; i++) { + float vq = 2 * M_PI * i / NUM_SAMPLES; + float vd = 0; + // two back-to-back updates per sample point + for (int j = 0; j < 2; j++) { + FOC_update(vq, vd); + } + } + } + + l_trace_encoder_stop(encoder); + + // spin for a bit, to make sure the trace buffer is flushed + for (int i = 0; i < 10; i++) { + __asm__("nop"); + } +} diff --git a/examples/pmu-tests/src/ltrace-hpm-test.c b/examples/pmu-tests/src/ltrace-hpm-test.c new file mode 100644 index 0000000..c703f0f --- /dev/null +++ b/examples/pmu-tests/src/ltrace-hpm-test.c @@ -0,0 +1,40 @@ +#include +#include "riscv.h" +#include "pmu.h" +#include "riscv_encoding.h" +#include "l_trace_encoder.h" + +#define 
NUM_ITERS 100 + +// #define USE_L_TRACE_DMA + +static volatile uint32_t array[NUM_ITERS]; + +int main(int argc, char **argv) { + // enable counter enable + PMU_COUNTER_ENABLE(3); + // configure event3 and reset counter3 + printf("---configure event3---\n"); + PMU_EVENT_ENABLE(PMU_EVENT(0, LOAD), 3); + LTraceEncoderType *encoder = l_trace_encoder_get(get_hart_id()); + l_trace_encoder_configure_target(encoder, TARGET_PRINT); + l_trace_encoder_configure_hpm_counter_en(encoder, 1 << 3); + l_trace_encoder_configure_hpm_counter_time(encoder, 1000); // report every 1000 cycles + PMU_COUNTER_RESET(3); + l_trace_encoder_start(encoder); + // do some dummy loop with load + for (int i = 0; i < NUM_ITERS; i++) { + array[i] = i; + } + // do some different loops + for (int i = 0; i < NUM_ITERS; i++) { + array[i] -= i; + } + l_trace_encoder_stop(encoder); + PMU_INHIBIT_ENABLE(3); + + // read counter + uint32_t counter = PMU_COUNTER_READ(3); + printf("counter: %d\n", counter); + +} diff --git a/examples/pmu-tests/src/speedscope-test.c b/examples/pmu-tests/src/speedscope-test.c new file mode 100644 index 0000000..ab8d5b7 --- /dev/null +++ b/examples/pmu-tests/src/speedscope-test.c @@ -0,0 +1,45 @@ +#include +#include "riscv.h" +#include "riscv_encoding.h" +#include "l_trace_encoder.h" +#include "math.h" + +#define NUM_ITERS 10 // 0 to 2pi, 10 steps + +int add_kernel(int a, int b) { + if (b == 0) { + return a; + } + return add_kernel(a, b - 1) + 1; +} + +int workload(int a, int b) { + return add_kernel(a, b); +} + +int main(int argc, char **argv) { + LTraceEncoderType *encoder = l_trace_encoder_get(get_hart_id()); + l_trace_encoder_configure_target(encoder, TARGET_PRINT); + l_trace_encoder_start(encoder); + + for (int i = 0; i < 10; i++) { + __asm__("nop"); + } + + for (int i = 0; i < 10; i++) { + volatile int x = workload(1, i); + } + + for (int i = 0; i < 10; i++) { + volatile float x = sin(i); + } + + printf("Hello, world! 
%d\n", 0); + + l_trace_encoder_stop(encoder); + + // spin for a bit, to make sure the trace buffer is flushed + for (int i = 0; i < 10; i++) { + __asm__("nop"); + } +} diff --git a/examples/pmu-tests/src/vpp-test.c b/examples/pmu-tests/src/vpp-test.c new file mode 100644 index 0000000..f1424f0 --- /dev/null +++ b/examples/pmu-tests/src/vpp-test.c @@ -0,0 +1,61 @@ +#include +#include "riscv.h" +#include "riscv_encoding.h" +#include "l_trace_encoder.h" +#include "math.h" +#include "mada_timer.h" + +#define NUM_ITERS 60 // 0 to 2pi, 60 steps + +void FOC_invParkTransform(float *v_alpha, float *v_beta, float v_q, float v_d, float sin_theta, float cos_theta) { + *v_alpha = -(sin_theta * v_q) + (cos_theta * v_d); + *v_beta = (cos_theta * v_q) + (sin_theta * v_d); +} + +void FOC_invClarkSVPWM(float *v_a, float *v_b, float *v_c, float v_alpha, float v_beta) { + float v_a_phase = v_alpha; + float v_b_phase = (-.5f * v_alpha) + ((sqrtf(3.f)/2.f) * v_beta); + float v_c_phase = (-.5f * v_alpha) - ((sqrtf(3.f)/2.f) * v_beta); + + float v_neutral = .5f * (fmaxf(fmaxf(v_a_phase, v_b_phase), v_c_phase) + fminf(fminf(v_a_phase, v_b_phase), v_c_phase)); + + *v_a = v_a_phase - v_neutral; + *v_b = v_b_phase - v_neutral; + *v_c = v_c_phase - v_neutral; +} + +void FOC_update(float *v_a, float *v_b, float *v_c, float vq, float vd) { + float v_alpha, v_beta; + FOC_invParkTransform(&v_alpha, &v_beta, vq, vd, sin(vq), cos(vq)); + FOC_invClarkSVPWM(v_a, v_b, v_c, v_alpha, v_beta); +} + +int main(int argc, char **argv) { + LTraceEncoderType *encoder = l_trace_encoder_get(get_hart_id()); + l_trace_encoder_configure_target(encoder, TARGET_PRINT); + l_trace_encoder_start(encoder); + + for (int i = 0; i < 10; i++) { + __asm__("nop"); + } + + // warmup + // float v_a = 0, v_b = 0, v_c = 0; + // FOC_update(&v_a, &v_b, &v_c, 0, 0); + + for (int i = 0; i < NUM_ITERS; i++) { + float vq = 2 * M_PI * i / NUM_ITERS; + float vd = 0; + float v_alpha, v_beta, v_a, v_b, v_c; + for (int j = 0; j < 2; j++) { + 
FOC_update(&v_a, &v_b, &v_c, vq, vd); + } + } + + l_trace_encoder_stop(encoder); + + // spin for a bit, to make sure the trace buffer is flushed + for (int i = 0; i < 10; i++) { + __asm__("nop"); + } +} diff --git a/scripts/gcov/test.sh b/scripts/gcov/test.sh deleted file mode 100755 index d985e39..0000000 --- a/scripts/gcov/test.sh +++ /dev/null @@ -1,17 +0,0 @@ -# must be run from the root directory -# generate the baseline -cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=OFF -cmake --build ./build/ --target nettle-sha256 -spike build/examples/embench/nettle-sha256.elf > build/test.txt -# generate the coverage profile -cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=ON -D USE_PGO=OFF -cmake --build ./build/ --target nettle-sha256 -spike build/examples/embench/nettle-sha256.elf > build/test_cov.txt -# write the coverage files -python3 /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/gcov/dump_gcda.py /scratch/iansseijelly/chipyard/software/baremetal-ide/build/test_cov.txt -# perform PGO -cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=ON -cmake --build ./build/ --target nettle-sha256 -spike build/examples/embench/nettle-sha256.elf > build/test_after.txt -# diff -diff build/test.txt build/test_after.txt \ No newline at end of file diff --git a/scripts/test_all/test_all.sh b/scripts/test_all/test_all.sh new file mode 100755 index 0000000..b3f0519 --- /dev/null +++ b/scripts/test_all/test_all.sh @@ -0,0 +1,100 @@ +#!/bin/bash +set -e +# here should be where the script itself is +cd /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all +HERE=$(pwd) +mkdir -p $HERE/test +cd /scratch/iansseijelly/chipyard/software/baremetal-ide + +echo "--- Generating the vanilla baseline ---" +rm -rf build +# generate the vanilla baseline +cmake -S ./ -B 
./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=OFF +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/vanilla.log +popd + +echo "--- Generating the instrumented coverage profile ---" +rm -rf build +# generate the instrumented coverage profile +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=ON -D USE_PGO=OFF +cmake --build ./build/ --target wikisort +spike build/examples/embench/wikisort.elf > build/test_cov.txt +# write the coverage files +python3 /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/gcov/dump_gcda.py /scratch/iansseijelly/chipyard/software/baremetal-ide/build/test_cov.txt +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/instrumented.log +popd +# perform PGO +echo "--- Performing PGO ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/instrumentation_pgo.log +popd 
+ +rm -rf build +# generate a trace +echo "--- Generating tacit trace ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D GCNO_ONLY=ON -D USE_PGO=OFF -D EMBENCH_ENABLE_TRACE_PRINT=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/trace.log +popd +pushd /scratch/iansseijelly/ltrace_decoder +cargo run -- --encoded-trace /scratch/iansseijelly/chipyard/sims/vcs/byte_printer.txt --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --gcno /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/CMakeFiles/wikisort_lib.dir/wikisort/libwikisort.c.gcno --to-gcda --to-afdo +# save the trace afdo file for later use +cp ./trace_afdo.txt $HERE/test/trace_afdo.txt +/scratch/iansseijelly/autofdo/build/create_gcov --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --profile $HERE/test/trace_afdo.txt --profiler text --gcov_version=2 --gcov $HERE/test/trace_afdo_fbdata.afdo +popd + +# must not remove build directory, as .gcda files are needed +# perform trace PGO (the build directory is reused, so GCNO_ONLY from the trace step is reset explicitly) +echo "--- Performing trace PGO ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D PROF_COV=OFF -D GCNO_ONLY=OFF -D USE_PGO=ON -D EMBENCH_ENABLE_TRACE_PRINT=ON -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp 
/scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/trace-pgo.log +popd + +rm -rf build +# perform LBR samping +echo "--- Performing LBR sampling ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D EMBENCH_ENABLE_LBR=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/sample-lbr-vanilla.log +popd +# post-process the sampled LBR record +pushd /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test +python3 /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/perf/dump_lbr.py /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test/sample-lbr-vanilla.log +mv ./lbr_branch.txt $HERE/test/lbr_sampling_branch.txt +/scratch/iansseijelly/autofdo/build/create_gcov --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --profile $HERE/test/lbr_sampling_branch.txt --profiler text --gcov_version=2 +popd + +# perform LBR AutoFDO +echo "--- Performing LBR AutoFDO ---" +rm -rf build +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D EMBENCH_ENABLE_LBR=OFF -D AFDO_PATH=/scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test/fbdata.afdo +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp 
/scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/sample-lbr-afdo.log +popd + +# perform trace LBR AutoFDO +echo "--- Performing trace LBR AutoFDO ---" +rm -rf build +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D EMBENCH_ENABLE_LBR=OFF -D AFDO_PATH=/scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test/trace_afdo_fbdata.afdo +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/trace-lbr-afdo.log +popd \ No newline at end of file diff --git a/scripts/test_all/test_instrumentation.sh b/scripts/test_all/test_instrumentation.sh new file mode 100755 index 0000000..67ca4ad --- /dev/null +++ b/scripts/test_all/test_instrumentation.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -e +# here should be where the script itself is +cd /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all +HERE=$(pwd) +mkdir -p $HERE/test +cd /scratch/iansseijelly/chipyard/software/baremetal-ide + +echo "--- Generating the vanilla baseline ---" +rm -rf build +# generate the vanilla baseline +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=OFF +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/vanilla.log +popd + +echo 
"--- Generating the instrumented coverage profile ---" +rm -rf build +# generate the instrumented coverage profile +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=ON -D USE_PGO=OFF +cmake --build ./build/ --target wikisort +spike build/examples/embench/wikisort.elf > build/test_cov.txt +# write the coverage files +python3 /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/gcov/dump_gcda.py /scratch/iansseijelly/chipyard/software/baremetal-ide/build/test_cov.txt +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/instrumented.log +popd +# perform PGO +echo "--- Performing PGO ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D USE_PGO=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/instrumentation_pgo.log +popd + +rm -rf build +# generate a trace +echo "--- Generating tacit trace ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D GCNO_ONLY=ON -D USE_PGO=OFF -D EMBENCH_ENABLE_TRACE_PRINT=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp 
/scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/trace.log +popd +pushd /scratch/iansseijelly/ltrace_decoder +cargo run -- --encoded-trace /scratch/iansseijelly/chipyard/sims/vcs/byte_printer.txt --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --gcno /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/CMakeFiles/wikisort_lib.dir/wikisort/libwikisort.c.gcno --to-gcda --to-afdo +popd + +# must not remove build directory, as .gcda files are needed +# perform trace PGO +echo "--- Performing trace PGO ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D PROF_COV=OFF -D GCNO_ONLY=OFF -D USE_PGO=ON -D EMBENCH_ENABLE_TRACE_PRINT=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/trace-pgo.log +popd \ No newline at end of file diff --git a/scripts/test_all/test_sampling.sh b/scripts/test_all/test_sampling.sh new file mode 100755 index 0000000..91663a2 --- /dev/null +++ b/scripts/test_all/test_sampling.sh @@ -0,0 +1,58 @@ +#!/bin/bash +set -e +# here should be where the script itself is +cd /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all +HERE=$(pwd) +mkdir -p $HERE/test +cd /scratch/iansseijelly/chipyard/software/baremetal-ide + +rm -rf build +# perform LBR samping +echo "--- Performing LBR sampling ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D EMBENCH_ENABLE_LBR=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make 
run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/sample-lbr-vanilla.log +popd +# post-process the sampled LBR record +pushd /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test +python3 /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/perf/dump_lbr.py /scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test/sample-lbr-vanilla.log +mv ./lbr_branch.txt $HERE/test/lbr_sampling_branch.txt +/scratch/iansseijelly/autofdo/build/create_gcov --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --profile $HERE/test/lbr_sampling_branch.txt --profiler text --gcov_version=2 +popd + +# perform LBR AutoFDO +echo "--- Performing LBR AutoFDO ---" +rm -rf build +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D EMBENCH_ENABLE_LBR=OFF -D AFDO_PATH=/scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test/fbdata.afdo +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/sample-lbr-afdo.log +popd + +rm -rf build +# generate a trace +echo "--- Generating tacit trace ---" +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D GCNO_ONLY=ON -D USE_PGO=OFF -D EMBENCH_ENABLE_TRACE_PRINT=ON +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig 
BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/trace.log +popd +pushd /scratch/iansseijelly/ltrace_decoder +cargo run -- --encoded-trace /scratch/iansseijelly/chipyard/sims/vcs/byte_printer.txt --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --gcno /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/CMakeFiles/wikisort_lib.dir/wikisort/libwikisort.c.gcno --to-afdo +# save the trace afdo file for later use +cp ./trace_afdo.txt $HERE/test/trace_afdo.txt +popd +/scratch/iansseijelly/autofdo/build/create_gcov --binary /scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf --profile $HERE/test/trace_afdo.txt --profiler text --gcov_version=2 --gcov $HERE/test/trace_afdo_fbdata.afdo +# perform trace LBR AutoFDO +echo "--- Performing trace LBR AutoFDO ---" +rm -rf build +cmake -S ./ -B ./build/ -D CMAKE_BUILD_TYPE=Debug -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -D EMBENCH_ENABLE_LBR=OFF -D AFDO_PATH=/scratch/iansseijelly/chipyard/software/baremetal-ide/scripts/test_all/test/trace_afdo_fbdata.afdo +cmake --build ./build/ --target wikisort +pushd /scratch/iansseijelly/chipyard/sims/vcs +make run-binary CONFIG=WithLTraceEncoderRocketConfig BINARY=/scratch/iansseijelly/chipyard/software/baremetal-ide/build/examples/embench/wikisort.elf LOADMEM=1 +cp /scratch/iansseijelly/chipyard/sims/vcs/output/chipyard.harness.TestHarness.WithLTraceEncoderRocketConfig/wikisort.log $HERE/test/trace-lbr-afdo.log +popd \ No newline at end of file