From c1e137bf5b3eb7f7cc6eafb83f699134fe848c9c Mon Sep 17 00:00:00 2001 From: Adrian Lundell <36153706+AdrianLundell@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:39:22 +0100 Subject: [PATCH] Add support for system_config/memory_mode in arm AOT flow (#7359) Add support for system_config/memory_mode in arm AOT flow - Passes new args to Vela compileSpec - Selects correct Timing adapter values depending on system_config --- backends/arm/test/setup_testing.sh | 13 +-- examples/arm/aot_arm_compiler.py | 46 ++++++++-- examples/arm/executor_runner/CMakeLists.txt | 93 +++++++++++++++++++-- examples/arm/run.sh | 32 ++++++- 4 files changed, 160 insertions(+), 24 deletions(-) diff --git a/backends/arm/test/setup_testing.sh b/backends/arm/test/setup_testing.sh index 5625ae212f..0562604dd9 100755 --- a/backends/arm/test/setup_testing.sh +++ b/backends/arm/test/setup_testing.sh @@ -20,8 +20,9 @@ fvp_model=FVP_Corstone_SSE-300_Ethos-U55 # Put in backends/arm/test/res to be used by unit tests. function build_semihosting_executorch_runner() { target_board=$1 + system_config=$2 build_test_dir=${build_root_test_dir}_${target_board} - echo "[${FUNCNAME[0]}] Configuring ${target_board}" + echo "[${FUNCNAME[0]}] Configuring ${target_board} with system config ${system_config}" if [[ ${target_board} == "corstone-300" ]]; then local target_cpu=cortex-m55 elif [[ ${target_board} == "corstone-320" ]]; then @@ -36,15 +37,14 @@ function build_semihosting_executorch_runner() { cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DTARGET_CPU=${target_cpu} \ - -DTARGET_BOARD=${target_board} \ -DSEMIHOSTING=ON \ -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=${build_test_dir} \ -B ${build_test_dir} \ -DETHOS_SDK_PATH:PATH=${ethos_u_root_dir} \ -DET_DIR_PATH:PATH=${et_root_dir} \ -DET_BUILD_DIR_PATH:PATH=${et_build_dir} \ - -DPYTHON_EXECUTABLE=$(which python3) - + -DPYTHON_EXECUTABLE=$(which python3) \ + -DSYSTEM_CONFIG=${system_config} echo "[${FUNCNAME[0]}] Configured 
CMAKE" n=$(nproc) @@ -53,6 +53,7 @@ function build_semihosting_executorch_runner() { find ${build_test_dir} -name "arm_executor_runner" } -build_semihosting_executorch_runner corstone-300 +# Use most optimal system_configs for testing +build_semihosting_executorch_runner corstone-300 Ethos_U55_High_End_Embedded -build_semihosting_executorch_runner corstone-320 +build_semihosting_executorch_runner corstone-320 Ethos_U85_SYS_DRAM_Mid diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py index 6d899c2146..2de1e713c9 100644 --- a/examples/arm/aot_arm_compiler.py +++ b/examples/arm/aot_arm_compiler.py @@ -258,12 +258,14 @@ def get_compile_spec( target: str, intermediates: Optional[str] = None, reorder_inputs: Optional[str] = None, + system_config: Optional[str] = None, + memory_mode: Optional[str] = None, ) -> ArmCompileSpecBuilder: spec_builder = None if target == "TOSA": spec_builder = ( ArmCompileSpecBuilder() - .tosa_compile_spec("TOSA-0.80.0+BI") + .tosa_compile_spec("TOSA-0.80+BI") .set_permute_memory_format(True) ) elif "ethos-u55" in target: @@ -271,8 +273,8 @@ def get_compile_spec( ArmCompileSpecBuilder() .ethosu_compile_spec( target, - system_config="Ethos_U55_High_End_Embedded", - memory_mode="Shared_Sram", + system_config=system_config, + memory_mode=memory_mode, extra_flags="--debug-force-regor --output-format=raw --verbose-operators --verbose-cycle-estimate", ) .set_permute_memory_format(True) @@ -284,8 +286,8 @@ def get_compile_spec( ArmCompileSpecBuilder() .ethosu_compile_spec( target, - system_config="Ethos_U85_SYS_DRAM_Mid", - memory_mode="Shared_Sram", + system_config=system_config, + memory_mode=memory_mode, extra_flags="--output-format=raw --verbose-operators --verbose-cycle-estimate", ) .set_permute_memory_format(True) @@ -441,6 +443,18 @@ def get_args(): default=None, help="Provide the order of the inputs. 
This can be required when inputs > 1.", ) + parser.add_argument( + "--system_config", + required=False, + default=None, + help="System configuration to select from the Vela configuration file (see vela.ini). This option must match the selected target, default is for an optimal system 'Ethos_U55_High_End_Embedded'/'Ethos_U85_SYS_DRAM_Mid'", + ) + parser.add_argument( + "--memory_mode", + required=False, + default=None, + help="Memory mode to select from the Vela configuration file (see vela.ini). Default is 'Shared_Sram' for Ethos-U55 targets and 'Sram_Only' for Ethos-U85 targets", + ) args = parser.parse_args() if args.evaluate and ( @@ -471,6 +485,22 @@ def get_args(): ): raise RuntimeError(f"Model {args.model_name} cannot be delegated.") + if "ethos-u" in args.target and args.system_config is None: + if "u55" in args.target: + args.system_config = "Ethos_U55_High_End_Embedded" + elif "u85" in args.target: + args.system_config = "Ethos_U85_SYS_DRAM_Mid" + else: + raise RuntimeError(f"Invalid target name {args.target}") + + if "ethos-u" in args.target and args.memory_mode is None: + if "u55" in args.target: + args.memory_mode = "Shared_Sram" + elif "u85" in args.target: + args.memory_mode = "Sram_Only" + else: + raise RuntimeError(f"Invalid target name {args.target}") + return args @@ -504,7 +534,11 @@ def get_args(): # As we can target multiple output encodings from ArmBackend, one must # be specified. 
compile_spec = get_compile_spec( - args.target, args.intermediates, args.reorder_inputs + args.target, + args.intermediates, + args.reorder_inputs, + args.system_config, + args.memory_mode, ) edge = to_edge_transform_and_lower( exported_program, diff --git a/examples/arm/executor_runner/CMakeLists.txt b/examples/arm/executor_runner/CMakeLists.txt index ea180f4d23..9b976fc2bb 100644 --- a/examples/arm/executor_runner/CMakeLists.txt +++ b/examples/arm/executor_runner/CMakeLists.txt @@ -18,8 +18,6 @@ if(NOT DEFINED ET_PTE_FILE_PATH AND NOT ${SEMIHOSTING}) ) endif() -set(TARGET_BOARD "corstone-300" CACHE STRING "Target board") - # Example ExecuTorch demo for bare metal Cortex-M based systems set(ET_DIR_PATH "../../.." @@ -46,6 +44,10 @@ set(PYTHON_EXECUTABLE CACHE PATH "Define to override python executable used" ) +# Selects timing adapter values matching system_config. +# Default is Ethos_U55_High_End_Embedded, simulating optimal hardware for the Corstone-300. +set(SYSTEM_CONFIG "Ethos_U55_High_End_Embedded" CACHE STRING "System config") + get_filename_component(ET_BUILD_DIR_PATH ${ET_BUILD_DIR_PATH} REALPATH) get_filename_component(ET_DIR_PATH ${ET_DIR_PATH} REALPATH) get_filename_component(ET_INCLUDE_PATH ${ET_INCLUDE_PATH} REALPATH) @@ -58,15 +60,14 @@ endif() # Corstone-300, that includes ethosu_core_driver and bare-metal bringup # libraries. We link against ethosu_target_init which includes all of these # dependencies. 
- -if(TARGET_BOARD STREQUAL "corstone-300") +if(SYSTEM_CONFIG STREQUAL "Ethos_U55_High_End_Embedded") add_subdirectory(${ETHOS_SDK_PATH}/core_platform/targets/corstone-300 target) + set(TARGET_BOARD "corstone-300") target_compile_definitions(ethosu_target_common INTERFACE # ETHOSU_MODEL=0 place pte file/data in SRAM area # ETHOSU_MODEL=1 place pte file/data in DDR area ETHOSU_MODEL=1 # Configure NPU architecture timing adapters - # Ethos_U55_High_End_Embedded # This is just example numbers and you should make this match your hardware # SRAM ETHOSU_TA_MAXR_0=8 @@ -97,14 +98,90 @@ if(TARGET_BOARD STREQUAL "corstone-300") ETHOSU_TA_HISTBIN_1=0 ETHOSU_TA_HISTCNT_1=0 ) -elseif(TARGET_BOARD STREQUAL "corstone-320") +elseif(SYSTEM_CONFIG STREQUAL "Ethos_U55_Deep_Embedded") + add_subdirectory(${ETHOS_SDK_PATH}/core_platform/targets/corstone-300 target) + set(TARGET_BOARD "corstone-300") + target_compile_definitions(ethosu_target_common INTERFACE + # ETHOSU_MODEL=0 place pte file/data in SRAM area + # ETHOSU_MODEL=1 place pte file/data in DDR area + ETHOSU_MODEL=1 + # Configure NPU architecture timing adapters + # This is just example numbers and you should make this match your hardware + # SRAM + ETHOSU_TA_MAXR_0=4 + ETHOSU_TA_MAXW_0=4 + ETHOSU_TA_MAXRW_0=0 + ETHOSU_TA_RLATENCY_0=8 + ETHOSU_TA_WLATENCY_0=8 + ETHOSU_TA_PULSE_ON_0=3999 + ETHOSU_TA_PULSE_OFF_0=1 + ETHOSU_TA_BWCAP_0=4000 + ETHOSU_TA_PERFCTRL_0=0 + ETHOSU_TA_PERFCNT_0=0 + ETHOSU_TA_MODE_0=1 + ETHOSU_TA_HISTBIN_0=0 + ETHOSU_TA_HISTCNT_0=0 + # Flash + ETHOSU_TA_MAXR_1=2 + ETHOSU_TA_MAXW_1=0 + ETHOSU_TA_MAXRW_1=0 + ETHOSU_TA_RLATENCY_1=32 + ETHOSU_TA_WLATENCY_1=0 + ETHOSU_TA_PULSE_ON_1=360 + ETHOSU_TA_PULSE_OFF_1=40 + ETHOSU_TA_BWCAP_1=25 + ETHOSU_TA_PERFCTRL_1=0 + ETHOSU_TA_PERFCNT_1=0 + ETHOSU_TA_MODE_1=1 + ETHOSU_TA_HISTBIN_1=0 + ETHOSU_TA_HISTCNT_1=0 + ) +elseif(SYSTEM_CONFIG STREQUAL "Ethos_U85_SYS_DRAM_Low") + add_subdirectory(${ETHOS_SDK_PATH}/core_platform/targets/corstone-320 target) + set(TARGET_BOARD 
"corstone-320") + target_compile_definitions(ethosu_target_common INTERFACE + # ETHOSU_MODEL=0 place pte file/data in SRAM area + # ETHOSU_MODEL=1 place pte file/data in DDR area + ETHOSU_MODEL=1 + # Configure NPU architecture timing adapters + # This is just example numbers and you should make this match your hardware + # SRAM + ETHOSU_TA_MAXR_0=8 + ETHOSU_TA_MAXW_0=8 + ETHOSU_TA_MAXRW_0=0 + ETHOSU_TA_RLATENCY_0=16 + ETHOSU_TA_WLATENCY_0=16 + ETHOSU_TA_PULSE_ON_0=3999 + ETHOSU_TA_PULSE_OFF_0=1 + ETHOSU_TA_BWCAP_0=4000 + ETHOSU_TA_PERFCTRL_0=0 + ETHOSU_TA_PERFCNT_0=0 + ETHOSU_TA_MODE_0=1 + ETHOSU_TA_HISTBIN_0=0 + ETHOSU_TA_HISTCNT_0=0 + # DRAM + ETHOSU_TA_MAXR_1=24 + ETHOSU_TA_MAXW_1=12 + ETHOSU_TA_MAXRW_1=0 + ETHOSU_TA_RLATENCY_1=250 + ETHOSU_TA_WLATENCY_1=125 + ETHOSU_TA_PULSE_ON_1=4000 + ETHOSU_TA_PULSE_OFF_1=1000 + ETHOSU_TA_BWCAP_1=2344 + ETHOSU_TA_PERFCTRL_1=0 + ETHOSU_TA_PERFCNT_1=0 + ETHOSU_TA_MODE_1=1 + ETHOSU_TA_HISTBIN_1=0 + ETHOSU_TA_HISTCNT_1=0 + ) +elseif(SYSTEM_CONFIG STREQUAL "Ethos_U85_SYS_DRAM_Mid" OR SYSTEM_CONFIG STREQUAL "Ethos_U85_SYS_DRAM_High") add_subdirectory(${ETHOS_SDK_PATH}/core_platform/targets/corstone-320 target) + set(TARGET_BOARD "corstone-320") target_compile_definitions(ethosu_target_common INTERFACE # ETHOSU_MODEL=0 place pte file/data in SRAM area # ETHOSU_MODEL=1 place pte file/data in DDR area ETHOSU_MODEL=1 # Configure NPU architecture timing adapters - # Ethos_U85_SYS_DRAM_Mid # This is just example numbers and you should make this match your hardware # SRAM ETHOSU_TA_MAXR_0=8 @@ -136,7 +213,7 @@ elseif(TARGET_BOARD STREQUAL "corstone-320") ETHOSU_TA_HISTCNT_1=0 ) else() - message(FATAL_ERROR "Unsupported TARGET_BOARD: ${TARGET_BOARD}") + message(FATAL_ERROR "Unsupported SYSTEM_CONFIG: ${SYSTEM_CONFIG}") endif() # Dependencies from the ExecuTorch build diff --git a/examples/arm/run.sh b/examples/arm/run.sh index daab39ffc6..55b715d738 100755 --- a/examples/arm/run.sh +++ b/examples/arm/run.sh @@ -30,6 +30,8 @@ 
build_type="Release" extra_build_flags="" build_only=false reorder_inputs="" +system_config="" +memory_mode="" help() { echo "Usage: $(basename $0) [options]" @@ -45,6 +47,9 @@ help() { echo " --build_only Only build, don't run FVP" echo " --scratch-dir= Path to your Ethos-U scrach dir if you not using default" echo " --reorder_inputs= Reorder the inputs. This can be required when inputs > 1." + echo " --system_config= System configuration to select from the Vela configuration file (see vela.ini). Default: Ethos_U55_High_End_Embedded for EthosU55 targets, Ethos_U85_SYS_DRAM_Mid for EthosU85 targets." + echo " NOTE: If given, this option must match the given target. This option also sets timing adapter values customized for specific hardware, see ./executor_runner/CMakeLists.txt." + echo " --memory_mode= Memory mode to select from the Vela configuration file (see vela.ini), e.g. Shared_Sram/Sram_Only. Default: 'Shared_Sram' for Ethos-U55 targets, 'Sram_Only' for Ethos-U85 targets" exit 0 } @@ -62,6 +67,8 @@ for arg in "$@"; do --build_only) build_only=true ;; --scratch-dir=*) root_dir="${arg#*=}";; --reorder_inputs=*) reorder_inputs="${arg#*=}";; + --system_config=*) system_config="${arg#*=}";; + --memory_mode=*) memory_mode="${arg#*=}";; *) ;; esac @@ -85,6 +92,7 @@ setup_path_script=${root_dir}/setup_path.sh et_root_dir=$(cd ${script_dir}/../.. 
&& pwd) et_build_dir=${et_root_dir}/cmake-out +# Set target based variables fvp_model=FVP_Corstone_SSE-300_Ethos-U55 if [[ ${target} =~ "ethos-u85" ]] then @@ -92,6 +100,24 @@ then fvp_model=FVP_Corstone_SSE-320 fi +if [[ ${system_config} == "" ]] +then + system_config="Ethos_U55_High_End_Embedded" + if [[ ${target} =~ "ethos-u85" ]] + then + system_config="Ethos_U85_SYS_DRAM_Mid" + fi +fi + +if [[ ${memory_mode} == "" ]] +then + memory_mode="Shared_Sram" + if [[ ${target} =~ "ethos-u85" ]] + then + memory_mode="Sram_Only" + fi +fi + toolchain_cmake=${script_dir}/ethos-u-setup/arm-none-eabi-gcc.cmake _setup_msg="please refer to ${script_dir}/ethos-u-setup/setup.sh to properly install necessary tools." @@ -125,7 +151,7 @@ function generate_pte_file() { # We are using the aot_lib from build_quantization_aot_lib below SO_LIB=$(find cmake-out-aot-lib -name libquantized_ops_aot_lib.${SO_EXT}) - local ARM_AOT_CMD="python3 -m examples.arm.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --reorder_inputs=${reorder_inputs} --output ${output_folder} --so_library=$SO_LIB" + local ARM_AOT_CMD="python3 -m examples.arm.aot_arm_compiler --model_name=${model} --target=${target} ${model_compiler_flags} --reorder_inputs=${reorder_inputs} --output ${output_folder} --so_library=$SO_LIB --system_config=${system_config} --memory_mode=${memory_mode}" echo "CALL ${ARM_AOT_CMD}" >&2 ${ARM_AOT_CMD} 1>&2 @@ -271,10 +297,8 @@ function build_executorch_runner() { local pte=${1} if [[ ${target} == *"ethos-u55"* ]]; then local target_cpu=cortex-m55 - local target_board=corstone-300 else local target_cpu=cortex-m85 - local target_board=corstone-320 fi echo "--------------------------------------------------------------------------------" echo "Build Arm Baremetal executor_runner for ${target} - '${executor_runner_path}/cmake-out'" @@ -291,7 +315,6 @@ function build_executorch_runner() { -DCMAKE_BUILD_TYPE=${build_type} \ -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} 
\ -DTARGET_CPU=${target_cpu} \ - -DTARGET_BOARD=${target_board} \ -DET_DIR_PATH:PATH=${et_root_dir} \ -DET_BUILD_DIR_PATH:PATH=${et_build_dir} \ -DET_PTE_FILE_PATH:PATH="${pte}" \ @@ -299,6 +322,7 @@ function build_executorch_runner() { -DETHOSU_TARGET_NPU_CONFIG=${target} \ ${build_with_etdump_flags} \ -DPYTHON_EXECUTABLE=$(which python3) \ + -DSYSTEM_CONFIG=${system_config} \ ${extra_build_flags} \ -B ${executor_runner_path}/cmake-out