From 3f6b365cef1b171f50667991c657261057e060b3 Mon Sep 17 00:00:00 2001 From: dijopaul Date: Wed, 24 Jul 2024 04:52:07 -0700 Subject: [PATCH 1/4] Add nnlib as submodule --- .gitmodules | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitmodules b/.gitmodules index 9164ee5700..815a04f976 100644 --- a/.gitmodules +++ b/.gitmodules @@ -67,3 +67,6 @@ [submodule "extension/llm/third-party/abseil-cpp"] path = extension/llm/third-party/abseil-cpp url = https://github.com/abseil/abseil-cpp.git +[submodule "backends/cadence/hifi/third-party/nnlib/nnlib-hifi4"] + path = backends/cadence/hifi/third-party/nnlib/nnlib-hifi4 + url = https://github.com/foss-xtensa/nnlib-hifi4.git \ No newline at end of file From 6d1d27e838a2d5e8730eabfdda41d9b5217aa954 Mon Sep 17 00:00:00 2001 From: dijopaul Date: Wed, 24 Jul 2024 05:36:57 -0700 Subject: [PATCH 2/4] Adding nnlib submodule --- .gitmodules | 2 +- backends/cadence/hifi/third-party/nnlib/nnlib-hifi4 | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 160000 backends/cadence/hifi/third-party/nnlib/nnlib-hifi4 diff --git a/.gitmodules b/.gitmodules index 815a04f976..f956373d59 100644 --- a/.gitmodules +++ b/.gitmodules @@ -69,4 +69,4 @@ url = https://github.com/abseil/abseil-cpp.git [submodule "backends/cadence/hifi/third-party/nnlib/nnlib-hifi4"] path = backends/cadence/hifi/third-party/nnlib/nnlib-hifi4 - url = https://github.com/foss-xtensa/nnlib-hifi4.git \ No newline at end of file + url = https://github.com/foss-xtensa/nnlib-hifi4.git diff --git a/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4 b/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4 new file mode 160000 index 0000000000..6a9ea45e23 --- /dev/null +++ b/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4 @@ -0,0 +1 @@ +Subproject commit 6a9ea45e23ef591fe207442df33a5ebe88bbe8de From 3d67a7b4daed0b01e5dc407ec1c6260f04adcd46 Mon Sep 17 00:00:00 2001 From: dijopaul Date: Wed, 24 Jul 2024 12:18:16 -0700 Subject: [PATCH 3/4] Integrated nnlib API under 
backends/cadence/hifi --- backends/cadence/CMakeLists.txt | 9 +- backends/cadence/aot/functions_hifi.yaml | 128 ++++++++++++++++++ backends/cadence/hifi/kernels/CMakeLists.txt | 11 +- backends/cadence/hifi/kernels/kernels.cpp | 4 +- backends/cadence/hifi/kernels/kernels.h | 14 ++ .../cadence/hifi/operators/CMakeLists.txt | 16 +-- .../hifi/operators/dequantize_per_tensor.cpp | 3 +- .../hifi/operators/quantize_per_tensor.cpp | 3 +- .../hifi/third-party/nnlib/CMakeLists.txt | 30 ++++ 9 files changed, 197 insertions(+), 21 deletions(-) create mode 100644 backends/cadence/aot/functions_hifi.yaml create mode 100644 backends/cadence/hifi/third-party/nnlib/CMakeLists.txt diff --git a/backends/cadence/CMakeLists.txt b/backends/cadence/CMakeLists.txt index b3c3b80d00..e9f6877c7e 100644 --- a/backends/cadence/CMakeLists.txt +++ b/backends/cadence/CMakeLists.txt @@ -23,7 +23,12 @@ include(${EXECUTORCH_ROOT}/build/Utils.cmake) # Let files say "include ". set(_common_include_directories ${EXECUTORCH_ROOT}/..) +set (TARGET_DIR reference) +if(EXECUTORCH_NNLIB_OPT) +set (TARGET_DIR hifi) +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib) +endif() -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/reference/operators) -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/reference/kernels) +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/operators) +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels) diff --git a/backends/cadence/aot/functions_hifi.yaml b/backends/cadence/aot/functions_hifi.yaml new file mode 100644 index 0000000000..f9d75ef56b --- /dev/null +++ b/backends/cadence/aot/functions_hifi.yaml @@ -0,0 +1,128 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This yaml file contains operators that are also defined by the ATen library. +# For lean mode: +# - Codegen'd target `executorch_generated_lib` will be reading all the information +# from this file, including operator schema and kernel metadata. 
+# - Selective build target `codegen:executorch_defined_ops` now is selecting all the +# operators in this file, by dumping all the op names into `selected_operators.yaml`. +# +# See the README.md file in executorch/kernels/portable for a description of the syntax used +# by this file. + + +# aten ops +- op: _to_copy.out + kernels: + - arg_meta: null + kernel_name: torch::executor::to_copy_out + +- op: _softmax.out + kernels: + - arg_meta: null + kernel_name: torch::executor::softmax_out + +- op: add.out + kernels: + - arg_meta: null + kernel_name: torch::executor::add_out + +- op: bmm.out + kernels: + - arg_meta: null + kernel_name: torch::executor::bmm_out + +- op: cat.out + kernels: + - arg_meta: null + kernel_name: torch::executor::cat_out + +- op: clone.out + kernels: + - arg_meta: null + kernel_name: torch::executor::clone_out + +- op: div.out + kernels: + - arg_meta: null + kernel_name: torch::executor::div_out + +- op: div.out_mode + kernels: + - arg_meta: null + kernel_name: torch::executor::div_out_mode + +- op: embedding.out + kernels: + - arg_meta: null + kernel_name: torch::executor::embedding_out + +- op: full.out + kernels: + - arg_meta: null + kernel_name: torch::executor::full_out + +- op: mul.out + kernels: + - arg_meta: null + kernel_name: torch::executor::mul_out + +- op: permute_copy.out + kernels: + - arg_meta: null + kernel_name: torch::executor::permute_copy_out + +- op: sigmoid.out + kernels: + - arg_meta: null + kernel_name: torch::executor::sigmoid_out + +- op: slice_copy.Tensor_out + kernels: + - arg_meta: null + kernel_name: torch::executor::slice_copy_Tensor_out + +- op: split_with_sizes_copy.out + kernels: + - arg_meta: null + kernel_name: torch::executor::split_with_sizes_copy_out + +- op: sub.out + kernels: + - arg_meta: null + kernel_name: torch::executor::sub_out + +- op: view_copy.out + kernels: + - arg_meta: null + kernel_name: torch::executor::view_copy_out + +- op: where.self_out + kernels: + - arg_meta: null + kernel_name: 
torch::executor::where_out + +# custom ops +- func: cadence::quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!) + variants: function + kernels: + - arg_meta: null + kernel_name: impl::HiFi::quantize_per_tensor_out + +- func: cadence::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!) + variants: function + kernels: + - arg_meta: null + kernel_name: impl::HiFi::dequantize_per_tensor_out + + +- func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!) + kernels: + - arg_meta: null + kernel_name: impl::HiFi::quantized_layer_norm_out + +- func: cadence::quantized_linear.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!) + kernels: + - arg_meta: null + kernel_name: impl::HiFi::quantized_linear_out + diff --git a/backends/cadence/hifi/kernels/CMakeLists.txt b/backends/cadence/hifi/kernels/CMakeLists.txt index 9d4d456d8b..af2359a1a0 100644 --- a/backends/cadence/hifi/kernels/CMakeLists.txt +++ b/backends/cadence/hifi/kernels/CMakeLists.txt @@ -14,9 +14,10 @@ add_library( target_include_directories( cadence_kernels PUBLIC . 
- ${NN_LIB_BASE_DIR}/xa_nnlib/algo/common/include/ - ${NN_LIB_BASE_DIR}/xa_nnlib/include/nnlib - ${NN_LIB_BASE_DIR}/xa_nnlib/include - ${NN_LIB_BASE_DIR}/xa_nnlib/algo/ndsp/hifi4/include/ - ${NXP_SDK_ROOT_DIR}/middleware/dsp/naturedsp/hifi4/include/ + ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/algo/common/include/ + ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/include/nnlib + ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/include + ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/algo/ndsp/hifi4/include/ ) + +target_link_libraries(cadence_kernels PRIVATE xa_nnlib) \ No newline at end of file diff --git a/backends/cadence/hifi/kernels/kernels.cpp b/backends/cadence/hifi/kernels/kernels.cpp index 2f29b25ac8..23ad7e3418 100644 --- a/backends/cadence/hifi/kernels/kernels.cpp +++ b/backends/cadence/hifi/kernels/kernels.cpp @@ -7,8 +7,8 @@ */ #include "kernels.h" -#include "NatureDSP_Signal.h" -#include "NatureDSP_Signal_vector.h" +#include "NatureDSP_Signal_math.h" +//#include "NatureDSP_Signal_vector.h" #include "xa_nnlib_common.h" #include "xa_nnlib_common_macros.h" diff --git a/backends/cadence/hifi/kernels/kernels.h b/backends/cadence/hifi/kernels/kernels.h index 13e0470b38..76fd95762f 100644 --- a/backends/cadence/hifi/kernels/kernels.h +++ b/backends/cadence/hifi/kernels/kernels.h @@ -12,6 +12,20 @@ #include "stddef.h" #include "xa_type_def.h" +/* NNLIB C APIs */ + +extern "C" WORD32 xa_nn_elm_quantize_f32_asym8s(WORD8 * __restrict__ p_out, + const FLOAT32 * __restrict__ p_inp, + FLOAT32 out_scale, + WORD32 out_zero_bias, + WORD32 num_elm); + +extern "C" WORD32 xa_nn_elm_dequantize_asym8s_f32(FLOAT32 * __restrict__ p_out, + const WORD8 * __restrict__ p_inp, + WORD32 inp_zero_bias, + FLOAT32 inp_scale, + WORD32 num_elm); + namespace impl { namespace HiFi { namespace kernels { diff --git 
a/backends/cadence/hifi/operators/CMakeLists.txt b/backends/cadence/hifi/operators/CMakeLists.txt index c22dc0c997..943982b2da 100644 --- a/backends/cadence/hifi/operators/CMakeLists.txt +++ b/backends/cadence/hifi/operators/CMakeLists.txt @@ -20,10 +20,10 @@ endif() # ATen compliant ops that are needed to run this model. set(_aten_ops__srcs - "${CMAKE_CURRENT_SOURCE_DIR}/op_add.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/op_embedding.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/op_full.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/op_view_copy.cpp" + "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_add.cpp" + "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_embedding.cpp" + "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_full.cpp" + "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_view_copy.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/activation_ops_util.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/copy_ops_util.cpp" "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/broadcast_util.cpp" @@ -58,8 +58,8 @@ target_include_directories(aten_ops_cadence PUBLIC ${ROOT_DIR}/.. # Custom ops that are needed to run the test model. add_library( - custom_ops "quantized_linear_out.cpp" "quantized_conv_out.cpp" - "quantized_relu_out.cpp" "quantized_layer_norm.cpp" + custom_ops "quantized_linear_out.cpp" + "quantized_layer_norm.cpp" "quantize_per_tensor.cpp" "dequantize_per_tensor.cpp") target_include_directories(custom_ops PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR} @@ -72,11 +72,11 @@ target_link_libraries(custom_ops PRIVATE cadence_kernels) # Executorch (for runtime). 
Here select all ops in functions.yaml gen_selected_ops( LIB_NAME "cadence_ops_lib" OPS_SCHEMA_YAML - "${CMAKE_CURRENT_LIST_DIR}/../../aot/functions.yaml" "" "" + "${CMAKE_CURRENT_LIST_DIR}/../../aot/functions_hifi.yaml" "" "" ) generate_bindings_for_kernels( LIB_NAME "cadence_ops_lib" OPS_SCHEMA_YAML - FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/../../aot/functions.yaml + FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/../../aot/functions_hifi.yaml ) message("Generated files ${gen_command_sources}") diff --git a/backends/cadence/hifi/operators/dequantize_per_tensor.cpp b/backends/cadence/hifi/operators/dequantize_per_tensor.cpp index dcc4ace789..5776b62607 100644 --- a/backends/cadence/hifi/operators/dequantize_per_tensor.cpp +++ b/backends/cadence/hifi/operators/dequantize_per_tensor.cpp @@ -35,8 +35,7 @@ void dequantize_per_tensor_out( out_data, input_data, scale, zero_point, numel); } else if (input.scalar_type() == ScalarType::Char) { const int8_t* input_data = input.const_data_ptr(); - impl::HiFi::kernels::dequantize( - out_data, input_data, scale, zero_point, numel); + xa_nn_elm_dequantize_asym8s_f32(out_data, input_data, zero_point, scale, numel); } else if (input.scalar_type() == ScalarType::Int) { const int32_t* input_data = input.const_data_ptr(); impl::HiFi::kernels::dequantize( diff --git a/backends/cadence/hifi/operators/quantize_per_tensor.cpp b/backends/cadence/hifi/operators/quantize_per_tensor.cpp index ec186cc68e..93a2c19bb0 100644 --- a/backends/cadence/hifi/operators/quantize_per_tensor.cpp +++ b/backends/cadence/hifi/operators/quantize_per_tensor.cpp @@ -37,8 +37,7 @@ void quantize_per_tensor_out( out_data, input_data, 1. / scale, zero_point, numel); } else if (out.scalar_type() == ScalarType::Char) { int8_t* out_data = out.mutable_data_ptr(); - impl::HiFi::kernels::quantize( - out_data, input_data, 1. 
/ scale, zero_point, numel); + xa_nn_elm_quantize_f32_asym8s(out_data, input_data, scale, zero_point, numel); } else if (out.scalar_type() == ScalarType::Int) { int32_t* out_data = out.mutable_data_ptr(); impl::HiFi::kernels::quantize( diff --git a/backends/cadence/hifi/third-party/nnlib/CMakeLists.txt b/backends/cadence/hifi/third-party/nnlib/CMakeLists.txt new file mode 100644 index 0000000000..e93e0759d2 --- /dev/null +++ b/backends/cadence/hifi/third-party/nnlib/CMakeLists.txt @@ -0,0 +1,30 @@ + +cmake_minimum_required(VERSION 3.10.0) +project(cadence_nnlib) + + +add_custom_target( nnlib_target ALL COMMAND + make install_nnlib -f makefile -C ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/build + OBJDIR=${CMAKE_CURRENT_BINARY_DIR}/obj + LIBDIR=${CMAKE_CURRENT_BINARY_DIR}/lib + -j8 ) + +add_library(xa_nnlib STATIC IMPORTED GLOBAL) +add_dependencies(xa_nnlib nnlib_target) + +set_property( + TARGET xa_nnlib + PROPERTY + IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/lib/xa_nnlib.a" +) + + + + + + + + + + + From 45a0a286aa313e6a9d5028b3ed12b1c0f4c5be05 Mon Sep 17 00:00:00 2001 From: dijopaul Date: Thu, 25 Jul 2024 00:11:41 -0700 Subject: [PATCH 4/4] Fix review comments on PR#3 --- backends/cadence/hifi/kernels/kernels.cpp | 2 -- backends/cadence/hifi/kernels/kernels.h | 16 +++------------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/backends/cadence/hifi/kernels/kernels.cpp b/backends/cadence/hifi/kernels/kernels.cpp index 23ad7e3418..47a5c1cfc0 100644 --- a/backends/cadence/hifi/kernels/kernels.cpp +++ b/backends/cadence/hifi/kernels/kernels.cpp @@ -7,8 +7,6 @@ */ #include "kernels.h" -#include "NatureDSP_Signal_math.h" -//#include "NatureDSP_Signal_vector.h" #include "xa_nnlib_common.h" #include "xa_nnlib_common_macros.h" diff --git a/backends/cadence/hifi/kernels/kernels.h b/backends/cadence/hifi/kernels/kernels.h index 76fd95762f..f7b2147ca8 100644 --- a/backends/cadence/hifi/kernels/kernels.h +++ 
b/backends/cadence/hifi/kernels/kernels.h @@ -12,19 +12,9 @@ #include "stddef.h" #include "xa_type_def.h" -/* NNLIB C APIs */ - -extern "C" WORD32 xa_nn_elm_quantize_f32_asym8s(WORD8 * __restrict__ p_out, - const FLOAT32 * __restrict__ p_inp, - FLOAT32 out_scale, - WORD32 out_zero_bias, - WORD32 num_elm); - -extern "C" WORD32 xa_nn_elm_dequantize_asym8s_f32(FLOAT32 * __restrict__ p_out, - const WORD8 * __restrict__ p_inp, - WORD32 inp_zero_bias, - FLOAT32 inp_scale, - WORD32 num_elm); +/* For NNLIB APIs */ +#include "xa_nnlib_kernels_api.h" + namespace impl { namespace HiFi {