Merge pull request #2 from dijopaul/dijopaul_add-nnlib2

Adding nnlib integration
dijopaul · Jul 26, 2024 · 0f8c702 · 0f8c702
2 parents 11b2fcb + 45a0a28
commit 0f8c702
Show file tree

Hide file tree

Showing 11 changed files with 189 additions and 21 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -67,3 +67,6 @@
 [submodule "extension/llm/third-party/abseil-cpp"]
 	path = extension/llm/third-party/abseil-cpp
 	url = https://github.com/abseil/abseil-cpp.git
+[submodule "backends/cadence/hifi/third-party/nnlib/nnlib-hifi4"]
+	path = backends/cadence/hifi/third-party/nnlib/nnlib-hifi4
+	url = https://github.com/foss-xtensa/nnlib-hifi4.git
diff --git a/backends/cadence/CMakeLists.txt b/backends/cadence/CMakeLists.txt
@@ -23,7 +23,12 @@ include(${EXECUTORCH_ROOT}/build/Utils.cmake)
 
 # Let files say "include <executorch/path/to/header.h>".
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
+set(TARGET_DIR reference) # default subtree for the operators/kernels below
 
+if(EXECUTORCH_NNLIB_OPT) # when set, use the hifi/ subtree and its nnlib build
+  set(TARGET_DIR hifi)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib)
+endif()
 
-add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/reference/operators)
-add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/reference/kernels)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/operators)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
diff --git a/backends/cadence/aot/functions_hifi.yaml b/backends/cadence/aot/functions_hifi.yaml
@@ -0,0 +1,128 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This yaml file lists ops that are also defined by the ATen library, plus Cadence custom ops.
+# For lean mode:
+#   - Codegen'd target `executorch_generated_lib` will be reading all the information
+#     from this file, including operator schema and kernel metadata.
+#   - Selective build target `codegen:executorch_defined_ops` selects all the
+#     operators in this file by dumping all the op names into `selected_operators.yaml`.
+#
+# See the README.md file in executorch/kernels/portable for a description of the syntax used
+# by this file.
+
+
+# aten ops
+- op: _to_copy.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::to_copy_out
+
+- op: _softmax.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::softmax_out
+
+- op: add.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::add_out
+
+- op: bmm.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::bmm_out
+
+- op: cat.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::cat_out
+
+- op: clone.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::clone_out
+
+- op: div.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::div_out
+
+- op: div.out_mode
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::div_out_mode
+
+- op: embedding.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::embedding_out
+
+- op: full.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::full_out
+
+- op: mul.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::mul_out
+
+- op: permute_copy.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::permute_copy_out
+
+- op: sigmoid.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::sigmoid_out
+
+- op: slice_copy.Tensor_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::slice_copy_Tensor_out
+
+- op: split_with_sizes_copy.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::split_with_sizes_copy_out
+
+- op: sub.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::sub_out
+
+- op: view_copy.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::view_copy_out
+
+- op: where.self_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::where_out
+
+# custom ops
+- func: cadence::quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::HiFi::quantize_per_tensor_out
+
+- func: cadence::dequantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::HiFi::dequantize_per_tensor_out
+
+
+- func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::HiFi::quantized_layer_norm_out
+
+- func: cadence::quantized_linear.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
+  kernels:
+    - arg_meta: null
+      kernel_name: impl::HiFi::quantized_linear_out
+
diff --git a/backends/cadence/hifi/kernels/CMakeLists.txt b/backends/cadence/hifi/kernels/CMakeLists.txt
@@ -14,9 +14,10 @@ add_library(
 target_include_directories(
   cadence_kernels
   PUBLIC .
-         ${NN_LIB_BASE_DIR}/xa_nnlib/algo/common/include/
-         ${NN_LIB_BASE_DIR}/xa_nnlib/include/nnlib
-         ${NN_LIB_BASE_DIR}/xa_nnlib/include
-         ${NN_LIB_BASE_DIR}/xa_nnlib/algo/ndsp/hifi4/include/
-         ${NXP_SDK_ROOT_DIR}/middleware/dsp/naturedsp/hifi4/include/
+         ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/algo/common/include/
+         ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/include/nnlib
+         ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/include
+         ${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/algo/ndsp/hifi4/include/
 )
+
+target_link_libraries(cadence_kernels PRIVATE xa_nnlib) # xa_nnlib: imported NNLib archive target
diff --git a/backends/cadence/hifi/kernels/kernels.cpp b/backends/cadence/hifi/kernels/kernels.cpp
@@ -7,8 +7,6 @@
  */
 
 #include "kernels.h"
-#include "NatureDSP_Signal.h"
-#include "NatureDSP_Signal_vector.h"
 #include "xa_nnlib_common.h"
 #include "xa_nnlib_common_macros.h"
 

diff --git a/backends/cadence/hifi/kernels/kernels.h b/backends/cadence/hifi/kernels/kernels.h
@@ -12,6 +12,10 @@
 #include "stddef.h"
 #include "xa_type_def.h"
 
+/* For NNLIB APIs */
+#include "xa_nnlib_kernels_api.h"
+
+
 namespace impl {
 namespace HiFi {
 namespace kernels {

diff --git a/backends/cadence/hifi/operators/CMakeLists.txt b/backends/cadence/hifi/operators/CMakeLists.txt
@@ -20,10 +20,10 @@ endif()
 
 # ATen compliant ops that are needed to run this model.
 set(_aten_ops__srcs
-    "${CMAKE_CURRENT_SOURCE_DIR}/op_add.cpp"
-    "${CMAKE_CURRENT_SOURCE_DIR}/op_embedding.cpp"
-    "${CMAKE_CURRENT_SOURCE_DIR}/op_full.cpp"
-    "${CMAKE_CURRENT_SOURCE_DIR}/op_view_copy.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_add.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_embedding.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_full.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_view_copy.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/activation_ops_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/copy_ops_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/broadcast_util.cpp"
@@ -58,8 +58,8 @@ target_include_directories(aten_ops_cadence PUBLIC ${ROOT_DIR}/..
 
 # Custom ops that are needed to run the test model.
 add_library(
-  custom_ops "quantized_linear_out.cpp" "quantized_conv_out.cpp"
-  "quantized_relu_out.cpp" "quantized_layer_norm.cpp"
+  custom_ops "quantized_linear_out.cpp"
+  "quantized_layer_norm.cpp"
   "quantize_per_tensor.cpp" "dequantize_per_tensor.cpp")
 target_include_directories(custom_ops PUBLIC ${ROOT_DIR}/..
                                              ${CMAKE_BINARY_DIR}
@@ -72,11 +72,11 @@ target_link_libraries(custom_ops PRIVATE cadence_kernels)
 # Executorch (for runtime). Here select all ops in functions.yaml
 gen_selected_ops(
   LIB_NAME "cadence_ops_lib" OPS_SCHEMA_YAML
-  "${CMAKE_CURRENT_LIST_DIR}/../../aot/functions.yaml" "" ""
+  "${CMAKE_CURRENT_LIST_DIR}/../../aot/functions_hifi.yaml" "" ""
 )
 generate_bindings_for_kernels(
   LIB_NAME "cadence_ops_lib" OPS_SCHEMA_YAML
-  FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/../../aot/functions.yaml
+  FUNCTIONS_YAML ${CMAKE_CURRENT_SOURCE_DIR}/../../aot/functions_hifi.yaml
 )
 message("Generated files ${gen_command_sources}")
 

diff --git a/backends/cadence/hifi/operators/dequantize_per_tensor.cpp b/backends/cadence/hifi/operators/dequantize_per_tensor.cpp
@@ -35,8 +35,7 @@ void dequantize_per_tensor_out(
         out_data, input_data, scale, zero_point, numel);
   } else if (input.scalar_type() == ScalarType::Char) {
     const int8_t* input_data = input.const_data_ptr<int8_t>();
-    impl::HiFi::kernels::dequantize<int8_t>(
-        out_data, input_data, scale, zero_point, numel);
+    xa_nn_elm_dequantize_asym8s_f32(out_data, input_data, zero_point, scale, numel);
   } else if (input.scalar_type() == ScalarType::Int) {
     const int32_t* input_data = input.const_data_ptr<int32_t>();
     impl::HiFi::kernels::dequantize<int32_t>(

diff --git a/backends/cadence/hifi/operators/quantize_per_tensor.cpp b/backends/cadence/hifi/operators/quantize_per_tensor.cpp
@@ -37,8 +37,7 @@ void quantize_per_tensor_out(
         out_data, input_data, 1. / scale, zero_point, numel);
   } else if (out.scalar_type() == ScalarType::Char) {
     int8_t* out_data = out.mutable_data_ptr<int8_t>();
-    impl::HiFi::kernels::quantize<int8_t>(
-        out_data, input_data, 1. / scale, zero_point, numel);
+    xa_nn_elm_quantize_f32_asym8s(out_data, input_data, scale, zero_point, numel);
   } else if (out.scalar_type() == ScalarType::Int) {
     int32_t* out_data = out.mutable_data_ptr<int32_t>();
     impl::HiFi::kernels::quantize<int32_t>(

diff --git a/backends/cadence/hifi/third-party/nnlib/CMakeLists.txt b/backends/cadence/hifi/third-party/nnlib/CMakeLists.txt
@@ -0,0 +1,30 @@
+
+# Builds the xa_nnlib static archive from the nnlib-hifi4 submodule makefile
+# and exposes it to the rest of the build as the imported target xa_nnlib.
+cmake_minimum_required(VERSION 3.10.0)
+project(cadence_nnlib)
+
+set(_nnlib_build_dir
+    "${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/build"
+)
+set(_nnlib_archive "${CMAKE_CURRENT_BINARY_DIR}/lib/xa_nnlib.a")
+
+# BYPRODUCTS names the archive so generators such as Ninja know which rule
+# creates it; VERBATIM makes argument escaping platform-independent.
+add_custom_target(
+  nnlib_target ALL
+  COMMAND
+    make install_nnlib -f makefile -C "${_nnlib_build_dir}"
+    "OBJDIR=${CMAKE_CURRENT_BINARY_DIR}/obj"
+    "LIBDIR=${CMAKE_CURRENT_BINARY_DIR}/lib" -j8
+  BYPRODUCTS "${_nnlib_archive}"
+  VERBATIM
+)
+
+# The archive only exists after nnlib_target has run, so it is wrapped in an
+# imported library whose build is ordered after that target.
+add_library(xa_nnlib STATIC IMPORTED GLOBAL)
+add_dependencies(xa_nnlib nnlib_target)
+set_target_properties(
+  xa_nnlib PROPERTIES IMPORTED_LOCATION "${_nnlib_archive}"
+)
diff --git a/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4 b/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4