Skip to content

Commit

Permalink
Merge branch 'dijopaul_int' into dijopaul_optimization2
Browse files Browse the repository at this point in the history
  • Loading branch information
dijopaul authored Jul 31, 2024
2 parents 0b58c5d + 44b46df commit d344254
Show file tree
Hide file tree
Showing 10 changed files with 2,129 additions and 2 deletions.
5 changes: 5 additions & 0 deletions backends/cadence/aot/functions_hifi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,11 @@
kernels:
- arg_meta: null
kernel_name: torch::executor::where_out

- op: rsqrt.out
kernels:
- arg_meta: null
kernel_name: torch::executor::rsqrt_out

- op: empty.out
kernels:
Expand Down
3 changes: 3 additions & 0 deletions backends/cadence/hifi/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ add_library(
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_add_f32_broadcast.c
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_mul_f32_broadcast.c
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_where_f32xf32_f32.c
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_div_broadcast_f32.c
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_floor_div_broadcast_f32.c
)

target_include_directories(
Expand Down
35 changes: 34 additions & 1 deletion backends/cadence/hifi/kernels/kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,40 @@

/* For NNLIB APIs */
#include "xa_nnlib_kernels_api.h"



extern "C" WORD32 xa_nn_elm_floor_div_f32xf32_f32(
FLOAT32 * __restrict__ p_out,
const FLOAT32 * __restrict__ p_inp1,
const FLOAT32 * __restrict__ p_inp2,
WORD32 num_elm);

extern "C" WORD32 xa_nn_elm_floor_div_broadcast_4D_f32xf32_f32(
FLOAT32 * __restrict__ p_out,
const WORD32 *const p_out_shape,
const FLOAT32 * __restrict__ p_inp1,
const WORD32 *const p_inp1_shape,
const FLOAT32 * __restrict__ p_inp2,
const WORD32 *const p_inp2_shape);

extern "C" WORD32 xa_nn_elm_div_broadcast_4D_f32xf32_f32(
FLOAT32 * __restrict__ p_out,
const WORD32 *const p_out_shape,
const FLOAT32 * __restrict__ p_inp1,
const WORD32 *const p_inp1_shape,
const FLOAT32 * __restrict__ p_inp2,
const WORD32 *const p_inp2_shape);

extern "C" WORD32 xa_nn_elm_where_f32xf32_f32(FLOAT32 * __restrict__ p_out, const FLOAT32 * __restrict__ p_inp1, const FLOAT32 * __restrict__ p_inp2, const unsigned char *__restrict__ p_condition, WORD32 num_elm);

extern "C" WORD32 xa_nn_elm_where_broadcast_4D_f32xf32_f32(FLOAT32 * __restrict__ p_out, const WORD32 *const p_out_shape,
const FLOAT32 * __restrict__ p_inp1,
const WORD32 *const p_inp1_shape,
const FLOAT32 * __restrict__ p_inp2,
const WORD32 *const p_inp2_shape,
const unsigned char *__restrict__ p_condition,
const WORD32 *const p_condition_shape
);

/* new functions in nnlib */
extern "C" WORD32 xa_nn_elm_add_broadcast_4D_f32xf32_f32(FLOAT32 * __restrict__ p_out,
Expand Down
8 changes: 7 additions & 1 deletion backends/cadence/hifi/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ set(_aten_ops__srcs
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_add.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_embedding.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_full.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_where.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_view_copy.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/activation_ops_util.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/copy_ops_util.cpp"
Expand All @@ -37,6 +38,9 @@ set(_aten_ops__srcs
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_div.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_mul.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_div.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_rsqrt.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_mul.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_permute_copy.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_sigmoid.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_slice_copy.cpp"
Expand All @@ -45,7 +49,9 @@ set(_aten_ops__srcs
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_sub.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_to_copy.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_where.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_empty.cpp")
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_empty.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/pattern/unary_ufunc_realhb_to_floath.cpp")

add_library(aten_ops_cadence ${_aten_ops__srcs})
target_link_libraries(aten_ops_cadence PUBLIC executorch)
target_link_libraries(aten_ops_cadence PRIVATE cadence_kernels)
Expand Down
Loading

0 comments on commit d344254

Please sign in to comment.