Adding sub, sigmoid, permute, view_copy ops (#6)
dijopaul authored Aug 1, 2024
1 parent a445497 commit f4cf6c8
Showing 8 changed files with 777 additions and 2 deletions.
3 changes: 2 additions & 1 deletion backends/cadence/hifi/kernels/CMakeLists.txt
@@ -11,6 +11,7 @@ add_library(
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_add_f32_broadcast.c
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_mul_f32_broadcast.c
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_transpose_32.c
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_where_f32xf32_f32.c
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_div_broadcast_f32.c
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_floor_div_broadcast_f32.c
@@ -25,4 +26,4 @@ target_include_directories(
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/algo/ndsp/hifi4/include/
)

target_link_libraries(cadence_kernels PRIVATE xa_nnlib)
8 changes: 8 additions & 0 deletions backends/cadence/hifi/kernels/kernels.h
@@ -63,6 +63,14 @@ extern "C" WORD32 xa_nn_elm_mul_broadcast_4D_f32xf32_f32(FLOAT32 * __restrict__
const WORD32 *const p_inp1_shape,
const FLOAT32 * __restrict__ p_inp2,
const WORD32 *const p_inp2_shape);

extern "C" WORD32 xa_nn_transpose_32_32(WORD32 * __restrict__ p_out
,const WORD32 *const p_out_shape
,const WORD32 * __restrict__ p_inp
,const WORD32 *const p_inp_shape
,const WORD32 * __restrict__ p_permute_vec
,WORD32 num_out_dims
,WORD32 num_inp_dims);

namespace impl {
namespace HiFi {
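For reference, a minimal sketch of how this new transpose entry point is driven, mirroring the call made in op_permute_copy.cpp below. The 2x3 input, the {1, 0} permute vector, and the expected output are illustrative assumptions about the kernel's permute semantics (p_out_shape[i] == p_inp_shape[p_permute_vec[i]]), not part of the header:

// transpose_sketch.cpp -- standalone sketch; assumes WORD32 is a 32-bit int
// and that the kernel permutes axes the same way permute_copy_out does.
#include <cstdio>
typedef int WORD32;
extern "C" WORD32 xa_nn_transpose_32_32(WORD32* p_out, const WORD32* p_out_shape,
    const WORD32* p_inp, const WORD32* p_inp_shape,
    const WORD32* p_permute_vec, WORD32 num_out_dims, WORD32 num_inp_dims);

int main() {
  WORD32 inp[6] = {1, 2, 3, 4, 5, 6}; // 2x3, row-major
  WORD32 out[6];
  WORD32 inp_shape[2] = {2, 3};
  WORD32 out_shape[2] = {3, 2};
  WORD32 permute_vec[2] = {1, 0}; // swap the two axes, i.e. a transpose
  xa_nn_transpose_32_32(out, out_shape, inp, inp_shape, permute_vec, 2, 2);
  for (int i = 0; i < 6; i++)
    printf("%d ", out[i]); // expected: 1 4 2 5 3 6
  return 0;
}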
5 changes: 4 additions & 1 deletion backends/cadence/hifi/operators/CMakeLists.txt
@@ -23,6 +23,7 @@ set(_aten_ops__srcs
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_add.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_embedding.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_full.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_view_copy.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_where.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_view_copy.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/activation_ops_util.cpp"
@@ -38,6 +39,8 @@ set(_aten_ops__srcs
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_div.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_mul.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_permute_copy.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_sigmoid.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_div.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_rsqrt.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_mul.cpp"
@@ -46,7 +49,7 @@
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_slice_copy.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_softmax.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_split_with_sizes_copy.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_sub.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_sub.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_to_copy.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_where.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_empty.cpp"
144 changes: 144 additions & 0 deletions backends/cadence/hifi/operators/op_permute_copy.cpp
@@ -0,0 +1,144 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/kernels/portable/cpu/util/copy_ops_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include "kernels.h"

namespace torch {
namespace executor {
namespace native {

using SizesType = exec_aten::SizesType;
using Tensor = exec_aten::Tensor;
using IntArrayRef = exec_aten::ArrayRef<int64_t>;

namespace {

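// Advances `coordinate` (indexing into `tensor`) to the element that the next
// output position reads from: the input axis named last in `dims` varies
// fastest, and a wrap at any axis carries into the axis named before it.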
void increment_coordinate_permuted(
const Tensor& tensor,
size_t* const coordinate,
IntArrayRef dims) {
for (int i = dims.size() - 1; i >= 0; i--) {
size_t d = dims[i] >= 0 ? dims[i] : dims[i] + tensor.dim();
coordinate[d]++;
if (coordinate[d] == tensor.size(d)) {
coordinate[d] = 0;
} else {
return;
}
}
}

} // namespace

Tensor& permute_copy_out(
RuntimeContext& ctx,
const Tensor& in,
IntArrayRef dims,
Tensor& out) {
(void)ctx;

ET_KERNEL_CHECK(
ctx, check_permute_copy_args(in, dims, out), InvalidArgument, out);

Tensor::SizesType expected_out_size[kTensorDimensionLimit];
size_t expected_out_dim = 0;
get_permute_copy_out_target_size(
in, dims, expected_out_size, &expected_out_dim);
ET_KERNEL_CHECK(
ctx,
resize_tensor(out, {expected_out_size, expected_out_dim}) == Error::Ok,
InvalidArgument,
out);

  // check_permute_copy_args guarantees in and out share a dtype.
  const auto in_type = in.scalar_type();

  if (in_type == ScalarType::Float) {
    // The NNLib kernel transposes 32-bit words without interpreting them,
    // so float data can be reinterpreted as WORD32 for the copy.
    const WORD32* p_inp = (const WORD32*)in.const_data_ptr<float>();
    WORD32* p_out = (WORD32*)out.mutable_data_ptr<float>();

    WORD32 num_inp_dims = in.dim();
    WORD32 num_out_dims = num_inp_dims;

    WORD32 p_inp_shape[5];
    WORD32 p_out_shape[5];
    WORD32 p_permute_vec[5];

    for (int i = 0; i < num_inp_dims; i++) {
      p_inp_shape[i] = in.size(i);
      p_out_shape[i] = in.size(dims[i]);
      p_permute_vec[i] = dims[i];
    }

    xa_nn_transpose_32_32(
        p_out,
        p_out_shape,
        p_inp,
        p_inp_shape,
        p_permute_vec,
        num_out_dims,
        num_inp_dims);
  } else if (in_type == ScalarType::Char) {
    const WORD8* p_inp = (const WORD8*)in.const_data_ptr<char>();
    WORD8* p_out = (WORD8*)out.mutable_data_ptr<char>();

    WORD32 num_inp_dims = in.dim();
    WORD32 num_out_dims = num_inp_dims;

    WORD32 p_inp_shape[5];
    WORD32 p_out_shape[5];
    WORD32 p_permute_vec[5];

    for (int i = 0; i < num_inp_dims; i++) {
      p_inp_shape[i] = in.size(i);
      p_out_shape[i] = in.size(dims[i]);
      p_permute_vec[i] = dims[i];
    }

    p_inp_shape[num_inp_dims] = 4;
    p_out_shape[num_inp_dims] = 4;

    xa_nn_transpose_8_8(
        p_out,
        p_out_shape,
        p_inp,
        p_inp_shape,
        p_permute_vec,
        num_out_dims,
        num_inp_dims);
  } else {
    // Generic fallback: in and out must be the same dtype.
    ET_SWITCH_ALL_TYPES(in_type, ctx, "permute_copy.out", CTYPE, [&] {
      const CTYPE* const in_data = in.const_data_ptr<CTYPE>();
      CTYPE* const out_data = out.mutable_data_ptr<CTYPE>();

      size_t in_coord[kTensorDimensionLimit] = {0};

      for (size_t i = 0; i < out.numel(); ++i) {
        out_data[i] = in_data[coordinateToIndex(in, in_coord)];
        increment_coordinate_permuted(in, in_coord, dims);
      }
    });
  }

return out;
}

} // namespace native
} // namespace executor
} // namespace torch
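The generic fallback above writes the output linearly while walking the input with a permuted coordinate. A self-contained sketch of that traversal on plain arrays follows; the 2x3 example and the helper names are illustrative, not from the source:

#include <cstdio>
#include <vector>

// Row-major offset of `coord` in a tensor with the given `sizes`
// (the role coordinateToIndex plays in the op above).
static size_t coordinate_to_index(const std::vector<size_t>& coord,
                                  const std::vector<size_t>& sizes) {
  size_t idx = 0;
  for (size_t d = 0; d < sizes.size(); d++)
    idx = idx * sizes[d] + coord[d];
  return idx;
}

int main() {
  // Permute a 2x3 input with dims = {1, 0}, i.e. a transpose.
  std::vector<size_t> in_sizes = {2, 3};
  std::vector<size_t> dims = {1, 0};
  int in_data[6] = {1, 2, 3, 4, 5, 6};
  int out_data[6];

  std::vector<size_t> coord(2, 0);
  for (size_t i = 0; i < 6; i++) {
    out_data[i] = in_data[coordinate_to_index(coord, in_sizes)];
    // Same carry logic as increment_coordinate_permuted: bump the input axis
    // that corresponds to the last output axis first, carrying on wrap.
    for (int j = (int)dims.size() - 1; j >= 0; j--) {
      size_t d = dims[j];
      if (++coord[d] < in_sizes[d])
        break;
      coord[d] = 0;
    }
  }
  for (int i = 0; i < 6; i++)
    printf("%d ", out_data[i]); // prints: 1 4 2 5 3 6
  return 0;
}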
68 changes: 68 additions & 0 deletions backends/cadence/hifi/operators/op_sigmoid.cpp
@@ -0,0 +1,68 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <cmath>

#include <executorch/kernels/portable/cpu/util/functional_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include "kernels.h"

namespace torch {
namespace executor {
namespace native {

using Tensor = exec_aten::Tensor;

Tensor& sigmoid_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) {
(void)ctx;

ET_KERNEL_CHECK(
ctx, in.scalar_type() != ScalarType::Bool, InvalidArgument, out);
ET_KERNEL_CHECK(ctx, tensor_is_floating_type(out), InvalidArgument, out);

// Resize for dynamic shape
ET_KERNEL_CHECK_MSG(
ctx,
resize_tensor(out, in.sizes()) == Error::Ok,
InvalidArgument,
out,
"Failed to resize output tensor.");

ScalarType in_type = in.scalar_type();
ScalarType out_type = out.scalar_type();

  if (in_type == ScalarType::Float) {
    // Fast path: hand the whole tensor to the vectorized NNLib sigmoid.
    const float* data_in = in.const_data_ptr<float>();
    float* data_out = out.mutable_data_ptr<float>();
    xa_nn_vec_sigmoid_f32_f32(data_out, data_in, in.numel());
  } else {
    ET_SWITCH_REALHB_TYPES(in_type, ctx, "sigmoid.out", CTYPE_IN, [&]() {
      ET_SWITCH_FLOATH_TYPES(out_type, ctx, "sigmoid.out", CTYPE_OUT, [&]() {
        apply_unary_map_fn(
            [](const CTYPE_IN val_in) {
              // Perform the math in double to preserve precision.
              double in_casted = static_cast<double>(val_in);
              double out_val = 1.0 / (1.0 + exp(-in_casted));
              return static_cast<CTYPE_OUT>(out_val);
            },
            in.const_data_ptr<CTYPE_IN>(),
            out.mutable_data_ptr<CTYPE_OUT>(),
            in.numel());
      });
    });
  }

return out;
}

} // namespace native
} // namespace executor
} // namespace torch
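On the non-float path the sigmoid is evaluated in double before narrowing to the output type. A standalone sketch of that pattern follows; the function name and sample values are illustrative:

#include <cmath>
#include <cstdio>

// Evaluate in double so large-magnitude inputs keep precision, then narrow,
// matching the lambda in the fallback above.
template <typename CTYPE_IN, typename CTYPE_OUT>
CTYPE_OUT sigmoid_scalar(CTYPE_IN val_in) {
  double x = static_cast<double>(val_in);
  return static_cast<CTYPE_OUT>(1.0 / (1.0 + exp(-x)));
}

int main() {
  printf("%f\n", sigmoid_scalar<int, float>(0));      // 0.500000
  printf("%f\n", sigmoid_scalar<float, float>(4.0f)); // ~0.982014
  return 0;
}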