Integrate Sub, Sigmoid, permute and view_copy #6

Merged (2 commits) on Aug 1, 2024
3 changes: 2 additions & 1 deletion backends/cadence/hifi/kernels/CMakeLists.txt
@@ -11,6 +11,7 @@ add_library(
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/matmul_asym8uxasym8u_asym8u.cpp
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_add_f32_broadcast.c
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_mul_f32_broadcast.c
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_transpose_32.c
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_where_f32xf32_f32.c
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_div_broadcast_f32.c
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/xa_nn_elm_floor_div_broadcast_f32.c
@@ -25,4 +26,4 @@ target_include_directories(
${EXECUTORCH_ROOT}/backends/cadence/hifi/third-party/nnlib/nnlib-hifi4/xa_nnlib/algo/ndsp/hifi4/include/
)

target_link_libraries(cadence_kernels PRIVATE xa_nnlib)
8 changes: 8 additions & 0 deletions backends/cadence/hifi/kernels/kernels.h
@@ -63,6 +63,14 @@ extern "C" WORD32 xa_nn_elm_mul_broadcast_4D_f32xf32_f32(FLOAT32 * __restrict__
const WORD32 *const p_inp1_shape,
const FLOAT32 * __restrict__ p_inp2,
const WORD32 *const p_inp2_shape);

extern "C" WORD32 xa_nn_transpose_32_32(WORD32 * __restrict__ p_out
,const WORD32 *const p_out_shape
,const WORD32 * __restrict__ p_inp
,const WORD32 *const p_inp_shape
,const WORD32 * __restrict__ p_permute_vec
,WORD32 num_out_dims
,WORD32 num_inp_dims);

namespace impl {
namespace HiFi {
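A minimal sketch of how this kernel is driven (a hypothetical standalone example, not part of this change; the convention that output dimension i takes its extent from input dimension p_permute_vec[i] matches the permute_copy operator added below):

#include <cstdint>
using WORD32 = int32_t;

extern "C" WORD32 xa_nn_transpose_32_32(
    WORD32* __restrict__ p_out,
    const WORD32* const p_out_shape,
    const WORD32* __restrict__ p_inp,
    const WORD32* const p_inp_shape,
    const WORD32* __restrict__ p_permute_vec,
    WORD32 num_out_dims,
    WORD32 num_inp_dims);

void transpose_example() {
  WORD32 inp[2 * 3 * 4] = {0};      // input buffer, shape [2, 3, 4]
  WORD32 out[4 * 2 * 3];            // output buffer, shape [4, 2, 3]
  WORD32 inp_shape[3] = {2, 3, 4};
  WORD32 permute[3] = {2, 0, 1};    // output dim i comes from input dim permute[i]
  WORD32 out_shape[3] = {4, 2, 3};  // out_shape[i] == inp_shape[permute[i]]
  xa_nn_transpose_32_32(out, out_shape, inp, inp_shape, permute, 3, 3);
}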
5 changes: 4 additions & 1 deletion backends/cadence/hifi/operators/CMakeLists.txt
@@ -23,6 +23,7 @@ set(_aten_ops__srcs
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_add.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_embedding.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_full.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_view_copy.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_where.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_view_copy.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/util/activation_ops_util.cpp"
@@ -38,6 +39,8 @@ set(_aten_ops__srcs
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_div.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_mul.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_permute_copy.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_sigmoid.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_div.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_rsqrt.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_mul.cpp"
@@ -46,7 +49,7 @@
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_slice_copy.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_softmax.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_split_with_sizes_copy.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_sub.cpp"
"${EXECUTORCH_ROOT}/backends/cadence/hifi/operators/op_sub.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_to_copy.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_where.cpp"
"${EXECUTORCH_ROOT}/kernels/portable/cpu/op_empty.cpp"
144 changes: 144 additions & 0 deletions backends/cadence/hifi/operators/op_permute_copy.cpp
@@ -0,0 +1,144 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/kernels/portable/cpu/util/copy_ops_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include "kernels.h"

namespace torch {
namespace executor {
namespace native {

using SizesType = exec_aten::SizesType;
using Tensor = exec_aten::Tensor;
using IntArrayRef = exec_aten::ArrayRef<int64_t>;

namespace {

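// Advances `coordinate` (a coordinate into `tensor`) so that input elements
// are visited in the order matching consecutive output positions: the entry
// of `dims` backing the fastest-varying output dimension is incremented
// first, and a wrap to zero carries into the next-slower dimension, like an
// odometer.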
void increment_coordinate_permuted(
const Tensor& tensor,
size_t* const coordinate,
IntArrayRef dims) {
for (int i = dims.size() - 1; i >= 0; i--) {
size_t d = dims[i] >= 0 ? dims[i] : dims[i] + tensor.dim();
coordinate[d]++;
if (coordinate[d] == tensor.size(d)) {
coordinate[d] = 0;
} else {
return;
}
}
}

} // namespace

Tensor& permute_copy_out(
RuntimeContext& ctx,
const Tensor& in,
IntArrayRef dims,
Tensor& out) {
(void)ctx;

ET_KERNEL_CHECK(
ctx, check_permute_copy_args(in, dims, out), InvalidArgument, out);

Tensor::SizesType expected_out_size[kTensorDimensionLimit];
size_t expected_out_dim = 0;
get_permute_copy_out_target_size(
in, dims, expected_out_size, &expected_out_dim);
ET_KERNEL_CHECK(
ctx,
resize_tensor(out, {expected_out_size, expected_out_dim}) == Error::Ok,
InvalidArgument,
out);

  const auto in_type = in.scalar_type();

  if (in_type == ScalarType::Float) {
    const WORD32* p_inp = (const WORD32*)in.const_data_ptr<float>();
    WORD32* p_out = (WORD32*)out.mutable_data_ptr<float>();

    WORD32 num_inp_dims = in.dim();
    WORD32 num_out_dims = num_inp_dims;

    WORD32 p_inp_shape[5];
    WORD32 p_out_shape[5];
    WORD32 p_permute_vec[5];

    for (int i = 0; i < num_inp_dims; i++) {
      p_inp_shape[i] = in.size(i);
      p_out_shape[i] = in.size(dims[i]);
      p_permute_vec[i] = dims[i];
    }

    xa_nn_transpose_32_32(
        p_out,
        p_out_shape,
        p_inp,
        p_inp_shape,
        p_permute_vec,
        num_out_dims,
        num_inp_dims);
  } else if (in_type == ScalarType::Char) {
    const WORD8* p_inp = (const WORD8*)in.const_data_ptr<char>();
    WORD8* p_out = (WORD8*)out.mutable_data_ptr<char>();

    WORD32 num_inp_dims = in.dim();
    WORD32 num_out_dims = num_inp_dims;

    // One extra slot for the padded trailing dimension written below.
    WORD32 p_inp_shape[6];
    WORD32 p_out_shape[6];
    WORD32 p_permute_vec[5];

    for (int i = 0; i < num_inp_dims; i++) {
      p_inp_shape[i] = in.size(i);
      p_out_shape[i] = in.size(dims[i]);
      p_permute_vec[i] = dims[i];
    }

    p_inp_shape[num_inp_dims] = 4;
    p_out_shape[num_inp_dims] = 4;

    xa_nn_transpose_8_8(
        p_out,
        p_out_shape,
        p_inp,
        p_inp_shape,
        p_permute_vec,
        num_out_dims,
        num_inp_dims);
  } else {
    // in and out must be the same dtype
    ET_SWITCH_ALL_TYPES(in_type, ctx, "permute_copy.out", CTYPE, [&] {
      const CTYPE* const in_data = in.const_data_ptr<CTYPE>();
      CTYPE* const out_data = out.mutable_data_ptr<CTYPE>();

      size_t in_coord[kTensorDimensionLimit] = {0};

      for (size_t i = 0; i < out.numel(); ++i) {
        out_data[i] = in_data[coordinateToIndex(in, in_coord)];
        increment_coordinate_permuted(in, in_coord, dims);
      }
    });
  }

return out;
}

} // namespace native
} // namespace executor
} // namespace torch
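To make the generic fallback concrete, here is a hypothetical standalone sketch (not part of this change) of the same walk for a row-major [2, 3] float tensor transposed with dims = {1, 0}; output elements are filled in order while the input coordinate advances in permuted order:

#include <cstdio>

int main() {
  // Transpose a row-major [2, 3] tensor with dims = {1, 0}: out shape [3, 2].
  const int in_sizes[2] = {2, 3};
  const int dims[2] = {1, 0};
  const float in_data[6] = {0, 1, 2, 3, 4, 5};
  float out_data[6];
  int coord[2] = {0, 0};  // current coordinate into the input

  for (int i = 0; i < 6; i++) {
    // coordinateToIndex equivalent for a row-major [2, 3] tensor.
    out_data[i] = in_data[coord[0] * 3 + coord[1]];
    // increment_coordinate_permuted equivalent: bump the input dimension
    // backing the fastest-varying output dimension first, carrying on wrap.
    for (int j = 1; j >= 0; j--) {
      int d = dims[j];
      if (++coord[d] == in_sizes[d]) {
        coord[d] = 0;  // wrap and carry into the next-slower dimension
      } else {
        break;
      }
    }
  }
  // Prints 0 3 1 4 2 5: the transpose of in_data.
  for (int i = 0; i < 6; i++) {
    printf("%g ", out_data[i]);
  }
  printf("\n");
  return 0;
}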
68 changes: 68 additions & 0 deletions backends/cadence/hifi/operators/op_sigmoid.cpp
@@ -0,0 +1,68 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <cmath>

#include <executorch/kernels/portable/cpu/util/functional_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include "kernels.h"

namespace torch {
namespace executor {
namespace native {

using Tensor = exec_aten::Tensor;

Tensor& sigmoid_out(RuntimeContext& ctx, const Tensor& in, Tensor& out) {
(void)ctx;

ET_KERNEL_CHECK(
ctx, in.scalar_type() != ScalarType::Bool, InvalidArgument, out);
ET_KERNEL_CHECK(ctx, tensor_is_floating_type(out), InvalidArgument, out);

// Resize for dynamic shape
ET_KERNEL_CHECK_MSG(
ctx,
resize_tensor(out, in.sizes()) == Error::Ok,
InvalidArgument,
out,
"Failed to resize output tensor.");

ScalarType in_type = in.scalar_type();
ScalarType out_type = out.scalar_type();

  if (in_type == ScalarType::Float) {
    const float* data_in = in.const_data_ptr<float>();
    float* data_out = out.mutable_data_ptr<float>();
    xa_nn_vec_sigmoid_f32_f32(data_out, data_in, in.numel());
  } else {
ET_SWITCH_REALHB_TYPES(in_type, ctx, "sigmoid.out", CTYPE_IN, [&]() {
ET_SWITCH_FLOATH_TYPES(out_type, ctx, "sigmoid.out", CTYPE_OUT, [&]() {
apply_unary_map_fn(
[](const CTYPE_IN val_in) {
// perform math in double to preserve precision
double in_casted = static_cast<double>(val_in);
double out_val = 1.0 / (1.0 + std::exp(-in_casted));
return static_cast<CTYPE_OUT>(out_val);
},
in.const_data_ptr<CTYPE_IN>(),
out.mutable_data_ptr<CTYPE_OUT>(),
in.numel());
});
});
}

return out;
}

} // namespace native
} // namespace executor
} // namespace torch
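As a quick numeric check of the scalar fallback (a standalone sketch, not part of this change), the double-precision formula gives the expected values:

#include <cmath>
#include <cstdio>

// Same formula as the fallback path in sigmoid_out.
static double sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }

int main() {
  printf("%f\n", sigmoid(0.0));   // 0.500000
  printf("%f\n", sigmoid(2.0));   // ~0.880797
  printf("%f\n", sigmoid(-2.0));  // ~0.119203
  return 0;
}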