Skip to content

Commit

Permalink
For broadcast, added support for processing distinct input dimensions (py…
Browse files Browse the repository at this point in the history
…torch#7107)

For broadcast, added support for distinct dimensions for both
inputs. Also, added support for processing tensors with more than 5 dimensions.

Signed-off-by: [email protected] <[email protected]>
Co-authored-by: [email protected] <[email protected]>
  • Loading branch information
ckmadhira and [email protected] authored Nov 27, 2024
1 parent 1dab7a9 commit d136206
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 30 deletions.
48 changes: 33 additions & 15 deletions backends/cadence/fusion_g3/operators/op_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,27 +76,45 @@ Tensor& add_out(
int inp2_shape[kTensorDimensionLimit];
int out_shape[kTensorDimensionLimit];

/* input shapes and output shapes */
for (auto i = 0; i < a_size.size(); i++) {
inp1_shape[i] = a_size[i];
}

for (auto i = 0; i < b_size.size(); i++) {
inp2_shape[i] = b_size[i];
}

for (auto i = 0; i < out_size.size(); i++) {
out_shape[i] = out_size[i];
}

/*find broadcast*/
const bool a_is_broadcasted = !out.sizes().equals(a.sizes());
const bool b_is_broadcasted = !out.sizes().equals(b.sizes());
const bool broadcast = (a_is_broadcasted || b_is_broadcasted);

int max_dim = a.dim() > b.dim() ? a.dim() : b.dim();
max_dim = out.dim() > max_dim ? out.dim() : max_dim;

if (compute_type == ScalarType::Int) {
bool optimized = 1;

if ((a.dim() == 0) || (b.dim() == 0)) {
optimized = 0;
}

if ((broadcast == 1) && (max_dim > kTensorDimensionLimit)) {
optimized = 0;
}

for (int i = 0; i < max_dim; i++) {
out_shape[i] = 1;
inp1_shape[i] = 1;
inp2_shape[i] = 1;
}

int offset_out = max_dim - out.dim();
int offset_inp1 = max_dim - a.dim();
int offset_inp2 = max_dim - b.dim();

for (int i = 0; i < out.dim(); i++) {
out_shape[i + offset_out] = out.size(i);
}
for (int i = 0; i < a.dim(); i++) {
inp1_shape[i + offset_inp1] = a.size(i);
}
for (int i = 0; i < b.dim(); i++) {
inp2_shape[i + offset_inp2] = b.size(i);
}

if ((compute_type == ScalarType::Int) && (optimized)){
const int* const inp1_data = a.const_data_ptr<int>();
const int* const inp2_data = b.const_data_ptr<int>();
int* const out_data = out.mutable_data_ptr<int>();
Expand All @@ -117,7 +135,7 @@ Tensor& add_out(
xa_nn_elm_add_32x32_32(
out_data, inp1_data, inp2_data, alpha_val, out.numel());
}
} else if (compute_type == ScalarType::Float) {
} else if ((compute_type == ScalarType::Float) && (optimized)) {
const float* const inp1_data = a.const_data_ptr<float>();
const float* const inp2_data = b.const_data_ptr<float>();
float* const out_data = out.mutable_data_ptr<float>();
Expand Down
48 changes: 33 additions & 15 deletions backends/cadence/fusion_g3/operators/op_mul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,27 +68,45 @@ Tensor& mul_out(
int inp2_shape[kTensorDimensionLimit];
int out_shape[kTensorDimensionLimit];

/* input shapes and output shapes */
for (auto i = 0; i < a_size.size(); i++) {
inp1_shape[i] = a_size[i];
}

for (auto i = 0; i < b_size.size(); i++) {
inp2_shape[i] = b_size[i];
}

for (auto i = 0; i < out_size.size(); i++) {
out_shape[i] = out_size[i];
}

/*find broadcast*/
const bool a_is_broadcasted = !out.sizes().equals(a.sizes());
const bool b_is_broadcasted = !out.sizes().equals(b.sizes());
const bool broadcast = (a_is_broadcasted || b_is_broadcasted);

int max_dim = a.dim() > b.dim() ? a.dim() : b.dim();
max_dim = out.dim() > max_dim ? out.dim() : max_dim;

if (compute_type == ScalarType::Int) {
bool optimized = 1;

if ((a.dim() == 0) || (b.dim() == 0)) {
optimized = 0;
}

if ((broadcast == 1) && (max_dim > kTensorDimensionLimit)) {
optimized = 0;
}

for (int i = 0; i < max_dim; i++) {
out_shape[i] = 1;
inp1_shape[i] = 1;
inp2_shape[i] = 1;
}

int offset_out = max_dim - out.dim();
int offset_inp1 = max_dim - a.dim();
int offset_inp2 = max_dim - b.dim();

for (int i = 0; i < out.dim(); i++) {
out_shape[i + offset_out] = out.size(i);
}
for (int i = 0; i < a.dim(); i++) {
inp1_shape[i + offset_inp1] = a.size(i);
}
for (int i = 0; i < b.dim(); i++) {
inp2_shape[i + offset_inp2] = b.size(i);
}

if ((compute_type == ScalarType::Int) && (optimized)) {
const int* const inp1_data = a.const_data_ptr<int>();
const int* const inp2_data = b.const_data_ptr<int>();
int* const out_data = out.mutable_data_ptr<int>();
Expand All @@ -105,7 +123,7 @@ Tensor& mul_out(
} else {
xa_nn_elm_mul_32x32_32(out_data, inp1_data, inp2_data, out.numel());
}
} else if (compute_type == ScalarType::Float) {
} else if ((compute_type == ScalarType::Float) && (optimized)) {
const float* const inp1_data = a.const_data_ptr<float>();
const float* const inp2_data = b.const_data_ptr<float>();
float* const out_data = out.mutable_data_ptr<float>();
Expand Down

0 comments on commit d136206

Please sign in to comment.