diff --git a/Include/arm_nnfunctions.h b/Include/arm_nnfunctions.h
index d8062f80..ade3417c 100644
--- a/Include/arm_nnfunctions.h
+++ b/Include/arm_nnfunctions.h
@@ -21,8 +21,8 @@
* Title: arm_nnfunctions.h
* Description: Public header file for CMSIS NN Library
*
- * $Date: 9 January 2024
- * $Revision: V.12.5.1
+ * $Date: 11 January 2024
+ * $Revision: V.12.6.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
@@ -365,8 +365,10 @@ arm_cmsis_nn_status arm_convolve_s4(const cmsis_nn_context *ctx,
* It contains the multiplier and shift values to be applied to each output channel
* @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
* @param[in] input_data Input (activation) data pointer. Data type: int8
- * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
- * spatial filter dimensions
+ * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, CK] where HK and WK are the
+ * spatial filter dimensions and CK is the number of filter channels. CK != C_IN is
+ * used for grouped convolution, in which case the required conditions are
+ * C_IN = N * CK and C_OUT = N * M for N groups of size M.
* @param[in] filter_data Filter data pointer. Data type: int8
* @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
* @param[in] bias_data Optional bias data pointer. Data type: int32
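
For reference, a minimal sketch (not part of the patch) of how the dimensions relate in a
grouped call, assuming the standard cmsis_nn_dims field order {n, h, w, c}; the concrete
sizes are illustrative only.

    /* C_IN = 4 input channels with CK = 2 filter channels gives N = 2 groups;  */
    /* C_OUT = 6 output channels gives M = 3 filters per group (C_OUT = N * M). */
    cmsis_nn_dims input_dims  = {.n = 1, .h = 8, .w = 8, .c = 4}; /* [N, H, W, C_IN]              */
    cmsis_nn_dims filter_dims = {.n = 6, .h = 3, .w = 3, .c = 2}; /* [C_OUT, HK, WK, CK]          */
    cmsis_nn_dims output_dims = {.n = 1, .h = 6, .w = 6, .c = 6}; /* 3x3 kernel, no pad, stride 1 */
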
diff --git a/Include/arm_nnsupportfunctions.h b/Include/arm_nnsupportfunctions.h
index 9c0ebda5..20cbfd38 100644
--- a/Include/arm_nnsupportfunctions.h
+++ b/Include/arm_nnsupportfunctions.h
@@ -21,8 +21,8 @@
* Title: arm_nnsupportfunctions.h
* Description: Public header file of support functions for CMSIS NN Library
*
- * $Date: 9 January 2024
- * $Revision: V.17.6.3
+ * $Date: 11 January 2024
+ * $Revision: V.17.7.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
@@ -423,6 +423,7 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s4(const int8_t *lhs,
* @param[in] dst_offset Offset to be applied the output result
* @param[in] activation_min Minimum value to clamp down the output. Range : int8
* @param[in] activation_max Maximum value to clamp up the output. Range : int8
+ * @param[in] row_address_offset Address offset between rows in output. NOTE: Only used for MVEI extension.
* @param[in] lhs_cols_offset Column offset between subsequent lhs_rows
*
* @return The function returns ARM_CMSIS_NN_SUCCESS
@@ -441,6 +442,7 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs,
const int32_t dst_offset,
const int32_t activation_min,
const int32_t activation_max,
+ const int32_t row_address_offset,
const int32_t lhs_cols_offset);
/**
@@ -1006,6 +1008,47 @@ int8_t *arm_nn_mat_mult_kernel_s8_s16(const int8_t *input_a,
const int32_t *const output_bias,
int8_t *out_0);
+/**
+ * @brief Matrix-multiplication function for convolution with per-channel requantization, supporting an address offset
+ * between rows.
+ * @param[in] input_a pointer to operand A
+ * @param[in] input_b pointer to operand B, which always consists of 2 vectors.
+ * @param[in] output_ch number of rows of A
+ * @param[in] out_shift pointer to per output channel requantization shift parameter.
+ * @param[in] out_mult pointer to per output channel requantization multiplier parameter.
+ * @param[in] out_offset output tensor offset.
+ * @param[in] activation_min minimum value to clamp the output to. Range : int8
+ * @param[in] activation_max maximum value to clamp the output to. Range : int8
+ * @param[in] num_col_a number of columns of A
+ * @param[in] aligned_num_col_a number of columns of A aligned by 4
+ * @param[in] output_bias per output channel bias. Range : int32
+ * @param[in] row_address_offset address offset between rows in the output
+ * @param[in,out] out_0 pointer to output
+ * @return The function returns one of the following two values:
+ * 1. The incremented output pointer, for a successful operation, or
+ * 2. NULL, if the implementation is not available.
+ *
+ * @details This function does the matrix multiplication of the weight matrix for all output channels
+ * with 2 columns from im2col and produces two elements per output channel. The outputs are
+ * clamped to the range provided by activation min and max.
+ *
+ * This function is slightly less performant than arm_nn_mat_mult_kernel_s8_s16, but adds support for
+ * grouped convolution. Supported framework: TensorFlow Lite micro.
+ */
+int8_t *arm_nn_mat_mult_kernel_row_offset_s8_s16(const int8_t *input_a,
+ const int16_t *input_b,
+ const uint16_t output_ch,
+ const int32_t *out_shift,
+ const int32_t *out_mult,
+ const int32_t out_offset,
+ const int16_t activation_min,
+ const int16_t activation_max,
+ const int32_t num_col_a,
+ const int32_t aligned_num_col_a,
+ const int32_t *const output_bias,
+ const int32_t row_address_offset,
+ int8_t *out_0);
+
/**
* @brief Common softmax function for s8 input and s8 or s16 output
* @param[in] input Pointer to the input tensor
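
The new row_address_offset argument generalises the output addressing: existing callers pass
rhs_rows, which reproduces the previous behaviour, while the grouped path in arm_convolve_s8
passes the full output channel count so that each group's rows land in their own slot of the
NHWC output. A worked sketch with illustrative numbers, assuming one group of a two-group
convolution:

    /* rhs_rows = output_ch_per_group = 3, row_address_offset = output_ch = 6.
     * In the MVE path each scatter writes dst[0], dst[6], dst[12], dst[18] for the
     * current channel, one channel per dst++. After the 3 channels of a 4-row tile,
     * dst += 4 * 6 - 3 = 21, i.e. 24 bytes past the tile start: the same group's
     * channels of the next 4 output pixels. With row_address_offset == rhs_rows this
     * reduces to the previous dst += 3 * rhs_rows step.
     */
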
diff --git a/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c b/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
index 9e2aeb05..a255fe5e 100644
--- a/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
+++ b/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -21,8 +21,8 @@
* Title: arm_convolve_1_x_n_s8.c
* Description: s8 version of 1xN convolution using symmetric quantization.
*
- * $Date: 8 March 2023
- * $Revision: V.3.4.0
+ * $Date: 04 January 2024
+ * $Revision: V.3.5.0
*
* Target : Arm(R) M-Profile Architecture
*
@@ -30,7 +30,6 @@
#include "arm_nnfunctions.h"
#include "arm_nnsupportfunctions.h"
-
/**
* @ingroup Public
*/
@@ -149,6 +148,7 @@ arm_cmsis_nn_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
conv_params->output_offset,
conv_params->activation.min,
conv_params->activation.max,
+ rhs_rows,
lhs_offset);
output_data += lhs_rows * rhs_rows;
diff --git a/Source/ConvolutionFunctions/arm_convolve_1x1_s8.c b/Source/ConvolutionFunctions/arm_convolve_1x1_s8.c
index 7f3030eb..7cebffc0 100644
--- a/Source/ConvolutionFunctions/arm_convolve_1x1_s8.c
+++ b/Source/ConvolutionFunctions/arm_convolve_1x1_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2022-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2022-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -21,8 +21,8 @@
* Title: arm_convolve_1x1_s8.c
* Description: Generic s8 version of 1x1 convolution
*
- * $Date: 20 January 2023
- * $Revision: V.1.0.1
+ * $Date: 04 January 2024
+ * $Revision: V.1.1.0
*
* Target : Arm(R) M-Profile Architecture
*
@@ -96,6 +96,7 @@ arm_cmsis_nn_status arm_convolve_1x1_s8(const cmsis_nn_context *ctx,
conv_params->output_offset,
conv_params->activation.min,
conv_params->activation.max,
+ rhs_rows,
rhs_cols * stride_w);
if (result != ARM_CMSIS_NN_SUCCESS)
{
diff --git a/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c b/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
index e832d0b3..8a400f2b 100644
--- a/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
+++ b/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -21,8 +21,8 @@
* Title: arm_convolve_1x1_s8_fast.c
* Description: Fast s8 version of 1x1 convolution (non-square shape)
*
- * $Date: 30 October 2023
- * $Revision: V.3.4.0
+ * $Date: 04 January 2024
+ * $Revision: V.3.5.0
*
* Target : Arm(R) M-Profile Architecture
*
@@ -86,6 +86,7 @@ arm_cmsis_nn_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx,
conv_params->output_offset,
conv_params->activation.min,
conv_params->activation.max,
+ rhs_rows,
rhs_cols);
/* Return to application */
diff --git a/Source/ConvolutionFunctions/arm_convolve_s8.c b/Source/ConvolutionFunctions/arm_convolve_s8.c
index 0b14eaf4..8c4ac67a 100644
--- a/Source/ConvolutionFunctions/arm_convolve_s8.c
+++ b/Source/ConvolutionFunctions/arm_convolve_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -21,8 +21,8 @@
* Title: arm_convolve_s8.c
* Description: s8 version of convolution using symmetric quantization.
*
- * $Date: 08 June 2023
- * $Revision: V.3.5.0
+ * $Date: 04 January 2024
+ * $Revision: V.3.6.0
*
* Target : Arm(R) M-Profile Architecture
*
@@ -30,7 +30,6 @@
#include "arm_nnfunctions.h"
#include "arm_nnsupportfunctions.h"
-
/**
* @ingroup Public
*/
@@ -73,6 +72,7 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
const uint16_t input_ch = input_dims->c;
const uint16_t kernel_x = filter_dims->w;
const uint16_t kernel_y = filter_dims->h;
+ const uint16_t kernel_ch = filter_dims->c;
const uint16_t output_x = output_dims->w;
const uint16_t output_y = output_dims->h;
const uint16_t output_ch = output_dims->c;
@@ -86,20 +86,26 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
const int32_t out_offset = conv_params->output_offset;
const int32_t out_activation_min = conv_params->activation.min;
const int32_t out_activation_max = conv_params->activation.max;
- const int32_t rhs_cols = kernel_x * kernel_y * input_ch;
const int32_t input_offset = conv_params->input_offset;
+ const int32_t groups = input_ch / kernel_ch;
+ const int32_t rhs_cols = kernel_x * kernel_y * kernel_ch;
+ const int32_t output_ch_per_group = output_ch / groups;
+
int32_t *output_mult = quant_params->multiplier;
int32_t *output_shift = quant_params->shift;
+ if (input_ch % groups != 0 || output_ch % groups != 0)
+ {
+ return ARM_CMSIS_NN_ARG_ERROR;
+ }
+
int i_batch;
for (i_batch = 0; i_batch < input_batches; i_batch++)
{
-
#if defined(ARM_MATH_MVEI)
/* Generate up to four columns from the input tensor a GEMM computation */
int8_t *im2col_buf = (int8_t *)buffer_a;
- const int32_t rhs_rows = output_dims->c;
#else
const int32_t remainder = rhs_cols % 4;
const int32_t aligned_rhs_cols = remainder != 0 ? rhs_cols + 4 - remainder : rhs_cols;
@@ -108,175 +114,221 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
int8_t *im2col_buf = (int8_t *)buffer_a + aligned_rhs_cols * 2;
int16_t *im2col_buf_start_s16 = buffer_a;
#endif
- int8_t *out = output_data;
int32_t lhs_rows = 0;
+ const int8_t *filter_data_ptr = &filter_data[0];
+ const int32_t *bias_data_ptr = &bias_data[0];
+ const int32_t *output_mult_ptr = &output_mult[0];
+ const int32_t *output_shift_ptr = &output_shift[0];
+
/* This part implements the im2col function */
- for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
+ for (int32_t i_group = 0; i_group < groups; i_group++)
{
- for (int i_out_x = 0; i_out_x < output_x; i_out_x++)
+ int8_t *out = output_data + i_group * output_ch_per_group;
+ for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
{
- const int32_t base_idx_x = stride_x * i_out_x - pad_x;
- const int32_t base_idx_y = stride_y * i_out_y - pad_y;
-
- for (int32_t i_ker_y = 0; i_ker_y < kernel_y; i_ker_y++)
+ for (int i_out_x = 0; i_out_x < output_x; i_out_x++)
{
- for (int32_t i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++)
- {
- const int32_t k_y = base_idx_y + dilation_y * i_ker_y;
- const int32_t k_x = base_idx_x + dilation_x * i_ker_x;
+ const int32_t base_idx_x = stride_x * i_out_x - pad_x;
+ const int32_t base_idx_y = stride_y * i_out_y - pad_y;
- if (k_y < 0 || k_y >= input_y || k_x < 0 || k_x >= input_x)
- {
- arm_memset_s8(im2col_buf, (int8_t)-input_offset, sizeof(int8_t) * input_ch);
- }
- else
+ for (int32_t i_ker_y = 0; i_ker_y < kernel_y; i_ker_y++)
+ {
+ for (int32_t i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++)
{
- arm_memcpy_s8(im2col_buf, input_data + (k_y * input_x + k_x) * input_ch, input_ch);
+ const int32_t k_y = base_idx_y + dilation_y * i_ker_y;
+ const int32_t k_x = base_idx_x + dilation_x * i_ker_x;
+
+ if (k_y < 0 || k_y >= input_y || k_x < 0 || k_x >= input_x)
+ {
+ arm_memset_s8(im2col_buf, (int8_t)-input_offset, sizeof(int8_t) * kernel_ch);
+ }
+ else
+ {
+ arm_memcpy_s8(im2col_buf,
+ input_data + (k_y * input_x + k_x) * input_ch + i_group * kernel_ch,
+ sizeof(int8_t) * kernel_ch);
+ }
+ im2col_buf += kernel_ch;
}
- im2col_buf += input_ch;
}
- }
- lhs_rows++;
+ lhs_rows++;
#if defined(ARM_MATH_MVEI)
- /* Computation is filed for every 4 columns */
- if (lhs_rows == 4)
- {
- arm_nn_mat_mult_nt_t_s8((int8_t *)buffer_a,
- filter_data,
- bias_data,
- out,
- output_mult,
- output_shift,
- lhs_rows,
- rhs_rows,
- rhs_cols,
- input_offset,
- out_offset,
- out_activation_min,
- out_activation_max,
- rhs_cols);
- out += lhs_rows * rhs_rows;
-
- lhs_rows = 0;
- im2col_buf = (int8_t *)buffer_a;
- }
+
+ /* Computation is filed for every 4 columns */
+            /* Computation is performed for every 4 columns */
+ {
+ arm_nn_mat_mult_nt_t_s8((int8_t *)buffer_a,
+ filter_data_ptr,
+ bias_data_ptr,
+ out,
+ output_mult_ptr,
+ output_shift_ptr,
+ lhs_rows,
+ output_ch_per_group,
+ rhs_cols,
+ input_offset,
+ out_offset,
+ out_activation_min,
+ out_activation_max,
+ output_ch,
+ rhs_cols);
+
+ out += lhs_rows * output_ch;
+
+ lhs_rows = 0;
+ im2col_buf = (int8_t *)buffer_a;
+ }
#else
#if defined(ARM_MATH_DSP)
- /* Copy one column with input offset and no ordering */
- arm_s8_to_s16_unordered_with_offset(
- im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset);
+ /* Copy one column with input offset and no ordering */
+ arm_s8_to_s16_unordered_with_offset(
+ im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset);
#else
- arm_q7_to_q15_with_offset(im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset);
+
+ arm_q7_to_q15_with_offset(
+ im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset);
+
#endif
- im2col_buf_start_s16 += aligned_rhs_cols;
+ im2col_buf_start_s16 += aligned_rhs_cols;
- if (lhs_rows == 2)
- {
- out = arm_nn_mat_mult_kernel_s8_s16(filter_data,
- buffer_a,
- output_ch,
- output_shift,
- output_mult,
- out_offset,
- out_activation_min,
- out_activation_max,
- rhs_cols,
- aligned_rhs_cols,
- bias_data,
- out);
-
- /* counter reset */
- im2col_buf_start_s16 = buffer_a;
- im2col_buf = (int8_t *)buffer_a + aligned_rhs_cols * 2;
- lhs_rows = 0;
- }
+ if (lhs_rows == 2)
+ {
+ if (groups > 1)
+ {
+ out = arm_nn_mat_mult_kernel_row_offset_s8_s16(filter_data_ptr,
+ buffer_a,
+ output_ch_per_group,
+ output_shift_ptr,
+ output_mult_ptr,
+ out_offset,
+ out_activation_min,
+ out_activation_max,
+ rhs_cols,
+ aligned_rhs_cols,
+ bias_data_ptr,
+ output_ch,
+ out);
+ }
+ else
+ {
+ out = arm_nn_mat_mult_kernel_s8_s16(filter_data_ptr,
+ buffer_a,
+ output_ch_per_group,
+ output_shift_ptr,
+ output_mult_ptr,
+ out_offset,
+ out_activation_min,
+ out_activation_max,
+ rhs_cols,
+ aligned_rhs_cols,
+ bias_data_ptr,
+ out);
+ }
+
+ /* counter reset */
+ im2col_buf_start_s16 = buffer_a;
+ im2col_buf = (int8_t *)buffer_a + aligned_rhs_cols * 2;
+ lhs_rows = 0;
+ }
#endif
+ }
}
if (out == NULL)
{
return ARM_CMSIS_NN_NO_IMPL_ERROR;
}
- }
- /* Handle left over columns */
- if (lhs_rows != 0)
- {
+ /* Handle left over columns */
+ if (lhs_rows != 0)
+ {
#if defined(ARM_MATH_MVEI)
- arm_nn_mat_mult_nt_t_s8((int8_t *)buffer_a,
- filter_data,
- bias_data,
- out,
- output_mult,
- output_shift,
- lhs_rows,
- rhs_rows,
- rhs_cols,
- input_offset,
- out_offset,
- out_activation_min,
- out_activation_max,
- rhs_cols);
- out += lhs_rows * rhs_rows;
- lhs_rows = 0;
- im2col_buf = (int8_t *)buffer_a;
+ arm_nn_mat_mult_nt_t_s8((int8_t *)buffer_a,
+ filter_data_ptr,
+ bias_data_ptr,
+ out,
+ output_mult_ptr,
+ output_shift_ptr,
+ lhs_rows,
+ output_ch_per_group,
+ rhs_cols,
+ input_offset,
+ out_offset,
+ out_activation_min,
+ out_activation_max,
+ output_ch,
+ rhs_cols);
+
+ out += lhs_rows * output_ch;
+ lhs_rows = 0;
+ im2col_buf = (int8_t *)buffer_a;
#else // #if defined(ARM_MATH_MVEI)
- const int8_t *ker_a = filter_data;
- int i;
+ const int8_t *ker_a = filter_data_ptr;
+ int i;
- for (i = 0; i < output_ch; i++)
- {
- /* Load the accumulator with bias first */
- int32_t sum = 0;
- if (bias_data)
+ for (i = 0; i < output_ch_per_group; i++)
{
- sum = bias_data[i];
- }
+ /* Load the accumulator with bias first */
+ int32_t sum = 0;
+ if (bias_data_ptr)
+ {
+ sum = bias_data_ptr[i];
+ }
- const int16_t *ip_as_col = buffer_a;
+ const int16_t *ip_as_col = buffer_a;
#if defined(ARM_MATH_DSP)
- /* 4 multiply and accumulates are done in one loop. */
- uint16_t col_count = rhs_cols / 4;
- while (col_count)
- {
- int32_t ker_a1, ker_a2;
- int32_t ip_b1, ip_b2;
+ /* 4 multiply and accumulates are done in one loop. */
+ uint16_t col_count = rhs_cols / 4;
+ while (col_count)
+ {
+ int32_t ker_a1, ker_a2;
+ int32_t ip_b1, ip_b2;
- ker_a = read_and_pad_reordered(ker_a, &ker_a1, &ker_a2);
+ ker_a = read_and_pad_reordered(ker_a, &ker_a1, &ker_a2);
- ip_b1 = arm_nn_read_q15x2_ia(&ip_as_col);
- sum = SMLAD(ker_a1, ip_b1, sum);
- ip_b2 = arm_nn_read_q15x2_ia(&ip_as_col);
- sum = SMLAD(ker_a2, ip_b2, sum);
+ ip_b1 = arm_nn_read_q15x2_ia(&ip_as_col);
+ sum = SMLAD(ker_a1, ip_b1, sum);
+ ip_b2 = arm_nn_read_q15x2_ia(&ip_as_col);
+ sum = SMLAD(ker_a2, ip_b2, sum);
- col_count--;
- }
- /* Handle left over mac */
- col_count = rhs_cols & 0x3;
+ col_count--;
+ }
+ /* Handle left over mac */
+ col_count = rhs_cols & 0x3;
#else
- uint16_t col_count = rhs_cols;
+ uint16_t col_count = rhs_cols;
+
#endif
- while (col_count)
- {
- int8_t ker_a1 = *ker_a++;
- int16_t ip_b1 = *ip_as_col++;
- sum += ker_a1 * ip_b1;
- col_count--;
+ while (col_count)
+ {
+ int8_t ker_a1 = *ker_a++;
+ int16_t ip_b1 = *ip_as_col++;
+
+ sum += ker_a1 * ip_b1;
+ col_count--;
+ }
+
+ sum = arm_nn_requantize(sum, output_mult_ptr[i], output_shift_ptr[i]);
+ sum += out_offset;
+ sum = MAX(sum, out_activation_min);
+ sum = MIN(sum, out_activation_max);
+ *out++ = (int8_t)sum;
}
- sum = arm_nn_requantize(sum, output_mult[i], output_shift[i]);
- sum += out_offset;
- sum = MAX(sum, out_activation_min);
- sum = MIN(sum, out_activation_max);
- *out++ = (int8_t)sum;
- }
+ im2col_buf_start_s16 = buffer_a;
+ im2col_buf = (int8_t *)buffer_a + aligned_rhs_cols * 2;
+ lhs_rows = 0;
#endif // #if defined(ARM_MATH_MVEI)
+ }
+ filter_data_ptr += output_ch_per_group * rhs_cols;
+ bias_data_ptr += output_ch_per_group;
+ output_mult_ptr += output_ch_per_group;
+ output_shift_ptr += output_ch_per_group;
}
-
/* Advance to the next batch */
input_data += (input_x * input_y * input_ch);
output_data += (output_x * output_y * output_ch);
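
To make the per-group pointer stepping concrete, a sketch (illustration only) using the sizes
of the grouped_conv_1 test added below: input_ch = 2, kernel_ch = 1, output_ch = 6, 2x2 kernel.

    /* groups = input_ch / kernel_ch = 2, output_ch_per_group = 6 / 2 = 3,
     * rhs_cols = kernel_x * kernel_y * kernel_ch = 2 * 2 * 1 = 4.
     *
     * i_group = 0: out = output_data + 0, fills channels 0..2 of every output pixel
     * i_group = 1: out = output_data + 3, fills channels 3..5 of every output pixel
     * After each group: filter_data_ptr += 3 * 4, and the bias, multiplier and shift
     * pointers each advance by output_ch_per_group = 3.
     */
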
diff --git a/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c b/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c
index 07f29327..98f90585 100644
--- a/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c
+++ b/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -22,8 +22,8 @@
* Description: s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in
* cmsis-nn to perform the convolution.
*
- * $Date: 8 March 2023
- * $Revision: V.2.4.0
+ * $Date: 04 January 2024
+ * $Revision: V.2.5.0
*
* Target : Arm(R) M-Profile Architecture
*
@@ -60,7 +60,8 @@ arm_cmsis_nn_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx,
int8_t *output_data)
{
if ((conv_params->padding.w == 0) && (conv_params->padding.h == 0) && (filter_dims->w == 1) &&
- (filter_dims->h == 1) && (conv_params->dilation.w == 1 && conv_params->dilation.h == 1))
+ (filter_dims->h == 1) && (conv_params->dilation.w == 1 && conv_params->dilation.h == 1) &&
+ (input_dims->c == filter_dims->c))
{
if ((conv_params->stride.w == 1) && (conv_params->stride.h == 1))
{
@@ -92,7 +93,7 @@ arm_cmsis_nn_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx,
}
}
else if ((input_dims->h == 1) && conv_params->dilation.w == 1 && (filter_dims->h == 1) &&
- ((conv_params->stride.w * input_dims->c) % 4 == 0))
+ ((conv_params->stride.w * input_dims->c) % 4 == 0) && (input_dims->c == filter_dims->c))
{
return arm_convolve_1_x_n_s8(ctx,
conv_params,
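
In short, the added input_dims->c == filter_dims->c checks keep the 1x1 and 1xN fast paths
restricted to ordinary convolutions; a sketch of the resulting dispatch (the grouped case is
served by the generic arm_convolve_s8 branch of the wrapper, which lies outside this hunk):

    /* filter_dims->c == input_dims->c  -> non-grouped, the fast paths above may apply
     * filter_dims->c != input_dims->c  -> grouped convolution, generic arm_convolve_s8
     */
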
diff --git a/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_row_offset_s8_s16.c b/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_row_offset_s8_s16.c
new file mode 100644
index 00000000..7a400e12
--- /dev/null
+++ b/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_row_offset_s8_s16.c
@@ -0,0 +1,253 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project: CMSIS NN Library
+ * Title: arm_nn_mat_mult_kernel_row_offset_s8_s16.c
+ * Description: Matrix-multiplication function for grouped convolution
+ *
+ * $Date: 04 January 2024
+ * $Revision: V.1.0.0
+ *
+ * Target : Arm(R) M-Profile Architecture
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+/*
+ * Matrix-multiplication function for convolution with per-channel requantization, supporting an address offset between
+ * rows.
+ *
+ * Refer header file for details.
+ *
+ */
+
+int8_t *arm_nn_mat_mult_kernel_row_offset_s8_s16(const int8_t *input_a,
+ const int16_t *input_b,
+ const uint16_t output_ch,
+ const int32_t *out_shift,
+ const int32_t *out_mult,
+ const int32_t out_offset,
+ const int16_t activation_min,
+ const int16_t activation_max,
+ const int32_t num_col_a,
+ const int32_t aligned_num_col_a,
+ const int32_t *const output_bias,
+ const int32_t row_address_offset,
+ int8_t *out_0)
+{
+
+#if !defined(ARM_MATH_MVEI)
+ /* set up the second output pointers */
+
+ int8_t *out_1 = out_0 + row_address_offset;
+ const int32_t *bias = output_bias;
+
+ uint16_t row_count = output_ch / 2;
+ const int8_t *ip_a0 = input_a;
+ /* this loop over rows in A */
+ while (row_count)
+ {
+ /* setup pointers for B */
+ const int16_t *ip_b0 = input_b;
+ const int16_t *ip_b1 = ip_b0 + aligned_num_col_a;
+
+ /* align the second pointer for A */
+ const int8_t *ip_a1 = ip_a0 + num_col_a;
+
+ int32_t ch_0_out_0 = 0;
+ int32_t ch_0_out_1 = 0;
+ int32_t ch_1_out_0 = 0;
+ int32_t ch_1_out_1 = 0;
+ /* Init accumulator with bias for channel N and N + 1 */
+ if (bias)
+ {
+ ch_0_out_0 = *bias;
+ ch_0_out_1 = *bias++;
+ ch_1_out_0 = *bias;
+ ch_1_out_1 = *bias++;
+ }
+
+ #if defined(ARM_MATH_DSP)
+ int32_t col_count = num_col_a / 4;
+ /* accumulate over the vector */
+ while (col_count)
+ {
+ int32_t a01, a02, a11, a12;
+ int32_t b0 = arm_nn_read_q15x2_ia(&ip_b0);
+ int32_t b1 = arm_nn_read_q15x2_ia(&ip_b1);
+
+ ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02);
+ ip_a1 = read_and_pad_reordered(ip_a1, &a11, &a12);
+
+ ch_0_out_0 = SMLAD(a01, b0, ch_0_out_0);
+ ch_0_out_1 = SMLAD(a01, b1, ch_0_out_1);
+ ch_1_out_0 = SMLAD(a11, b0, ch_1_out_0);
+ ch_1_out_1 = SMLAD(a11, b1, ch_1_out_1);
+
+ b0 = arm_nn_read_q15x2_ia(&ip_b0);
+ b1 = arm_nn_read_q15x2_ia(&ip_b1);
+
+ ch_0_out_0 = SMLAD(a02, b0, ch_0_out_0);
+ ch_0_out_1 = SMLAD(a02, b1, ch_0_out_1);
+ ch_1_out_0 = SMLAD(a12, b0, ch_1_out_0);
+ ch_1_out_1 = SMLAD(a12, b1, ch_1_out_1);
+
+ col_count--;
+ } /* while over col_count */
+
+ col_count = num_col_a & 0x3;
+
+ #else
+ int32_t col_count = num_col_a;
+ #endif
+ while (col_count)
+ {
+ int8_t a0 = *ip_a0++;
+ int16_t b0 = *ip_b0++;
+ int8_t a1 = *ip_a1++;
+ int16_t b1 = *ip_b1++;
+
+ ch_0_out_0 += a0 * b0;
+ ch_0_out_1 += a0 * b1;
+ ch_1_out_0 += a1 * b0;
+ ch_1_out_1 += a1 * b1;
+ col_count--;
+ } /* while over col_count */
+
+ ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift);
+ ch_0_out_0 += out_offset;
+ ch_0_out_0 = MAX(ch_0_out_0, activation_min);
+ ch_0_out_0 = MIN(ch_0_out_0, activation_max);
+ *out_0++ = (int8_t)ch_0_out_0;
+
+ ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift);
+ ch_0_out_1 += out_offset;
+ ch_0_out_1 = MAX(ch_0_out_1, activation_min);
+ ch_0_out_1 = MIN(ch_0_out_1, activation_max);
+ *out_1++ = (int8_t)ch_0_out_1;
+ out_mult++;
+ out_shift++;
+
+ ch_1_out_0 = arm_nn_requantize(ch_1_out_0, *out_mult, *out_shift);
+ ch_1_out_0 += out_offset;
+ ch_1_out_0 = MAX(ch_1_out_0, activation_min);
+ ch_1_out_0 = MIN(ch_1_out_0, activation_max);
+ *out_0++ = (int8_t)ch_1_out_0;
+
+ ch_1_out_1 = arm_nn_requantize(ch_1_out_1, *out_mult, *out_shift);
+ ch_1_out_1 += out_offset;
+ ch_1_out_1 = MAX(ch_1_out_1, activation_min);
+ ch_1_out_1 = MIN(ch_1_out_1, activation_max);
+ *out_1++ = (int8_t)ch_1_out_1;
+ out_mult++;
+ out_shift++;
+
+ /* skip row */
+ ip_a0 += num_col_a;
+ row_count--;
+ }
+
+ /* compute the last odd numbered row if any */
+ if (output_ch & 0x1)
+ {
+ /* setup pointers for B */
+ const int16_t *ip_b0 = input_b;
+ const int16_t *ip_b1 = ip_b0 + aligned_num_col_a;
+
+ int32_t ch_0_out_0 = 0;
+ int32_t ch_0_out_1 = 0;
+
+ /* load the bias */
+ if (bias)
+ {
+ ch_0_out_0 = *bias;
+ ch_0_out_1 = *bias++;
+ }
+
+ #if defined(ARM_MATH_DSP)
+ int32_t col_count = num_col_a >> 2;
+ while (col_count)
+ {
+ int32_t a01, a02;
+ int32_t b0 = arm_nn_read_q15x2_ia(&ip_b0);
+ int32_t b1 = arm_nn_read_q15x2_ia(&ip_b1);
+
+ ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02);
+
+ ch_0_out_0 = SMLAD(a01, b0, ch_0_out_0);
+ ch_0_out_1 = SMLAD(a01, b1, ch_0_out_1);
+
+ b0 = arm_nn_read_q15x2_ia(&ip_b0);
+ b1 = arm_nn_read_q15x2_ia(&ip_b1);
+ ch_0_out_0 = SMLAD(a02, b0, ch_0_out_0);
+ ch_0_out_1 = SMLAD(a02, b1, ch_0_out_1);
+
+ col_count--;
+ }
+ col_count = num_col_a & 0x3;
+
+ #else
+ int32_t col_count = num_col_a;
+ #endif
+ while (col_count)
+ {
+ int8_t a0 = *ip_a0++;
+ int16_t b0 = *ip_b0++;
+ int16_t b1 = *ip_b1++;
+
+ ch_0_out_0 += a0 * b0;
+ ch_0_out_1 += a0 * b1;
+ col_count--;
+ }
+
+ ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift);
+ ch_0_out_0 += out_offset;
+ ch_0_out_0 = MAX(ch_0_out_0, activation_min);
+ ch_0_out_0 = MIN(ch_0_out_0, activation_max);
+ *out_0++ = (int8_t)ch_0_out_0;
+
+ ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift);
+ ch_0_out_1 += out_offset;
+ ch_0_out_1 = MAX(ch_0_out_1, activation_min);
+ ch_0_out_1 = MIN(ch_0_out_1, activation_max);
+ *out_1++ = (int8_t)ch_0_out_1;
+ out_mult++;
+ out_shift++;
+ }
+
+ out_0 += 2 * row_address_offset - output_ch;
+
+ /* return the new output pointer with offset */
+ return out_0;
+#else
+ (void)input_a;
+ (void)input_b;
+ (void)output_ch;
+ (void)out_shift;
+ (void)out_mult;
+ (void)out_offset;
+ (void)activation_min;
+ (void)activation_max;
+    (void)aligned_num_col_a;
+    (void)num_col_a;
+ (void)output_bias;
+ (void)row_address_offset;
+ (void)out_0;
+ return NULL;
+#endif
+}
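
The pointer adjustment at the end of the non-MVE path is easiest to see with numbers
(illustrative only, matching a two-group layout where this kernel handles one group):

    /* output_ch = output_ch_per_group = 3, row_address_offset = 6 (total channels).
     * The row loops advance out_0 by 3 (one value per channel of the first column),
     * so out_0 += 2 * 6 - 3 leaves it 2 * row_address_offset = 12 bytes past where it
     * started: the same group's channel block two output pixels further on, since the
     * kernel consumes two im2col columns per call.
     */
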
diff --git a/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c b/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c
index 13c28c59..8b93ca49 100644
--- a/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c
+++ b/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -21,8 +21,8 @@
* Title: arm_nn_mat_mult_s8_nt_t_s8
* Description: Matrix multiplication support function with the right-hand-side (rhs) matrix transposed
*
- * $Date: 22 March 2023
- * $Revision: V.2.1.2
+ * $Date: 04 January 2024
+ * $Revision: V.3.0.0
*
* Target : Arm(R) M-Profile Architecture
*
@@ -58,6 +58,7 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs,
const int32_t dst_offset,
const int32_t activation_min,
const int32_t activation_max,
+ const int32_t row_address_offset,
const int32_t lhs_cols_offset)
{
@@ -140,12 +141,13 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs,
res = vmaxq_s32(res, vdupq_n_s32(activation_min));
res = vminq_s32(res, vdupq_n_s32(activation_max));
- const uint32x4_t scatter_offset = {0, (uint32_t)rhs_rows, (uint32_t)rhs_rows * 2, (uint32_t)rhs_rows * 3};
+ const uint32x4_t scatter_offset = {
+ 0, (uint32_t)row_address_offset, (uint32_t)row_address_offset * 2, (uint32_t)row_address_offset * 3};
vstrbq_scatter_offset_s32(dst, scatter_offset, res);
dst++;
}
lhs += 4 * lhs_cols_offset;
- dst += (3 * rhs_rows);
+ dst += 4 * row_address_offset - rhs_rows;
}
for (; i_items < lhs_rows; i_items++)
@@ -217,9 +219,11 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs,
acc_n0 = MIN(acc_n0, activation_max);
*dst++ = (int8_t)acc_n0;
}
+ dst += row_address_offset - rhs_rows;
}
#elif defined(ARM_MATH_DSP)
+ (void)row_address_offset;
const int32_t rhs_off0 = rhs_cols - 4;
const int32_t lhs_off0 = lhs_cols_offset - 4;
@@ -618,6 +622,7 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs,
}
}
#else
+ (void)row_address_offset;
for (int32_t rhs_rows_idx = 0; rhs_rows_idx <= (rhs_rows - 2); rhs_rows_idx += 2)
{
const int8_t *lhs_ptr = &lhs[0];
diff --git a/Tests/UnitTest/CMakeLists.txt b/Tests/UnitTest/CMakeLists.txt
index 3f24afe1..c6c0ef03 100644
--- a/Tests/UnitTest/CMakeLists.txt
+++ b/Tests/UnitTest/CMakeLists.txt
@@ -1,5 +1,5 @@
#
-# SPDX-FileCopyrightText: Copyright 2019-2023 Arm Limited and/or its affiliates
+# SPDX-FileCopyrightText: Copyright 2019-2024 Arm Limited and/or its affiliates
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -96,6 +96,7 @@ add_subdirectory(TestCases/test_arm_elementwise_mul_s8)
add_subdirectory(TestCases/test_arm_fully_connected_s16)
add_subdirectory(TestCases/test_arm_fully_connected_s8)
add_subdirectory(TestCases/test_arm_fully_connected_s4)
+add_subdirectory(TestCases/test_arm_grouped_convolve_s8)
add_subdirectory(TestCases/test_arm_lstm_unidirectional_s16_s8)
add_subdirectory(TestCases/test_arm_max_pool_s16)
add_subdirectory(TestCases/test_arm_max_pool_s8)
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/biases_data.h
new file mode 100644
index 00000000..35f54d09
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/biases_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_biases[1] = {-22573};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/config_data.h
new file mode 100644
index 00000000..054820ba
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/config_data.h
@@ -0,0 +1,24 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#define GROUPED_CONV_OUT_CH 1
+#define GROUPED_CONV_IN_CH 1
+#define GROUPED_CONV_INPUT_W 7
+#define GROUPED_CONV_INPUT_H 7
+#define GROUPED_CONV_DST_SIZE 16
+#define GROUPED_CONV_INPUT_SIZE 49
+#define GROUPED_CONV_OUT_ACTIVATION_MIN -128
+#define GROUPED_CONV_OUT_ACTIVATION_MAX 127
+#define GROUPED_CONV_INPUT_BATCHES 1
+#define GROUPED_CONV_FILTER_X 3
+#define GROUPED_CONV_FILTER_Y 3
+#define GROUPED_CONV_STRIDE_X 2
+#define GROUPED_CONV_STRIDE_Y 2
+#define GROUPED_CONV_PAD_X 1
+#define GROUPED_CONV_PAD_Y 1
+#define GROUPED_CONV_OUTPUT_W 4
+#define GROUPED_CONV_OUTPUT_H 4
+#define GROUPED_CONV_INPUT_OFFSET 128
+#define GROUPED_CONV_OUTPUT_OFFSET 127
+#define GROUPED_CONV_DILATION_X 1
+#define GROUPED_CONV_DILATION_Y 1
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/input_data.h
new file mode 100644
index 00000000..e684c4ed
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/input_data.h
@@ -0,0 +1,9 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_input[49] = {-97, 6, 19, -32, 76, 79, -74, 85, -66, -7, -29, 107, 118,
+ 65, 30, 45, -92, -123, -42, -25, -49, 74, -81, -2, -106, 3,
+ 69, -55, -13, -107, -59, -56, -30, 52, 124, 91, 90, 19, -71,
+ -47, 11, 113, 38, 77, -54, 24, 57, -90, 115};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_mult_data.h
new file mode 100644
index 00000000..b897ee2a
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_mult_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_output_mult[1] = {1972783891};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_ref_data.h
new file mode 100644
index 00000000..ff634014
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_ref_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_output_ref[16] = {112, 23, 7, 40, 48, 29, -15, 5, 28, 36, -12, 10, -7, -3, -16, -11};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_shift_data.h
new file mode 100644
index 00000000..190c42cb
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_shift_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_output_shift[1] = {-9};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/test_data.h
new file mode 100644
index 00000000..2c64febe
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/test_data.h
@@ -0,0 +1,9 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/weights_data.h
new file mode 100644
index 00000000..b00fa385
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/weights_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_weights[9] = {-13, -73, -73, -113, -127, 4, -75, 117, -116};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/biases_data.h
new file mode 100644
index 00000000..09b09752
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/biases_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_1_biases[6] = {0, 0, 0, 0, 0, 0};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/config_data.h
new file mode 100644
index 00000000..8c0bb9c9
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/config_data.h
@@ -0,0 +1,25 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#define GROUPED_CONV_1_OUT_CH 6
+#define GROUPED_CONV_1_IN_CH 2
+#define GROUPED_CONV_1_INPUT_W 5
+#define GROUPED_CONV_1_INPUT_H 5
+#define GROUPED_CONV_1_DST_SIZE 192
+#define GROUPED_CONV_1_INPUT_SIZE 50
+#define GROUPED_CONV_1_OUT_ACTIVATION_MIN -128
+#define GROUPED_CONV_1_OUT_ACTIVATION_MAX 127
+#define GROUPED_CONV_1_INPUT_BATCHES 2
+#define GROUPED_CONV_1_FILTER_X 2
+#define GROUPED_CONV_1_FILTER_Y 2
+#define GROUPED_CONV_1_STRIDE_X 1
+#define GROUPED_CONV_1_STRIDE_Y 1
+#define GROUPED_CONV_1_PAD_X 0
+#define GROUPED_CONV_1_PAD_Y 0
+#define GROUPED_CONV_1_OUTPUT_W 4
+#define GROUPED_CONV_1_OUTPUT_H 4
+#define GROUPED_CONV_1_INPUT_OFFSET 128
+#define GROUPED_CONV_1_OUTPUT_OFFSET -56
+#define GROUPED_CONV_1_DILATION_X 1
+#define GROUPED_CONV_1_DILATION_Y 1
+#define GROUPED_CONV_1_FILTER_CH 1
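
The generated macros are typically consumed by a unit test that is not part of this excerpt;
a hedged sketch of how they would map onto the public API, assuming the standard
cmsis_nn_conv_params and cmsis_nn_dims layouts:

    const cmsis_nn_conv_params conv_params = {
        .input_offset = GROUPED_CONV_1_INPUT_OFFSET,
        .output_offset = GROUPED_CONV_1_OUTPUT_OFFSET,
        .stride = {GROUPED_CONV_1_STRIDE_X, GROUPED_CONV_1_STRIDE_Y},
        .padding = {GROUPED_CONV_1_PAD_X, GROUPED_CONV_1_PAD_Y},
        .dilation = {GROUPED_CONV_1_DILATION_X, GROUPED_CONV_1_DILATION_Y},
        .activation = {GROUPED_CONV_1_OUT_ACTIVATION_MIN, GROUPED_CONV_1_OUT_ACTIVATION_MAX},
    };
    const cmsis_nn_dims input_dims = {GROUPED_CONV_1_INPUT_BATCHES, GROUPED_CONV_1_INPUT_H,
                                      GROUPED_CONV_1_INPUT_W, GROUPED_CONV_1_IN_CH};
    const cmsis_nn_dims filter_dims = {GROUPED_CONV_1_OUT_CH, GROUPED_CONV_1_FILTER_Y,
                                       GROUPED_CONV_1_FILTER_X, GROUPED_CONV_1_FILTER_CH};
    const cmsis_nn_dims output_dims = {GROUPED_CONV_1_INPUT_BATCHES, GROUPED_CONV_1_OUTPUT_H,
                                       GROUPED_CONV_1_OUTPUT_W, GROUPED_CONV_1_OUT_CH};
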
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/input_data.h
new file mode 100644
index 00000000..cd7ba022
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/input_data.h
@@ -0,0 +1,11 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_1_input[100] = {
+ 34, -101, 76, -112, -64, -54, 24, 76, -103, 12, 81, -64, 124, 27, -88, 18, -98, -40, -104, 24,
+ -105, -114, -108, -39, 46, 124, -28, -17, -117, -91, 89, -25, -5, 23, -11, 86, -45, -125, 72, -35,
+ 48, -126, 25, -6, -37, -114, 95, -48, -107, 115, -109, 123, 80, 81, -51, -118, -113, -85, -3, -75,
+ 21, -65, -120, -2, -110, -109, -98, -99, -83, -116, -50, 76, 33, 33, 92, 61, -14, 24, -103, 84,
+ 115, 79, 15, -62, -50, -102, -108, -128, 125, 63, 110, 100, -71, -47, 24, 33, 69, -27, -81, 40};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_mult_data.h
new file mode 100644
index 00000000..dff13443
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_mult_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_1_output_mult[6] = {1438488183, 1655654472, 1389835298, 1454332884, 1200344887, 2074611644};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_ref_data.h
new file mode 100644
index 00000000..8dd8212c
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_ref_data.h
@@ -0,0 +1,15 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_1_output_ref[192] = {
+ -113, -39, 63, -87, -14, -19, -38, -1, 56, -33, 9, -18, -106, -62, -54, 11, 43, -19, -76, -43, -20, -76,
+ 51, -9, -128, -64, -57, -46, 14, -19, -109, -46, 28, -97, 61, 16, -35, -28, 26, -11, 38, -29, -42, -38,
+ -21, -2, 19, -35, -21, -21, 45, -47, 8, -14, -96, -56, -17, -17, 81, 16, -86, -36, 12, -2, 39, -52,
+ -69, -44, 36, -97, -12, -31, -90, -27, 52, -71, 23, -12, -78, -35, 14, 15, 43, -35, -95, -50, 33, -121,
+ -6, -39, -76, -30, -3, -82, 20, 9, -89, -49, -42, -64, 70, -8, -104, -48, -29, -60, -3, -56, -68, -50,
+ -33, -48, -35, -45, -96, -65, -56, -43, -29, -49, -71, -39, 33, -15, 39, -11, -43, -38, 49, -85, 19, -15,
+ -22, -22, 41, -38, 2, -22, -47, -38, -17, -78, 5, -9, -69, -28, 40, -1, 55, -31, -118, -44, -6, -17,
+ 34, -33, -100, -38, -11, -39, 19, -42, -97, -59, 28, -86, 59, 11, -74, -11, 48, -28, 34, -38, -98, -51,
+ 9, -88, -2, -18, -51, -33, 52, -41, -17, -37, -96, -47, -26, -19, 36, 1};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_shift_data.h
new file mode 100644
index 00000000..c5689f9b
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_shift_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_1_output_shift[6] = {-8, -9, -8, -8, -8, -9};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/test_data.h
new file mode 100644
index 00000000..2c64febe
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/test_data.h
@@ -0,0 +1,9 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/weights_data.h
new file mode 100644
index 00000000..af93bc4d
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/weights_data.h
@@ -0,0 +1,7 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_1_weights[24] = {-42, -127, 99, -38, 50, -72, 127, -34, 65, -75, 127, 100,
+ -77, 116, 127, -126, 73, 127, 51, 77, -4, 55, -16, 127};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/biases_data.h
new file mode 100644
index 00000000..3ec466aa
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/biases_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_2_biases[2] = {-22972, -40991};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/config_data.h
new file mode 100644
index 00000000..ca166e5b
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/config_data.h
@@ -0,0 +1,25 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#define GROUPED_CONV_2_OUT_CH 2
+#define GROUPED_CONV_2_IN_CH 4
+#define GROUPED_CONV_2_INPUT_W 7
+#define GROUPED_CONV_2_INPUT_H 3
+#define GROUPED_CONV_2_DST_SIZE 28
+#define GROUPED_CONV_2_INPUT_SIZE 84
+#define GROUPED_CONV_2_OUT_ACTIVATION_MIN -128
+#define GROUPED_CONV_2_OUT_ACTIVATION_MAX 127
+#define GROUPED_CONV_2_INPUT_BATCHES 1
+#define GROUPED_CONV_2_FILTER_X 1
+#define GROUPED_CONV_2_FILTER_Y 2
+#define GROUPED_CONV_2_STRIDE_X 1
+#define GROUPED_CONV_2_STRIDE_Y 1
+#define GROUPED_CONV_2_PAD_X 0
+#define GROUPED_CONV_2_PAD_Y 0
+#define GROUPED_CONV_2_OUTPUT_W 7
+#define GROUPED_CONV_2_OUTPUT_H 2
+#define GROUPED_CONV_2_INPUT_OFFSET 128
+#define GROUPED_CONV_2_OUTPUT_OFFSET 127
+#define GROUPED_CONV_2_DILATION_X 1
+#define GROUPED_CONV_2_DILATION_Y 1
+#define GROUPED_CONV_2_FILTER_CH 2
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/input_data.h
new file mode 100644
index 00000000..e6292c97
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/input_data.h
@@ -0,0 +1,10 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_2_input[84] = {
+ 84, -53, -94, -95, -64, -85, -94, 3, -21, -109, 123, -90, -108, -34, 97, -106, 104, 9, -106, -14, -110,
+ 116, 95, 8, 116, 77, -70, -51, -67, -84, 3, -10, -69, -92, 23, -116, 104, 86, 37, -97, -108, -108,
+ -90, 8, -56, 50, -48, -23, 102, -76, -73, 0, 5, 0, -8, -100, 112, 31, 41, 99, 116, 96, -46,
+ 93, -110, -96, -70, -61, 120, 62, 2, -70, -48, 56, -121, 117, -24, -21, 61, -104, 41, -77, -97, 99};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_mult_data.h
new file mode 100644
index 00000000..4ba76479
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_mult_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_2_output_mult[2] = {1191882470, 1352029555};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_ref_data.h
new file mode 100644
index 00000000..a6626227
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_ref_data.h
@@ -0,0 +1,7 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_2_output_ref[28] = {127, -128, 3, -82, 63, -27, -93, -61, 1, -123, 36, -26, 103, -86,
+ 70, -128, 9, -128, 59, -51, 15, -99, -128, -128, 127, -95, 127, -128};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_shift_data.h
new file mode 100644
index 00000000..a1ce4b56
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_shift_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_2_output_shift[2] = {-6, -7};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/test_data.h
new file mode 100644
index 00000000..2c64febe
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/test_data.h
@@ -0,0 +1,9 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/weights_data.h
new file mode 100644
index 00000000..0b6ef315
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/weights_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_2_weights[8] = {120, -52, 127, -120, 96, 67, -79, -127};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/biases_data.h
new file mode 100644
index 00000000..3d1a303c
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/biases_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_3_biases[4] = {-16494, 1675, -6184, -27704};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/config_data.h
new file mode 100644
index 00000000..0ce23d83
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/config_data.h
@@ -0,0 +1,25 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#define GROUPED_CONV_3_OUT_CH 4
+#define GROUPED_CONV_3_IN_CH 2
+#define GROUPED_CONV_3_INPUT_W 3
+#define GROUPED_CONV_3_INPUT_H 2
+#define GROUPED_CONV_3_DST_SIZE 16
+#define GROUPED_CONV_3_INPUT_SIZE 12
+#define GROUPED_CONV_3_OUT_ACTIVATION_MIN -128
+#define GROUPED_CONV_3_OUT_ACTIVATION_MAX 127
+#define GROUPED_CONV_3_INPUT_BATCHES 2
+#define GROUPED_CONV_3_FILTER_X 3
+#define GROUPED_CONV_3_FILTER_Y 2
+#define GROUPED_CONV_3_STRIDE_X 2
+#define GROUPED_CONV_3_STRIDE_Y 2
+#define GROUPED_CONV_3_PAD_X 1
+#define GROUPED_CONV_3_PAD_Y 0
+#define GROUPED_CONV_3_OUTPUT_W 2
+#define GROUPED_CONV_3_OUTPUT_H 1
+#define GROUPED_CONV_3_INPUT_OFFSET 128
+#define GROUPED_CONV_3_OUTPUT_OFFSET 47
+#define GROUPED_CONV_3_DILATION_X 1
+#define GROUPED_CONV_3_DILATION_Y 1
+#define GROUPED_CONV_3_FILTER_CH 1
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/input_data.h
new file mode 100644
index 00000000..09c0bdfe
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/input_data.h
@@ -0,0 +1,7 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_3_input[24] = {49, 51, -76, 16, 5, -65, 34, 38, -9, -95, -84, -104,
+ 41, -114, 85, -57, 58, -110, -3, -122, -115, -89, 60, 55};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_mult_data.h
new file mode 100644
index 00000000..bf56c239
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_mult_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_3_output_mult[4] = {1096383366, 1906223722, 1992067095, 1833146972};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_ref_data.h
new file mode 100644
index 00000000..aad049e5
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_ref_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_3_output_ref[16] = {-41, 43, -38, 16, -16, 77, 60, -6, -22, 30, 11, 7, -18, 93, 32, -7};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_shift_data.h
new file mode 100644
index 00000000..1bccd965
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_shift_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_3_output_shift[4] = {-8, -9, -9, -9};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/test_data.h
new file mode 100644
index 00000000..2c64febe
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/test_data.h
@@ -0,0 +1,9 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/weights_data.h
new file mode 100644
index 00000000..9d7c6217
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/weights_data.h
@@ -0,0 +1,7 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_3_weights[24] = {117, -127, 67, -4, -92, 52, 127, 22, -50, 63, -37, 8,
+ 127, -66, -106, 14, -56, -125, -16, 38, -12, -127, 4, 108};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/biases_data.h
new file mode 100644
index 00000000..fa8fc17e
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/biases_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_4_biases[6] = {-14062, 34706, -14058, 6024, 3930, 9268};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/config_data.h
new file mode 100644
index 00000000..3ab708f9
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/config_data.h
@@ -0,0 +1,25 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#define GROUPED_CONV_4_OUT_CH 6
+#define GROUPED_CONV_4_IN_CH 3
+#define GROUPED_CONV_4_INPUT_W 9
+#define GROUPED_CONV_4_INPUT_H 9
+#define GROUPED_CONV_4_DST_SIZE 486
+#define GROUPED_CONV_4_INPUT_SIZE 243
+#define GROUPED_CONV_4_OUT_ACTIVATION_MIN -128
+#define GROUPED_CONV_4_OUT_ACTIVATION_MAX 127
+#define GROUPED_CONV_4_INPUT_BATCHES 1
+#define GROUPED_CONV_4_FILTER_X 2
+#define GROUPED_CONV_4_FILTER_Y 2
+#define GROUPED_CONV_4_STRIDE_X 1
+#define GROUPED_CONV_4_STRIDE_Y 1
+#define GROUPED_CONV_4_PAD_X 1
+#define GROUPED_CONV_4_PAD_Y 1
+#define GROUPED_CONV_4_OUTPUT_W 9
+#define GROUPED_CONV_4_OUTPUT_H 9
+#define GROUPED_CONV_4_INPUT_OFFSET 128
+#define GROUPED_CONV_4_OUTPUT_OFFSET -62
+#define GROUPED_CONV_4_DILATION_X 3
+#define GROUPED_CONV_4_DILATION_Y 3
+#define GROUPED_CONV_4_FILTER_CH 1
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/input_data.h
new file mode 100644
index 00000000..f1d86ff5
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/input_data.h
@@ -0,0 +1,19 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_4_input[243] = {
+ 74, -104, -81, 46, -118, -58, -106, 95, 114, -72, 59, 89, 123, -2, -41, -104, 87, 23, 0,
+ 66, -119, -102, 103, 85, 35, 67, 59, -118, -108, -74, -67, -100, -55, 111, -58, -6, 89, -22,
+ -66, 45, -54, -62, -91, 87, 0, -122, 59, -78, 78, 59, -24, -46, 66, 125, 71, -15, 124,
+ 82, -21, 12, -119, 3, 126, 82, -29, 85, 14, -25, -91, 77, -23, -106, 107, -35, -14, 64,
+ 83, -39, 79, 29, 19, 92, 95, 91, 70, 21, 70, 38, 110, 49, 112, -41, -125, -97, 6,
+ -48, 36, -76, 48, -8, -79, 93, -33, -96, -61, 57, -90, -41, 36, -63, 92, -39, 76, -39,
+ 33, 65, -58, -13, -94, -91, 115, 104, -42, -11, -97, -66, -100, 82, 125, 67, 39, 75, -21,
+ 2, -53, 2, 120, 16, -39, 64, 70, -34, -52, 84, -114, 79, -80, 42, 86, 105, -53, 49,
+ -22, -37, -72, -115, -33, -99, 55, -57, 34, -126, 1, 123, 16, -61, -76, 25, 98, 111, 124,
+ -64, 97, 103, 5, 32, 76, -30, 6, -96, 106, -33, -36, -8, 56, -103, -61, -39, -41, -99,
+ 55, -112, 109, 106, -115, 58, 110, -55, -85, -81, -3, -20, -58, -21, 122, -16, 93, -37, 75,
+ -64, -90, -94, 47, -83, 15, -111, 97, -76, 69, 90, 91, -112, -87, -4, 18, -65, 79, 48,
+ -115, 19, -45, -31, -99, -38, -8, 29, -79, 75, 15, 10, -79, -46, 29};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_mult_data.h
new file mode 100644
index 00000000..c58a42ae
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_mult_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_4_output_mult[6] = {1813678179, 1858473757, 1768903071, 1183163162, 1102339756, 1617679616};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_ref_data.h
new file mode 100644
index 00000000..e9328247
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_ref_data.h
@@ -0,0 +1,28 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_4_output_ref[486] = {
+ -83, -3, -62, -40, 5, -46, -77, 53, -44, -39, 20, -2, -93, 51, -45, -39, -32, -23, -47, 12, -40, -38, -24,
+ -3, -74, 57, -48, -40, -7, -10, -71, 40, -27, -32, -30, -41, -79, 54, -36, -35, -18, -43, -124, 47, -66, -46,
+ -43, -28, -117, 38, -42, -42, -45, -33, -52, 8, -14, -62, 49, -60, -30, 33, 1, -54, 29, -13, -65, 23, -14,
+ -49, 15, -4, -75, 41, 4, -5, 73, 21, -85, 51, -30, -17, 27, -6, -18, -25, -19, -38, 35, -29, -64, 45,
+ -39, -11, 50, 1, -78, 4, -73, 6, -31, -8, -96, 13, -76, 15, -21, 3, -44, 13, -42, -45, -7, -54, -79,
+ 43, -51, -53, -1, -3, -33, 27, 7, -29, 1, -22, -38, 6, -10, -49, 15, -20, -53, -7, -15, -29, 29, -31,
+ -43, 42, 18, -34, 36, -25, -73, 26, -28, -6, 50, -27, -89, 2, -40, 9, -23, 6, -75, 10, -49, 7, -21,
+ 7, -67, 3, -54, -61, 60, -61, -53, 3, 16, -9, 57, 13, -17, 1, 6, -13, 47, 12, -76, 25, -23, -13,
+ 29, 36, -16, -18, -19, -24, 10, -6, -57, 24, -6, -39, 39, -4, -32, -6, 2, -24, -3, -34, -51, -17, -72,
+ -22, -38, -25, -61, -10, -77, 10, -25, 1, -34, 15, -11, -64, 50, -56, -38, 3, 19, 24, 42, 20, -28, -7,
+ -27, -13, 54, 13, -62, 32, -3, 26, 45, 20, 5, -5, -15, -18, 47, -18, -73, 31, -15, 1, -1, -1, -46,
+ 3, -36, -29, 14, -14, -99, 30, -65, -33, -17, 9, -85, 9, -47, -34, -43, -31, -42, 13, -18, -58, -19, -52,
+ -42, -15, -34, -27, 14, -6, -75, 48, 9, -11, 5, -33, -29, 33, -11, 15, 29, -24, -49, -4, -11, -52, 42,
+ -33, -37, -7, -40, -3, 60, -23, -89, 39, -19, -44, -2, -2, -94, 12, -41, 15, -16, 10, -51, -23, -76, -2,
+ -11, 20, -73, 2, -53, -51, 34, -59, -83, 31, -8, 8, 37, 12, -93, 36, 16, -7, 51, -21, -50, 0, -30,
+ -45, 35, 12, -65, 18, -6, 17, -16, -9, -8, -9, -24, 20, 62, 1, -73, 12, -41, -15, 5, -12, -86, 10,
+ -52, -29, -48, -37, -99, 27, -55, -36, -17, 10, -76, 2, -52, -80, 10, -62, -55, -20, -52, -9, 23, -34, -65,
+ -10, -62, -55, 17, -31, -34, -33, -64, -1, -14, -3, -62, -7, -70, 0, -2, -10, -52, -19, -58, -29, -22, -12,
+ -62, -15, -71, -24, -28, -47, -36, -36, -84, -23, -42, -31, -60, -20, -83, 1, -51, -44, -77, 1, -52, -80, -36,
+ -52, -77, -6, -76, -5, -20, -52, -31, -34, -73, 6, -25, -52, -36, -24, -67, 2, 11, -47, -72, -7, -57, -62,
+ -22, -28, -61, -18, -79, -34, 4, -38, -31, -37, -64, -37, -22, -8, -66, -16, -83, 6, -46, -36, -77, -9, -84,
+ -40, -32, -16};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_shift_data.h
new file mode 100644
index 00000000..4ebe6c11
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_shift_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_4_output_shift[6] = {-9, -9, -9, -8, -8, -9};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/test_data.h
new file mode 100644
index 00000000..2c64febe
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/test_data.h
@@ -0,0 +1,9 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/weights_data.h
new file mode 100644
index 00000000..87cb7b8d
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/weights_data.h
@@ -0,0 +1,7 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_4_weights[24] = {127, 26, -101, 118, -83, 14, 127, 39, 6, 84, 127, 107,
+ 127, -60, 15, 31, 63, 127, 49, 116, 127, -37, 120, 5};
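As a quick sanity check on the generated data above: with GROUPED_CONV_4_FILTER_CH = IN_CH / groups = 1, the weight tensor holds C_OUT * HK * WK * CK = 6 * 2 * 2 * 1 = 24 values, which matches the array size. The snippet below is an illustrative check only, not part of the patch; the helper name is made up and the include paths assume the unit-test directory layout.

/* Illustrative only -- not part of the patch. Checks the grouped_conv_4
 * weight count against the [C_OUT, HK, WK, CK] layout from config_data.h. */
#include <assert.h>
#include <stdint.h>

#include "../TestData/grouped_conv_4/config_data.h"
#include "../TestData/grouped_conv_4/weights_data.h"

static void grouped_conv_4_weight_count_check(void)
{
    const int32_t expected = GROUPED_CONV_4_OUT_CH * GROUPED_CONV_4_FILTER_Y * GROUPED_CONV_4_FILTER_X *
                             GROUPED_CONV_4_FILTER_CH; /* 6 * 2 * 2 * 1 = 24 */
    assert(expected == (int32_t)(sizeof(grouped_conv_4_weights) / sizeof(grouped_conv_4_weights[0])));
}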
diff --git a/Tests/UnitTest/TestCases/test_arm_convolve_1_x_n_s8/test_arm_convolve_1_x_n_s8.c b/Tests/UnitTest/TestCases/test_arm_convolve_1_x_n_s8/test_arm_convolve_1_x_n_s8.c
index 1d9f9824..afe45fa3 100644
--- a/Tests/UnitTest/TestCases/test_arm_convolve_1_x_n_s8/test_arm_convolve_1_x_n_s8.c
+++ b/Tests/UnitTest/TestCases/test_arm_convolve_1_x_n_s8/test_arm_convolve_1_x_n_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2023-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -53,6 +53,7 @@ void conv_1_x_n_1_arm_convolve_s8(void)
input_dims.c = CONV_1_X_N_1_IN_CH;
filter_dims.w = CONV_1_X_N_1_FILTER_X;
filter_dims.h = CONV_1_X_N_1_FILTER_Y;
+ filter_dims.c = CONV_1_X_N_1_IN_CH;
output_dims.w = CONV_1_X_N_1_OUTPUT_W;
output_dims.h = CONV_1_X_N_1_OUTPUT_H;
output_dims.c = CONV_1_X_N_1_OUT_CH;
@@ -144,6 +145,7 @@ void conv_1_x_n_2_arm_convolve_s8(void)
input_dims.c = CONV_1_X_N_2_IN_CH;
filter_dims.w = CONV_1_X_N_2_FILTER_X;
filter_dims.h = CONV_1_X_N_2_FILTER_Y;
+ filter_dims.c = CONV_1_X_N_2_IN_CH;
output_dims.w = CONV_1_X_N_2_OUTPUT_W;
output_dims.h = CONV_1_X_N_2_OUTPUT_H;
output_dims.c = CONV_1_X_N_2_OUT_CH;
@@ -233,6 +235,7 @@ void conv_1_x_n_3_arm_convolve_s8(void)
input_dims.c = CONV_1_X_N_3_IN_CH;
filter_dims.w = CONV_1_X_N_3_FILTER_X;
filter_dims.h = CONV_1_X_N_3_FILTER_Y;
+ filter_dims.c = CONV_1_X_N_3_IN_CH;
output_dims.w = CONV_1_X_N_3_OUTPUT_W;
output_dims.h = CONV_1_X_N_3_OUTPUT_H;
output_dims.c = CONV_1_X_N_3_OUT_CH;
@@ -324,6 +327,7 @@ void conv_1_x_n_4_arm_convolve_s8(void)
input_dims.c = CONV_1_X_N_4_IN_CH;
filter_dims.w = CONV_1_X_N_4_FILTER_X;
filter_dims.h = CONV_1_X_N_4_FILTER_Y;
+ filter_dims.c = CONV_1_X_N_4_IN_CH;
output_dims.w = CONV_1_X_N_4_OUTPUT_W;
output_dims.h = CONV_1_X_N_4_OUTPUT_H;
output_dims.c = CONV_1_X_N_4_OUT_CH;
@@ -415,6 +419,7 @@ void conv_1_x_n_5_arm_convolve_s8(void)
input_dims.c = CONV_1_X_N_5_IN_CH;
filter_dims.w = CONV_1_X_N_5_FILTER_X;
filter_dims.h = CONV_1_X_N_5_FILTER_Y;
+ filter_dims.c = CONV_1_X_N_5_IN_CH;
output_dims.w = CONV_1_X_N_5_OUTPUT_W;
output_dims.h = CONV_1_X_N_5_OUTPUT_H;
output_dims.c = CONV_1_X_N_5_OUT_CH;
@@ -502,6 +507,7 @@ void conv_1_x_n_6_arm_convolve_s8(void)
input_dims.c = CONV_1_X_N_3_IN_CH;
filter_dims.w = CONV_1_X_N_3_FILTER_X;
filter_dims.h = CONV_1_X_N_3_FILTER_Y;
+ filter_dims.c = CONV_1_X_N_3_IN_CH;
output_dims.w = CONV_1_X_N_3_OUTPUT_W;
output_dims.h = CONV_1_X_N_3_OUTPUT_H;
output_dims.c = CONV_1_X_N_3_OUT_CH;
diff --git a/Tests/UnitTest/TestCases/test_arm_convolve_s8/test_arm_convolve_s8.c b/Tests/UnitTest/TestCases/test_arm_convolve_s8/test_arm_convolve_s8.c
index 3d37c7ef..a6065946 100644
--- a/Tests/UnitTest/TestCases/test_arm_convolve_s8/test_arm_convolve_s8.c
+++ b/Tests/UnitTest/TestCases/test_arm_convolve_s8/test_arm_convolve_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -61,6 +61,7 @@ void basic_arm_convolve_s8(void)
input_dims.c = BASIC_IN_CH;
filter_dims.w = BASIC_FILTER_X;
filter_dims.h = BASIC_FILTER_Y;
+ filter_dims.c = BASIC_IN_CH;
output_dims.w = BASIC_OUTPUT_W;
output_dims.h = BASIC_OUTPUT_H;
output_dims.c = BASIC_OUT_CH;
@@ -155,6 +156,7 @@ void stride2pad1_arm_convolve_s8(void)
input_dims.c = STRIDE2PAD1_IN_CH;
filter_dims.w = STRIDE2PAD1_FILTER_X;
filter_dims.h = STRIDE2PAD1_FILTER_Y;
+ filter_dims.c = STRIDE2PAD1_IN_CH;
output_dims.w = STRIDE2PAD1_OUTPUT_W;
output_dims.h = STRIDE2PAD1_OUTPUT_H;
output_dims.c = STRIDE2PAD1_OUT_CH;
@@ -248,6 +250,7 @@ void conv_2_arm_convolve_s8(void)
input_dims.c = CONV_2_IN_CH;
filter_dims.w = CONV_2_FILTER_X;
filter_dims.h = CONV_2_FILTER_Y;
+ filter_dims.c = CONV_2_IN_CH;
output_dims.w = CONV_2_OUTPUT_W;
output_dims.h = CONV_2_OUTPUT_H;
output_dims.c = CONV_2_OUT_CH;
@@ -341,6 +344,7 @@ void conv_3_arm_convolve_s8(void)
input_dims.c = CONV_3_IN_CH;
filter_dims.w = CONV_3_FILTER_X;
filter_dims.h = CONV_3_FILTER_Y;
+ filter_dims.c = CONV_3_IN_CH;
output_dims.w = CONV_3_OUTPUT_W;
output_dims.h = CONV_3_OUTPUT_H;
output_dims.c = CONV_3_OUT_CH;
@@ -434,6 +438,7 @@ void conv_4_arm_convolve_s8(void)
input_dims.c = CONV_4_IN_CH;
filter_dims.w = CONV_4_FILTER_X;
filter_dims.h = CONV_4_FILTER_Y;
+ filter_dims.c = CONV_4_IN_CH;
output_dims.w = CONV_4_OUTPUT_W;
output_dims.h = CONV_4_OUTPUT_H;
output_dims.c = CONV_4_OUT_CH;
@@ -526,6 +531,7 @@ void conv_out_activation_arm_convolve_s8(void)
input_dims.c = CONV_OUT_ACTIVATION_IN_CH;
filter_dims.w = CONV_OUT_ACTIVATION_FILTER_X;
filter_dims.h = CONV_OUT_ACTIVATION_FILTER_Y;
+ filter_dims.c = CONV_OUT_ACTIVATION_IN_CH;
output_dims.w = CONV_OUT_ACTIVATION_OUTPUT_W;
output_dims.h = CONV_OUT_ACTIVATION_OUTPUT_H;
output_dims.c = CONV_OUT_ACTIVATION_OUT_CH;
@@ -592,6 +598,7 @@ void conv_2x2_dilation_arm_convolve_s8(void)
input_dims.c = CONV_2X2_DILATION_IN_CH;
filter_dims.w = CONV_2X2_DILATION_FILTER_X;
filter_dims.h = CONV_2X2_DILATION_FILTER_Y;
+ filter_dims.c = CONV_2X2_DILATION_IN_CH;
output_dims.w = CONV_2X2_DILATION_OUTPUT_W;
output_dims.h = CONV_2X2_DILATION_OUTPUT_H;
output_dims.c = CONV_2X2_DILATION_OUT_CH;
@@ -685,6 +692,7 @@ void conv_2x2_dilation_5x5_input_arm_convolve_s8(void)
input_dims.c = CONV_2X2_DILATION_5X5_INPUT_IN_CH;
filter_dims.w = CONV_2X2_DILATION_5X5_INPUT_FILTER_X;
filter_dims.h = CONV_2X2_DILATION_5X5_INPUT_FILTER_Y;
+ filter_dims.c = CONV_2X2_DILATION_5X5_INPUT_IN_CH;
output_dims.w = CONV_2X2_DILATION_5X5_INPUT_OUTPUT_W;
output_dims.h = CONV_2X2_DILATION_5X5_INPUT_OUTPUT_H;
output_dims.c = CONV_2X2_DILATION_5X5_INPUT_OUT_CH;
@@ -776,6 +784,7 @@ void conv_3x3_dilation_5x5_input_arm_convolve_s8(void)
input_dims.c = CONV_3X3_DILATION_5X5_INPUT_IN_CH;
filter_dims.w = CONV_3X3_DILATION_5X5_INPUT_FILTER_X;
filter_dims.h = CONV_3X3_DILATION_5X5_INPUT_FILTER_Y;
+ filter_dims.c = CONV_3X3_DILATION_5X5_INPUT_IN_CH;
output_dims.w = CONV_3X3_DILATION_5X5_INPUT_OUTPUT_W;
output_dims.h = CONV_3X3_DILATION_5X5_INPUT_OUTPUT_H;
output_dims.c = CONV_3X3_DILATION_5X5_INPUT_OUT_CH;
@@ -867,6 +876,7 @@ void conv_2x3_dilation_arm_convolve_s8(void)
input_dims.c = CONV_2X3_DILATION_IN_CH;
filter_dims.w = CONV_2X3_DILATION_FILTER_X;
filter_dims.h = CONV_2X3_DILATION_FILTER_Y;
+ filter_dims.c = CONV_2X3_DILATION_IN_CH;
output_dims.w = CONV_2X3_DILATION_OUTPUT_W;
output_dims.h = CONV_2X3_DILATION_OUTPUT_H;
output_dims.c = CONV_2X3_DILATION_OUT_CH;
@@ -958,6 +968,7 @@ void conv_3x2_dilation_arm_convolve_s8(void)
input_dims.c = CONV_3X2_DILATION_IN_CH;
filter_dims.w = CONV_3X2_DILATION_FILTER_X;
filter_dims.h = CONV_3X2_DILATION_FILTER_Y;
+ filter_dims.c = CONV_3X2_DILATION_IN_CH;
output_dims.w = CONV_3X2_DILATION_OUTPUT_W;
output_dims.h = CONV_3X2_DILATION_OUTPUT_H;
output_dims.c = CONV_3X2_DILATION_OUT_CH;
@@ -1049,6 +1060,7 @@ void conv_dilation_golden_arm_convolve_s8(void)
input_dims.c = CONV_DILATION_GOLDEN_IN_CH;
filter_dims.w = CONV_DILATION_GOLDEN_FILTER_X;
filter_dims.h = CONV_DILATION_GOLDEN_FILTER_Y;
+ filter_dims.c = CONV_DILATION_GOLDEN_IN_CH;
output_dims.w = CONV_DILATION_GOLDEN_OUTPUT_W;
output_dims.h = CONV_DILATION_GOLDEN_OUTPUT_H;
output_dims.c = CONV_DILATION_GOLDEN_OUT_CH;
@@ -1140,6 +1152,7 @@ void conv_5_arm_convolve_s8(void)
input_dims.c = CONV_5_IN_CH;
filter_dims.w = CONV_5_FILTER_X;
filter_dims.h = CONV_5_FILTER_Y;
+ filter_dims.c = CONV_5_IN_CH;
output_dims.w = CONV_5_OUTPUT_W;
output_dims.h = CONV_5_OUTPUT_H;
output_dims.c = CONV_5_OUT_CH;
@@ -1221,6 +1234,7 @@ void buffer_size_arm_convolve_s8(void)
input_dims.c = CONV_5_IN_CH;
filter_dims.w = CONV_5_FILTER_X;
filter_dims.h = CONV_5_FILTER_Y;
+ filter_dims.c = CONV_5_IN_CH;
output_dims.w = CONV_5_OUTPUT_W;
output_dims.h = CONV_5_OUTPUT_H;
output_dims.c = CONV_5_OUT_CH;
@@ -1258,6 +1272,7 @@ void buffer_size_mve_arm_convolve_s8(void)
input_dims.c = CONV_5_IN_CH;
filter_dims.w = CONV_5_FILTER_X;
filter_dims.h = CONV_5_FILTER_Y;
+ filter_dims.c = CONV_5_IN_CH;
output_dims.w = CONV_5_OUTPUT_W;
output_dims.h = CONV_5_OUTPUT_H;
output_dims.c = CONV_5_OUT_CH;
@@ -1297,6 +1312,7 @@ void buffer_size_dsp_arm_convolve_s8(void)
input_dims.c = CONV_5_IN_CH;
filter_dims.w = CONV_5_FILTER_X;
filter_dims.h = CONV_5_FILTER_Y;
+ filter_dims.c = CONV_5_IN_CH;
output_dims.w = CONV_5_OUTPUT_W;
output_dims.h = CONV_5_OUTPUT_H;
output_dims.c = CONV_5_OUT_CH;
diff --git a/Tests/UnitTest/TestCases/test_arm_ds_cnn_l_s8/test_arm_ds_cnn_l_s8.c b/Tests/UnitTest/TestCases/test_arm_ds_cnn_l_s8/test_arm_ds_cnn_l_s8.c
index 1e6eafd9..f17a7f0e 100644
--- a/Tests/UnitTest/TestCases/test_arm_ds_cnn_l_s8/test_arm_ds_cnn_l_s8.c
+++ b/Tests/UnitTest/TestCases/test_arm_ds_cnn_l_s8/test_arm_ds_cnn_l_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2022-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2022-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -58,6 +58,7 @@ int ds_cnn_l_s8_get_buffer_size(void)
filter_dims.h = CONV_2D_1_FILTER_H;
filter_dims.w = CONV_2D_1_FILTER_W;
+ filter_dims.c = CONV_2D_1_IN_CH;
output_dims.n = input_dims.n;
output_dims.h = CONV_2D_1_OUTPUT_H;
@@ -170,6 +171,7 @@ void ds_cnn_l_s8_inference(void)
conv_filter_dims.h = CONV_2D_1_FILTER_H;
conv_filter_dims.w = CONV_2D_1_FILTER_W;
+ conv_filter_dims.c = CONV_2D_1_IN_CH;
in_out_dim_1.n = in_out_dim_0.n;
in_out_dim_1.h = CONV_2D_1_OUTPUT_H;
@@ -237,6 +239,7 @@ void ds_cnn_l_s8_inference(void)
in_out_dim_1.c = in_out_dim_0.c;
conv_filter_dims.h = CONV_2D_3_FILTER_H;
conv_filter_dims.w = CONV_2D_3_FILTER_W;
+ conv_filter_dims.c = CONV_2D_3_IN_CH;
conv_params.padding.h = CONV_2D_3_PAD_H;
conv_params.padding.w = CONV_2D_3_PAD_W;
diff --git a/Tests/UnitTest/TestCases/test_arm_ds_cnn_s_s8/test_arm_ds_cnn_s_s8.c b/Tests/UnitTest/TestCases/test_arm_ds_cnn_s_s8/test_arm_ds_cnn_s_s8.c
index 5862df7b..87f24e92 100644
--- a/Tests/UnitTest/TestCases/test_arm_ds_cnn_s_s8/test_arm_ds_cnn_s_s8.c
+++ b/Tests/UnitTest/TestCases/test_arm_ds_cnn_s_s8/test_arm_ds_cnn_s_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2022-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2022-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -60,6 +60,7 @@ int ds_cnn_s_s8_get_buffer_size(void)
filter_dims.h = CONV_2D_1_FILTER_H;
filter_dims.w = CONV_2D_1_FILTER_W;
+ filter_dims.c = CONV_2D_1_IN_CH;
output_dims.n = input_dims.n;
output_dims.h = CONV_2D_1_OUTPUT_H;
@@ -151,6 +152,7 @@ void ds_cnn_s_s8_inference(void)
conv_filter_dims.h = CONV_2D_1_FILTER_H;
conv_filter_dims.w = CONV_2D_1_FILTER_W;
+ conv_filter_dims.c = CONV_2D_1_IN_CH;
in_out_dim_1.n = in_out_dim_0.n;
in_out_dim_1.h = CONV_2D_1_OUTPUT_H;
@@ -221,6 +223,7 @@ void ds_cnn_s_s8_inference(void)
in_out_dim_1.c = in_out_dim_0.c;
conv_filter_dims.h = CONV_2D_3_FILTER_H;
conv_filter_dims.w = CONV_2D_3_FILTER_W;
+ conv_filter_dims.c = CONV_2D_3_IN_CH;
conv_params.padding.h = CONV_2D_3_PAD_H;
conv_params.padding.w = CONV_2D_3_PAD_W;
diff --git a/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/CMakeLists.txt b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/CMakeLists.txt
new file mode 100644
index 00000000..4363d841
--- /dev/null
+++ b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/CMakeLists.txt
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2010-2024 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_grouped_convolve_s8)
+
+target_sources(test_arm_grouped_convolve_s8 PRIVATE
+ Unity/unity_test_arm_grouped_convolve_s8.c
+ Unity/TestRunner/unity_test_arm_grouped_convolve_s8_runner.c)
diff --git a/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/Unity/unity_test_arm_grouped_convolve_s8.c b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/Unity/unity_test_arm_grouped_convolve_s8.c
new file mode 100644
index 00000000..a915234a
--- /dev/null
+++ b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/Unity/unity_test_arm_grouped_convolve_s8.c
@@ -0,0 +1,50 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_grouped_convolve_s8.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* This function is called from the autogenerated file.
+ * The name must be exactly like this
+ */
+void setUp(void)
+{ /* This is run before EACH TEST */
+#ifdef USING_FVP_CORSTONE_300
+ uart_init();
+#endif
+}
+
+/* This function is called from the autogenerated file.
+ * The name must be exactly like this
+ */
+void tearDown(void) {}
+
+void test_basic_arm_grouped_convolve_1_s8(void) { grouped_conv_arm_grouped_convolve_1_s8(); }
+void test_basic_arm_grouped_convolve_2_s8(void) { grouped_conv_arm_grouped_convolve_2_s8(); }
+void test_basic_arm_grouped_convolve_3_s8(void) { grouped_conv_arm_grouped_convolve_3_s8(); }
+void test_basic_arm_grouped_convolve_4_s8(void) { grouped_conv_arm_grouped_convolve_4_s8(); }
diff --git a/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/test_arm_grouped_convolve_s8.c b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/test_arm_grouped_convolve_s8.c
new file mode 100644
index 00000000..1aaa6d10
--- /dev/null
+++ b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/test_arm_grouped_convolve_s8.c
@@ -0,0 +1,312 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <arm_nnfunctions.h>
+
+#include <stdlib.h>
+#include <unity.h>
+
+#include "../TestData/grouped_conv_1/test_data.h"
+#include "../TestData/grouped_conv_2/test_data.h"
+#include "../TestData/grouped_conv_3/test_data.h"
+#include "../TestData/grouped_conv_4/test_data.h"
+#include "../Utils/validate.h"
+
+void grouped_conv_arm_grouped_convolve_1_s8(void)
+{
+ const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+ int8_t output[GROUPED_CONV_1_DST_SIZE] = {0};
+
+ cmsis_nn_context ctx;
+ cmsis_nn_conv_params conv_params;
+ cmsis_nn_per_channel_quant_params quant_params;
+ cmsis_nn_dims input_dims;
+ cmsis_nn_dims filter_dims;
+ cmsis_nn_dims bias_dims;
+ cmsis_nn_dims output_dims;
+
+ const int32_t *bias_data = grouped_conv_1_biases;
+ const int8_t *kernel_data = grouped_conv_1_weights;
+ const int8_t *input_data = grouped_conv_1_input;
+ const int8_t *output_ref = grouped_conv_1_output_ref;
+ const int32_t output_ref_size = GROUPED_CONV_1_DST_SIZE;
+
+ input_dims.n = GROUPED_CONV_1_INPUT_BATCHES;
+ input_dims.w = GROUPED_CONV_1_INPUT_W;
+ input_dims.h = GROUPED_CONV_1_INPUT_H;
+ input_dims.c = GROUPED_CONV_1_IN_CH;
+ filter_dims.w = GROUPED_CONV_1_FILTER_X;
+ filter_dims.h = GROUPED_CONV_1_FILTER_Y;
+ filter_dims.c = GROUPED_CONV_1_FILTER_CH;
+ output_dims.w = GROUPED_CONV_1_OUTPUT_W;
+ output_dims.h = GROUPED_CONV_1_OUTPUT_H;
+ output_dims.c = GROUPED_CONV_1_OUT_CH;
+
+ conv_params.padding.w = GROUPED_CONV_1_PAD_X;
+ conv_params.padding.h = GROUPED_CONV_1_PAD_Y;
+ conv_params.stride.w = GROUPED_CONV_1_STRIDE_X;
+ conv_params.stride.h = GROUPED_CONV_1_STRIDE_Y;
+ conv_params.dilation.w = GROUPED_CONV_1_DILATION_X;
+ conv_params.dilation.h = GROUPED_CONV_1_DILATION_Y;
+
+ conv_params.input_offset = GROUPED_CONV_1_INPUT_OFFSET;
+ conv_params.output_offset = GROUPED_CONV_1_OUTPUT_OFFSET;
+ conv_params.activation.min = GROUPED_CONV_1_OUT_ACTIVATION_MIN;
+ conv_params.activation.max = GROUPED_CONV_1_OUT_ACTIVATION_MAX;
+ quant_params.multiplier = (int32_t *)grouped_conv_1_output_mult;
+ quant_params.shift = (int32_t *)grouped_conv_1_output_shift;
+
+ int32_t buf_size = arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims);
+ ctx.buf = malloc(buf_size);
+ ctx.size = 0;
+
+ arm_cmsis_nn_status result = arm_convolve_s8(&ctx,
+ &conv_params,
+ &quant_params,
+ &input_dims,
+ input_data,
+ &filter_dims,
+ kernel_data,
+ &bias_dims,
+ bias_data,
+ &output_dims,
+ output);
+
+ if (ctx.buf)
+ {
+ // The caller is responsible to clear the scratch buffers for security reasons if applicable.
+ memset(ctx.buf, 0, buf_size);
+ free(ctx.buf);
+ }
+ TEST_ASSERT_EQUAL(expected, result);
+ TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size));
+ memset(output, 0, sizeof(output));
+}
+
+void grouped_conv_arm_grouped_convolve_2_s8(void)
+{
+ const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+ int8_t output[GROUPED_CONV_2_DST_SIZE] = {0};
+
+ cmsis_nn_context ctx;
+ cmsis_nn_conv_params conv_params;
+ cmsis_nn_per_channel_quant_params quant_params;
+ cmsis_nn_dims input_dims;
+ cmsis_nn_dims filter_dims;
+ cmsis_nn_dims bias_dims;
+ cmsis_nn_dims output_dims;
+
+ const int32_t *bias_data = grouped_conv_2_biases;
+ const int8_t *kernel_data = grouped_conv_2_weights;
+ const int8_t *input_data = grouped_conv_2_input;
+ const int8_t *output_ref = grouped_conv_2_output_ref;
+ const int32_t output_ref_size = GROUPED_CONV_2_DST_SIZE;
+
+ input_dims.n = GROUPED_CONV_2_INPUT_BATCHES;
+ input_dims.w = GROUPED_CONV_2_INPUT_W;
+ input_dims.h = GROUPED_CONV_2_INPUT_H;
+ input_dims.c = GROUPED_CONV_2_IN_CH;
+ filter_dims.w = GROUPED_CONV_2_FILTER_X;
+ filter_dims.h = GROUPED_CONV_2_FILTER_Y;
+ filter_dims.c = GROUPED_CONV_2_FILTER_CH;
+ output_dims.w = GROUPED_CONV_2_OUTPUT_W;
+ output_dims.h = GROUPED_CONV_2_OUTPUT_H;
+ output_dims.c = GROUPED_CONV_2_OUT_CH;
+
+ conv_params.padding.w = GROUPED_CONV_2_PAD_X;
+ conv_params.padding.h = GROUPED_CONV_2_PAD_Y;
+ conv_params.stride.w = GROUPED_CONV_2_STRIDE_X;
+ conv_params.stride.h = GROUPED_CONV_2_STRIDE_Y;
+ conv_params.dilation.w = GROUPED_CONV_2_DILATION_X;
+ conv_params.dilation.h = GROUPED_CONV_2_DILATION_Y;
+
+ conv_params.input_offset = GROUPED_CONV_2_INPUT_OFFSET;
+ conv_params.output_offset = GROUPED_CONV_2_OUTPUT_OFFSET;
+ conv_params.activation.min = GROUPED_CONV_2_OUT_ACTIVATION_MIN;
+ conv_params.activation.max = GROUPED_CONV_2_OUT_ACTIVATION_MAX;
+ quant_params.multiplier = (int32_t *)grouped_conv_2_output_mult;
+ quant_params.shift = (int32_t *)grouped_conv_2_output_shift;
+
+ int32_t buf_size = arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims);
+ ctx.buf = malloc(buf_size);
+ ctx.size = 0;
+
+ arm_cmsis_nn_status result = arm_convolve_s8(&ctx,
+ &conv_params,
+ &quant_params,
+ &input_dims,
+ input_data,
+ &filter_dims,
+ kernel_data,
+ &bias_dims,
+ bias_data,
+ &output_dims,
+ output);
+
+ if (ctx.buf)
+ {
+ // The caller is responsible to clear the scratch buffers for security reasons if applicable.
+ memset(ctx.buf, 0, buf_size);
+ free(ctx.buf);
+ }
+ TEST_ASSERT_EQUAL(expected, result);
+ TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size));
+ memset(output, 0, sizeof(output));
+}
+
+void grouped_conv_arm_grouped_convolve_3_s8(void)
+{
+ const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+ int8_t output[GROUPED_CONV_3_DST_SIZE] = {0};
+
+ cmsis_nn_context ctx;
+ cmsis_nn_conv_params conv_params;
+ cmsis_nn_per_channel_quant_params quant_params;
+ cmsis_nn_dims input_dims;
+ cmsis_nn_dims filter_dims;
+ cmsis_nn_dims bias_dims;
+ cmsis_nn_dims output_dims;
+
+ const int32_t *bias_data = grouped_conv_3_biases;
+ const int8_t *kernel_data = grouped_conv_3_weights;
+ const int8_t *input_data = grouped_conv_3_input;
+ const int8_t *output_ref = grouped_conv_3_output_ref;
+ const int32_t output_ref_size = GROUPED_CONV_3_DST_SIZE;
+
+ input_dims.n = GROUPED_CONV_3_INPUT_BATCHES;
+ input_dims.w = GROUPED_CONV_3_INPUT_W;
+ input_dims.h = GROUPED_CONV_3_INPUT_H;
+ input_dims.c = GROUPED_CONV_3_IN_CH;
+ filter_dims.w = GROUPED_CONV_3_FILTER_X;
+ filter_dims.h = GROUPED_CONV_3_FILTER_Y;
+ filter_dims.c = GROUPED_CONV_3_FILTER_CH;
+ output_dims.w = GROUPED_CONV_3_OUTPUT_W;
+ output_dims.h = GROUPED_CONV_3_OUTPUT_H;
+ output_dims.c = GROUPED_CONV_3_OUT_CH;
+
+ conv_params.padding.w = GROUPED_CONV_3_PAD_X;
+ conv_params.padding.h = GROUPED_CONV_3_PAD_Y;
+ conv_params.stride.w = GROUPED_CONV_3_STRIDE_X;
+ conv_params.stride.h = GROUPED_CONV_3_STRIDE_Y;
+ conv_params.dilation.w = GROUPED_CONV_3_DILATION_X;
+ conv_params.dilation.h = GROUPED_CONV_3_DILATION_Y;
+
+ conv_params.input_offset = GROUPED_CONV_3_INPUT_OFFSET;
+ conv_params.output_offset = GROUPED_CONV_3_OUTPUT_OFFSET;
+ conv_params.activation.min = GROUPED_CONV_3_OUT_ACTIVATION_MIN;
+ conv_params.activation.max = GROUPED_CONV_3_OUT_ACTIVATION_MAX;
+ quant_params.multiplier = (int32_t *)grouped_conv_3_output_mult;
+ quant_params.shift = (int32_t *)grouped_conv_3_output_shift;
+
+ int32_t buf_size = arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims);
+ ctx.buf = malloc(buf_size);
+ ctx.size = 0;
+
+ arm_cmsis_nn_status result = arm_convolve_s8(&ctx,
+ &conv_params,
+ &quant_params,
+ &input_dims,
+ input_data,
+ &filter_dims,
+ kernel_data,
+ &bias_dims,
+ bias_data,
+ &output_dims,
+ output);
+
+ if (ctx.buf)
+ {
+ // The caller is responsible to clear the scratch buffers for security reasons if applicable.
+ memset(ctx.buf, 0, buf_size);
+ free(ctx.buf);
+ }
+ TEST_ASSERT_EQUAL(expected, result);
+ TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size));
+ memset(output, 0, sizeof(output));
+}
+
+void grouped_conv_arm_grouped_convolve_4_s8(void)
+{
+ const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+ int8_t output[GROUPED_CONV_4_DST_SIZE] = {0};
+
+ cmsis_nn_context ctx;
+ cmsis_nn_conv_params conv_params;
+ cmsis_nn_per_channel_quant_params quant_params;
+ cmsis_nn_dims input_dims;
+ cmsis_nn_dims filter_dims;
+ cmsis_nn_dims bias_dims;
+ cmsis_nn_dims output_dims;
+
+ const int32_t *bias_data = grouped_conv_4_biases;
+ const int8_t *kernel_data = grouped_conv_4_weights;
+ const int8_t *input_data = grouped_conv_4_input;
+ const int8_t *output_ref = grouped_conv_4_output_ref;
+ const int32_t output_ref_size = GROUPED_CONV_4_DST_SIZE;
+
+ input_dims.n = GROUPED_CONV_4_INPUT_BATCHES;
+ input_dims.w = GROUPED_CONV_4_INPUT_W;
+ input_dims.h = GROUPED_CONV_4_INPUT_H;
+ input_dims.c = GROUPED_CONV_4_IN_CH;
+ filter_dims.w = GROUPED_CONV_4_FILTER_X;
+ filter_dims.h = GROUPED_CONV_4_FILTER_Y;
+ filter_dims.c = GROUPED_CONV_4_FILTER_CH;
+ output_dims.w = GROUPED_CONV_4_OUTPUT_W;
+ output_dims.h = GROUPED_CONV_4_OUTPUT_H;
+ output_dims.c = GROUPED_CONV_4_OUT_CH;
+
+ conv_params.padding.w = GROUPED_CONV_4_PAD_X;
+ conv_params.padding.h = GROUPED_CONV_4_PAD_Y;
+ conv_params.stride.w = GROUPED_CONV_4_STRIDE_X;
+ conv_params.stride.h = GROUPED_CONV_4_STRIDE_Y;
+ conv_params.dilation.w = GROUPED_CONV_4_DILATION_X;
+ conv_params.dilation.h = GROUPED_CONV_4_DILATION_Y;
+
+ conv_params.input_offset = GROUPED_CONV_4_INPUT_OFFSET;
+ conv_params.output_offset = GROUPED_CONV_4_OUTPUT_OFFSET;
+ conv_params.activation.min = GROUPED_CONV_4_OUT_ACTIVATION_MIN;
+ conv_params.activation.max = GROUPED_CONV_4_OUT_ACTIVATION_MAX;
+ quant_params.multiplier = (int32_t *)grouped_conv_4_output_mult;
+ quant_params.shift = (int32_t *)grouped_conv_4_output_shift;
+
+ int32_t buf_size = arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims);
+ ctx.buf = malloc(buf_size);
+ ctx.size = 0;
+
+ arm_cmsis_nn_status result = arm_convolve_s8(&ctx,
+ &conv_params,
+ &quant_params,
+ &input_dims,
+ input_data,
+ &filter_dims,
+ kernel_data,
+ &bias_dims,
+ bias_data,
+ &output_dims,
+ output);
+
+ if (ctx.buf)
+ {
+ // The caller is responsible to clear the scratch buffers for security reasons if applicable.
+ memset(ctx.buf, 0, buf_size);
+ free(ctx.buf);
+ }
+ TEST_ASSERT_EQUAL(expected, result);
+ TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size));
+ memset(output, 0, sizeof(output));
+}
\ No newline at end of file
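The tests above call arm_convolve_s8 with filter_dims.c set to the per-group channel count (GROUPED_CONV_x_FILTER_CH) rather than to input_dims.c. A minimal sketch of the dimension relationship they rely on follows; the helper name and the assert-based checks are illustrative only and not part of the patch.

/* Illustrative only -- not part of the patch. For grouped convolution the
 * filter channel count CK is the per-group input channel count, so
 * C_IN = groups * CK and C_OUT must split evenly across the groups. */
#include <assert.h>
#include <stdint.h>

#include "arm_nn_types.h"

static int32_t grouped_conv_num_groups(const cmsis_nn_dims *input_dims,
                                       const cmsis_nn_dims *filter_dims,
                                       const cmsis_nn_dims *output_dims)
{
    assert(filter_dims->c > 0 && (input_dims->c % filter_dims->c) == 0);
    const int32_t groups = input_dims->c / filter_dims->c;
    assert((output_dims->c % groups) == 0);
    return groups; /* e.g. grouped_conv_4: 3 / 1 = 3 groups, 6 / 3 = 2 filters per group */
}

Setting filter_dims.c == input_dims.c reduces this to the ordinary (single-group) convolution path, which is why the existing convolve tests earlier in this patch now also assign filter_dims.c explicitly.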
diff --git a/Tests/UnitTest/conv_settings.py b/Tests/UnitTest/conv_settings.py
index 9b41a90d..f16c688b 100644
--- a/Tests/UnitTest/conv_settings.py
+++ b/Tests/UnitTest/conv_settings.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+# SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -38,6 +38,7 @@ def __init__(self,
w_y=3,
stride_x=2,
stride_y=2,
+ groups=1,
pad=True,
randmin=TestSettings.INT8_MIN,
randmax=TestSettings.INT8_MAX,
@@ -84,11 +85,23 @@ def __init__(self,
int4_weights=int4_weights)
self.scaling_factors = []
+ self.groups = groups
if self.test_type == 'depthwise_conv':
self.channel_multiplier = self.output_ch // self.input_ch
if self.output_ch % self.input_ch != 0:
raise RuntimeError("out channel ({}) is not multiple of in channel ({})".format(out_ch, in_ch))
+ if groups != 1:
+ raise RuntimeError("ERROR: Groups cannot be used for depthwise convolution")
+
+ self.filter_ch = in_ch // groups
+ if in_ch % groups != 0:
+ raise RuntimeError("ERROR: Number of input channels ({}) must be a multiple of groups ({})".format(in_ch, groups))
+ if out_ch % groups != 0:
+ raise RuntimeError("ERROR: Number of output channels ({}) must be a multiple of groups ({})".format(out_ch, groups))
+
else:
self.channel_multiplier = 0
@@ -113,6 +126,8 @@ def write_c_config_header(self) -> None:
f.write("#define {}_OUTPUT_OFFSET {}\n".format(prefix, self.output_zero_point))
f.write("#define {}_DILATION_X {}\n".format(prefix, self.dilation_x))
f.write("#define {}_DILATION_Y {}\n".format(prefix, self.dilation_y))
+ if self.groups != 1:
+ f.write("#define {}_FILTER_CH {}\n".format(prefix, self.filter_ch))
if self.test_type == 'transpose_conv':
f.write("#define {}_PAD_X_WITH_OFFSET {}\n".format(prefix, self.pad_x_with_offset))
f.write("#define {}_PAD_Y_WITH_OFFSET {}\n".format(prefix, self.pad_y_with_offset))
@@ -279,7 +294,7 @@ def generate_data(self, input_data=None, weights=None, biases=None) -> None:
if self.test_type == 'transpose_conv':
weight_shape = [self.filter_y, self.filter_x, out_channel, self.input_ch]
else:
- weight_shape = [self.filter_y, self.filter_x, self.input_ch, out_channel]
+ weight_shape = [self.filter_y, self.filter_x, self.filter_ch, out_channel]
if weights is not None:
weights = tf.reshape(weights, weight_shape)
@@ -301,7 +316,8 @@ def generate_data(self, input_data=None, weights=None, biases=None) -> None:
strides=(self.stride_y, self.stride_x),
padding=self.padding,
input_shape=input_shape[1:],
- dilation_rate=(self.dilation_y, self.dilation_x))
+ dilation_rate=(self.dilation_y, self.dilation_x),
+ groups=self.groups)
model.add(conv_layer)
conv_layer.set_weights([weights, biases])
elif self.test_type == 'depthwise_conv':
diff --git a/Tests/UnitTest/generate_test_data.py b/Tests/UnitTest/generate_test_data.py
index 9f8ba15a..b82c53d7 100755
--- a/Tests/UnitTest/generate_test_data.py
+++ b/Tests/UnitTest/generate_test_data.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
#
-# SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+# SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -642,7 +642,86 @@ def load_testdata_sets(regenerate_input, regenerate_weights, regenerate_biases,
int16xint8=True,
dilation_x=2,
interpreter=interpreter)
- dataset = 'basic_int4'
+ dataset = 'grouped_conv_1'
+ testdata_sets[dataset] = ConvSettings(dataset,
+ type_of_test,
+ regenerate_weights,
+ regenerate_input,
+ regenerate_biases,
+ schema_file,
+ in_ch=2,
+ out_ch=6,
+ groups=2,
+ x_in=5,
+ y_in=5,
+ w_x=2,
+ w_y=2,
+ generate_bias=False,
+ stride_x=1,
+ stride_y=1,
+ pad=False,
+ batches=2,
+ interpreter=interpreter)
+ dataset = 'grouped_conv_2'
+ testdata_sets[dataset] = ConvSettings(dataset,
+ type_of_test,
+ regenerate_weights,
+ regenerate_input,
+ regenerate_biases,
+ schema_file,
+ in_ch=4,
+ out_ch=2,
+ groups=2,
+ x_in=7,
+ y_in=3,
+ w_x=1,
+ w_y=2,
+ generate_bias=True,
+ stride_x=1,
+ stride_y=1,
+ pad=False,
+ interpreter=interpreter)
+ dataset = 'grouped_conv_3'
+ testdata_sets[dataset] = ConvSettings(dataset,
+ type_of_test,
+ regenerate_weights,
+ regenerate_input,
+ regenerate_biases,
+ schema_file,
+ in_ch=2,
+ out_ch=4,
+ groups=2,
+ x_in=3,
+ y_in=2,
+ w_x=3,
+ w_y=2,
+ generate_bias=True,
+ stride_x=2,
+ stride_y=2,
+ pad=True,
+ batches=2,
+ interpreter=interpreter)
+ dataset = 'grouped_conv_4'
+ testdata_sets[dataset] = ConvSettings(dataset,
+ type_of_test,
+ regenerate_weights,
+ regenerate_input,
+ regenerate_biases,
+ schema_file,
+ in_ch=3,
+ out_ch=6,
+ groups=3,
+ x_in=9,
+ y_in=9,
+ w_x=2,
+ w_y=2,
+ generate_bias=True,
+ stride_x=1,
+ stride_y=1,
+ dilation_x=3,
+ dilation_y=3,
+ pad=True,
+ interpreter=interpreter)
+ dataset = 'basic_int4'
testdata_sets[dataset] = ConvSettings(dataset,
type_of_test,
regenerate_weights,