diff --git a/Include/arm_nnfunctions.h b/Include/arm_nnfunctions.h index d8062f80..ade3417c 100644 --- a/Include/arm_nnfunctions.h +++ b/Include/arm_nnfunctions.h @@ -21,8 +21,8 @@ * Title: arm_nnfunctions.h * Description: Public header file for CMSIS NN Library * - * $Date: 9 January 2024 - * $Revision: V.12.5.1 + * $Date: 11 January 2024 + * $Revision: V.12.6.0 * * Target : Arm(R) M-Profile Architecture * -------------------------------------------------------------------- */ @@ -365,8 +365,10 @@ arm_cmsis_nn_status arm_convolve_s4(const cmsis_nn_context *ctx, * It contains the multiplier and shift values to be applied to each output channel * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the - * spatial filter dimensions + * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, CK] where HK, WK and CK are the + * spatial filter dimensions. CK != C_IN is used for grouped convolution, in which + * case the required conditions are C_IN = N * CK and C_OUT = N * M for N groups of + * size M. * @param[in] filter_data Filter data pointer. Data type: int8 * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] * @param[in] bias_data Optional bias data pointer. Data type: int32 diff --git a/Include/arm_nnsupportfunctions.h b/Include/arm_nnsupportfunctions.h index 9c0ebda5..20cbfd38 100644 --- a/Include/arm_nnsupportfunctions.h +++ b/Include/arm_nnsupportfunctions.h @@ -21,8 +21,8 @@ * Title: arm_nnsupportfunctions.h * Description: Public header file of support functions for CMSIS NN Library * - * $Date: 9 January 2024 - * $Revision: V.17.6.3 + * $Date: 11 January 2024 + * $Revision: V.17.7.0 * * Target : Arm(R) M-Profile Architecture * -------------------------------------------------------------------- */ @@ -423,6 +423,7 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s4(const int8_t *lhs, * @param[in] dst_offset Offset to be applied the output result * @param[in] activation_min Minimum value to clamp down the output. Range : int8 * @param[in] activation_max Maximum value to clamp up the output. Range : int8 + * @param[in] row_address_offset Address offset between rows in output. NOTE: Only used for MVEI extension. * @param[in] lhs_cols_offset Column offset between subsequent lhs_rows * * @return The function returns ARM_CMSIS_NN_SUCCESS @@ -441,6 +442,7 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max, + const int32_t row_address_offset, const int32_t lhs_cols_offset); /** @@ -1006,6 +1008,47 @@ int8_t *arm_nn_mat_mult_kernel_s8_s16(const int8_t *input_a, const int32_t *const output_bias, int8_t *out_0); +/** + * @brief Matrix-multiplication function for convolution with per-channel requantization, supporting an address offset + * between rows. + * @param[in] input_a pointer to operand A + * @param[in] input_b pointer to operand B, always consists of 2 vectors. + * @param[in] output_ch number of rows of A + * @param[in] out_shift pointer to per output channel requantization shift parameter. + * @param[in] out_mult pointer to per output channel requantization multiplier parameter. + * @param[in] out_offset output tensor offset. + * @param[in] activation_min minimum value to clamp the output to. 
Range : int8
 + * @param[in]       activation_max     maximum value to clamp the output to. Range : int8
 + * @param[in]       num_col_a          number of columns of A
 + * @param[in]       aligned_num_col_a  number of columns of A aligned by 4
 + * @param[in]       output_bias        per output channel bias. Range : int32
 + * @param[in]       row_address_offset address offset between rows in the output
 + * @param[in,out]   out_0              pointer to output
 + * @return     The function returns one of the two:
 + *              1. The incremented output pointer for a successful operation, or
 + *              2. NULL if the implementation is not available.
 + *
 + * @details   This function does the matrix multiplication of the weight matrix for all output channels
 + *            with 2 columns from im2col and produces two elements per output channel. The outputs are
 + *            clamped to the range provided by activation min and max.
 + *
 + *            This function is slightly less performant than arm_nn_mat_mult_kernel_s8_s16, but adds support for
 + *            grouped convolution. Supported framework: TensorFlow Lite Micro.
 + */
 +int8_t *arm_nn_mat_mult_kernel_row_offset_s8_s16(const int8_t *input_a,
 +                                                 const int16_t *input_b,
 +                                                 const uint16_t output_ch,
 +                                                 const int32_t *out_shift,
 +                                                 const int32_t *out_mult,
 +                                                 const int32_t out_offset,
 +                                                 const int16_t activation_min,
 +                                                 const int16_t activation_max,
 +                                                 const int32_t num_col_a,
 +                                                 const int32_t aligned_num_col_a,
 +                                                 const int32_t *const output_bias,
 +                                                 const int32_t row_address_offset,
 +                                                 int8_t *out_0);
 +
 /**
  * @brief Common softmax function for s8 input and s8 or s16 output
  * @param[in]  input          Pointer to the input tensor
diff --git a/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c b/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
index 9e2aeb05..a255fe5e 100644
--- a/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
+++ b/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@@ -21,8 +21,8 @@
 * Title:        arm_convolve_1_x_n_s8.c
 * Description:  s8 version of 1xN convolution using symmetric quantization.
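For reference alongside the updated filter_dims documentation above: a minimal caller-side sketch of a grouped arm_convolve_s8 call, using purely illustrative shapes (2 groups, C_IN = 4, CK = 2, C_OUT = 6; every numeric value below is an assumption, not taken from this patch). The scratch buffer would be sized with arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims), exactly as for a regular convolution.

    /* Sketch only: illustrative shapes for a grouped s8 convolution, not part of this patch. */
    #include "arm_nnfunctions.h"

    arm_cmsis_nn_status grouped_conv_example(const int8_t *input,
                                             const int8_t *weights, /* [6, 3, 3, 2] = [C_OUT, HK, WK, CK] */
                                             const int32_t *bias,   /* [6] */
                                             int32_t *mult,         /* per-channel multipliers, length 6 */
                                             int32_t *shift,        /* per-channel shifts, length 6 */
                                             int8_t *output,
                                             cmsis_nn_context *ctx) /* buf sized via arm_convolve_s8_get_buffer_size() */
    {
        const cmsis_nn_dims input_dims = {.n = 1, .h = 8, .w = 8, .c = 4};  /* C_IN = 4 */
        const cmsis_nn_dims filter_dims = {.n = 6, .h = 3, .w = 3, .c = 2}; /* CK = 2 != C_IN => 2 groups */
        const cmsis_nn_dims bias_dims = {.n = 1, .h = 1, .w = 1, .c = 6};
        const cmsis_nn_dims output_dims = {.n = 1, .h = 6, .w = 6, .c = 6}; /* C_OUT = 6 = 2 groups * 3 filters */

        const cmsis_nn_conv_params conv_params = {.input_offset = 128,
                                                  .output_offset = 0,
                                                  .stride = {.w = 1, .h = 1},
                                                  .padding = {.w = 0, .h = 0},
                                                  .dilation = {.w = 1, .h = 1},
                                                  .activation = {.min = -128, .max = 127}};
        const cmsis_nn_per_channel_quant_params quant_params = {.multiplier = mult, .shift = shift};

        return arm_convolve_s8(ctx, &conv_params, &quant_params, &input_dims, input,
                               &filter_dims, weights, &bias_dims, bias, &output_dims, output);
    }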
* - * $Date: 8 March 2023 - * $Revision: V.3.4.0 + * $Date: 04 January 2024 + * $Revision: V.3.5.0 * * Target : Arm(R) M-Profile Architecture * @@ -30,7 +30,6 @@ #include "arm_nnfunctions.h" #include "arm_nnsupportfunctions.h" - /** * @ingroup Public */ @@ -149,6 +148,7 @@ arm_cmsis_nn_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx, conv_params->output_offset, conv_params->activation.min, conv_params->activation.max, + rhs_rows, lhs_offset); output_data += lhs_rows * rhs_rows; diff --git a/Source/ConvolutionFunctions/arm_convolve_1x1_s8.c b/Source/ConvolutionFunctions/arm_convolve_1x1_s8.c index 7f3030eb..7cebffc0 100644 --- a/Source/ConvolutionFunctions/arm_convolve_1x1_s8.c +++ b/Source/ConvolutionFunctions/arm_convolve_1x1_s8.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright 2022-2023 Arm Limited and/or its affiliates + * SPDX-FileCopyrightText: Copyright 2022-2024 Arm Limited and/or its affiliates * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_convolve_1x1_s8.c * Description: Generic s8 version of 1x1 convolution * - * $Date: 20 January 2023 - * $Revision: V.1.0.1 + * $Date: 04 January 2024 + * $Revision: V.1.1.0 * * Target : Arm(R) M-Profile Architecture * @@ -96,6 +96,7 @@ arm_cmsis_nn_status arm_convolve_1x1_s8(const cmsis_nn_context *ctx, conv_params->output_offset, conv_params->activation.min, conv_params->activation.max, + rhs_rows, rhs_cols * stride_w); if (result != ARM_CMSIS_NN_SUCCESS) { diff --git a/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c b/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c index e832d0b3..8a400f2b 100644 --- a/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c +++ b/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates + * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_convolve_1x1_s8_fast.c * Description: Fast s8 version of 1x1 convolution (non-square shape) * - * $Date: 30 October 2023 - * $Revision: V.3.4.0 + * $Date: 04 January 2024 + * $Revision: V.3.5.0 * * Target : Arm(R) M-Profile Architecture * @@ -86,6 +86,7 @@ arm_cmsis_nn_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx, conv_params->output_offset, conv_params->activation.min, conv_params->activation.max, + rhs_rows, rhs_cols); /* Return to application */ diff --git a/Source/ConvolutionFunctions/arm_convolve_s8.c b/Source/ConvolutionFunctions/arm_convolve_s8.c index 0b14eaf4..8c4ac67a 100644 --- a/Source/ConvolutionFunctions/arm_convolve_s8.c +++ b/Source/ConvolutionFunctions/arm_convolve_s8.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates + * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_convolve_s8.c * Description: s8 version of convolution using symmetric quantization. 
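The arm_convolve_s8 changes below derive all group bookkeeping from filter_dims->c: groups = input_ch / kernel_ch, rhs_cols = kernel_x * kernel_y * kernel_ch and output_ch_per_group = output_ch / groups. As a reading aid, a hypothetical helper (not in the library) that mirrors the pointer arithmetic of the rewritten loop:

    /* Hypothetical helper mirroring the per-group offsets used in the hunks below;
     * names follow the diff, the function itself is only illustrative. */
    static void group_offsets(int32_t g,
                              int32_t output_ch_per_group,
                              int32_t rhs_cols,
                              int32_t kernel_ch,
                              int32_t *filter_offset, /* into filter_data, int8 elements     */
                              int32_t *chan_offset,   /* into bias / multiplier / shift      */
                              int32_t *input_c_start, /* first input channel read by group g */
                              int32_t *out_c_start)   /* first output channel written        */
    {
        *filter_offset = g * output_ch_per_group * rhs_cols;
        *chan_offset = g * output_ch_per_group;
        *input_c_start = g * kernel_ch;
        *out_c_start = g * output_ch_per_group;
    }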
* - * $Date: 08 June 2023 - * $Revision: V.3.5.0 + * $Date: 04 January 2024 + * $Revision: V.3.6.0 * * Target : Arm(R) M-Profile Architecture * @@ -30,7 +30,6 @@ #include "arm_nnfunctions.h" #include "arm_nnsupportfunctions.h" - /** * @ingroup Public */ @@ -73,6 +72,7 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx, const uint16_t input_ch = input_dims->c; const uint16_t kernel_x = filter_dims->w; const uint16_t kernel_y = filter_dims->h; + const uint16_t kernel_ch = filter_dims->c; const uint16_t output_x = output_dims->w; const uint16_t output_y = output_dims->h; const uint16_t output_ch = output_dims->c; @@ -86,20 +86,26 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx, const int32_t out_offset = conv_params->output_offset; const int32_t out_activation_min = conv_params->activation.min; const int32_t out_activation_max = conv_params->activation.max; - const int32_t rhs_cols = kernel_x * kernel_y * input_ch; const int32_t input_offset = conv_params->input_offset; + const int32_t groups = input_ch / kernel_ch; + const int32_t rhs_cols = kernel_x * kernel_y * kernel_ch; + const int32_t output_ch_per_group = output_ch / groups; + int32_t *output_mult = quant_params->multiplier; int32_t *output_shift = quant_params->shift; + if (input_ch % groups != 0 || output_ch % groups != 0) + { + return ARM_CMSIS_NN_ARG_ERROR; + } + int i_batch; for (i_batch = 0; i_batch < input_batches; i_batch++) { - #if defined(ARM_MATH_MVEI) /* Generate up to four columns from the input tensor a GEMM computation */ int8_t *im2col_buf = (int8_t *)buffer_a; - const int32_t rhs_rows = output_dims->c; #else const int32_t remainder = rhs_cols % 4; const int32_t aligned_rhs_cols = remainder != 0 ? rhs_cols + 4 - remainder : rhs_cols; @@ -108,175 +114,221 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx, int8_t *im2col_buf = (int8_t *)buffer_a + aligned_rhs_cols * 2; int16_t *im2col_buf_start_s16 = buffer_a; #endif - int8_t *out = output_data; int32_t lhs_rows = 0; + const int8_t *filter_data_ptr = &filter_data[0]; + const int32_t *bias_data_ptr = &bias_data[0]; + const int32_t *output_mult_ptr = &output_mult[0]; + const int32_t *output_shift_ptr = &output_shift[0]; + /* This part implements the im2col function */ - for (int i_out_y = 0; i_out_y < output_y; i_out_y++) + for (int32_t i_group = 0; i_group < groups; i_group++) { - for (int i_out_x = 0; i_out_x < output_x; i_out_x++) + int8_t *out = output_data + i_group * output_ch_per_group; + for (int i_out_y = 0; i_out_y < output_y; i_out_y++) { - const int32_t base_idx_x = stride_x * i_out_x - pad_x; - const int32_t base_idx_y = stride_y * i_out_y - pad_y; - - for (int32_t i_ker_y = 0; i_ker_y < kernel_y; i_ker_y++) + for (int i_out_x = 0; i_out_x < output_x; i_out_x++) { - for (int32_t i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++) - { - const int32_t k_y = base_idx_y + dilation_y * i_ker_y; - const int32_t k_x = base_idx_x + dilation_x * i_ker_x; + const int32_t base_idx_x = stride_x * i_out_x - pad_x; + const int32_t base_idx_y = stride_y * i_out_y - pad_y; - if (k_y < 0 || k_y >= input_y || k_x < 0 || k_x >= input_x) - { - arm_memset_s8(im2col_buf, (int8_t)-input_offset, sizeof(int8_t) * input_ch); - } - else + for (int32_t i_ker_y = 0; i_ker_y < kernel_y; i_ker_y++) + { + for (int32_t i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++) { - arm_memcpy_s8(im2col_buf, input_data + (k_y * input_x + k_x) * input_ch, input_ch); + const int32_t k_y = base_idx_y + dilation_y * i_ker_y; + const int32_t k_x = base_idx_x + dilation_x 
* i_ker_x; + + if (k_y < 0 || k_y >= input_y || k_x < 0 || k_x >= input_x) + { + arm_memset_s8(im2col_buf, (int8_t)-input_offset, sizeof(int8_t) * kernel_ch); + } + else + { + arm_memcpy_s8(im2col_buf, + input_data + (k_y * input_x + k_x) * input_ch + i_group * kernel_ch, + sizeof(int8_t) * kernel_ch); + } + im2col_buf += kernel_ch; } - im2col_buf += input_ch; } - } - lhs_rows++; + lhs_rows++; #if defined(ARM_MATH_MVEI) - /* Computation is filed for every 4 columns */ - if (lhs_rows == 4) - { - arm_nn_mat_mult_nt_t_s8((int8_t *)buffer_a, - filter_data, - bias_data, - out, - output_mult, - output_shift, - lhs_rows, - rhs_rows, - rhs_cols, - input_offset, - out_offset, - out_activation_min, - out_activation_max, - rhs_cols); - out += lhs_rows * rhs_rows; - - lhs_rows = 0; - im2col_buf = (int8_t *)buffer_a; - } + + /* Computation is filed for every 4 columns */ + if (lhs_rows == 4) + { + arm_nn_mat_mult_nt_t_s8((int8_t *)buffer_a, + filter_data_ptr, + bias_data_ptr, + out, + output_mult_ptr, + output_shift_ptr, + lhs_rows, + output_ch_per_group, + rhs_cols, + input_offset, + out_offset, + out_activation_min, + out_activation_max, + output_ch, + rhs_cols); + + out += lhs_rows * output_ch; + + lhs_rows = 0; + im2col_buf = (int8_t *)buffer_a; + } #else #if defined(ARM_MATH_DSP) - /* Copy one column with input offset and no ordering */ - arm_s8_to_s16_unordered_with_offset( - im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset); + /* Copy one column with input offset and no ordering */ + arm_s8_to_s16_unordered_with_offset( + im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset); #else - arm_q7_to_q15_with_offset(im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset); + + arm_q7_to_q15_with_offset( + im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset); + #endif - im2col_buf_start_s16 += aligned_rhs_cols; + im2col_buf_start_s16 += aligned_rhs_cols; - if (lhs_rows == 2) - { - out = arm_nn_mat_mult_kernel_s8_s16(filter_data, - buffer_a, - output_ch, - output_shift, - output_mult, - out_offset, - out_activation_min, - out_activation_max, - rhs_cols, - aligned_rhs_cols, - bias_data, - out); - - /* counter reset */ - im2col_buf_start_s16 = buffer_a; - im2col_buf = (int8_t *)buffer_a + aligned_rhs_cols * 2; - lhs_rows = 0; - } + if (lhs_rows == 2) + { + if (groups > 1) + { + out = arm_nn_mat_mult_kernel_row_offset_s8_s16(filter_data_ptr, + buffer_a, + output_ch_per_group, + output_shift_ptr, + output_mult_ptr, + out_offset, + out_activation_min, + out_activation_max, + rhs_cols, + aligned_rhs_cols, + bias_data_ptr, + output_ch, + out); + } + else + { + out = arm_nn_mat_mult_kernel_s8_s16(filter_data_ptr, + buffer_a, + output_ch_per_group, + output_shift_ptr, + output_mult_ptr, + out_offset, + out_activation_min, + out_activation_max, + rhs_cols, + aligned_rhs_cols, + bias_data_ptr, + out); + } + + /* counter reset */ + im2col_buf_start_s16 = buffer_a; + im2col_buf = (int8_t *)buffer_a + aligned_rhs_cols * 2; + lhs_rows = 0; + } #endif + } } if (out == NULL) { return ARM_CMSIS_NN_NO_IMPL_ERROR; } - } - /* Handle left over columns */ - if (lhs_rows != 0) - { + /* Handle left over columns */ + if (lhs_rows != 0) + { #if defined(ARM_MATH_MVEI) - arm_nn_mat_mult_nt_t_s8((int8_t *)buffer_a, - filter_data, - bias_data, - out, - output_mult, - output_shift, - lhs_rows, - rhs_rows, - rhs_cols, - input_offset, - out_offset, - out_activation_min, - out_activation_max, - rhs_cols); - out += lhs_rows * rhs_rows; - 
lhs_rows = 0; - im2col_buf = (int8_t *)buffer_a; + arm_nn_mat_mult_nt_t_s8((int8_t *)buffer_a, + filter_data_ptr, + bias_data_ptr, + out, + output_mult_ptr, + output_shift_ptr, + lhs_rows, + output_ch_per_group, + rhs_cols, + input_offset, + out_offset, + out_activation_min, + out_activation_max, + output_ch, + rhs_cols); + + out += lhs_rows * output_ch; + lhs_rows = 0; + im2col_buf = (int8_t *)buffer_a; #else // #if defined(ARM_MATH_MVEI) - const int8_t *ker_a = filter_data; - int i; + const int8_t *ker_a = filter_data_ptr; + int i; - for (i = 0; i < output_ch; i++) - { - /* Load the accumulator with bias first */ - int32_t sum = 0; - if (bias_data) + for (i = 0; i < output_ch_per_group; i++) { - sum = bias_data[i]; - } + /* Load the accumulator with bias first */ + int32_t sum = 0; + if (bias_data_ptr) + { + sum = bias_data_ptr[i]; + } - const int16_t *ip_as_col = buffer_a; + const int16_t *ip_as_col = buffer_a; #if defined(ARM_MATH_DSP) - /* 4 multiply and accumulates are done in one loop. */ - uint16_t col_count = rhs_cols / 4; - while (col_count) - { - int32_t ker_a1, ker_a2; - int32_t ip_b1, ip_b2; + /* 4 multiply and accumulates are done in one loop. */ + uint16_t col_count = rhs_cols / 4; + while (col_count) + { + int32_t ker_a1, ker_a2; + int32_t ip_b1, ip_b2; - ker_a = read_and_pad_reordered(ker_a, &ker_a1, &ker_a2); + ker_a = read_and_pad_reordered(ker_a, &ker_a1, &ker_a2); - ip_b1 = arm_nn_read_q15x2_ia(&ip_as_col); - sum = SMLAD(ker_a1, ip_b1, sum); - ip_b2 = arm_nn_read_q15x2_ia(&ip_as_col); - sum = SMLAD(ker_a2, ip_b2, sum); + ip_b1 = arm_nn_read_q15x2_ia(&ip_as_col); + sum = SMLAD(ker_a1, ip_b1, sum); + ip_b2 = arm_nn_read_q15x2_ia(&ip_as_col); + sum = SMLAD(ker_a2, ip_b2, sum); - col_count--; - } - /* Handle left over mac */ - col_count = rhs_cols & 0x3; + col_count--; + } + /* Handle left over mac */ + col_count = rhs_cols & 0x3; #else - uint16_t col_count = rhs_cols; + uint16_t col_count = rhs_cols; + #endif - while (col_count) - { - int8_t ker_a1 = *ker_a++; - int16_t ip_b1 = *ip_as_col++; - sum += ker_a1 * ip_b1; - col_count--; + while (col_count) + { + int8_t ker_a1 = *ker_a++; + int16_t ip_b1 = *ip_as_col++; + + sum += ker_a1 * ip_b1; + col_count--; + } + + sum = arm_nn_requantize(sum, output_mult_ptr[i], output_shift_ptr[i]); + sum += out_offset; + sum = MAX(sum, out_activation_min); + sum = MIN(sum, out_activation_max); + *out++ = (int8_t)sum; } - sum = arm_nn_requantize(sum, output_mult[i], output_shift[i]); - sum += out_offset; - sum = MAX(sum, out_activation_min); - sum = MIN(sum, out_activation_max); - *out++ = (int8_t)sum; - } + im2col_buf_start_s16 = buffer_a; + im2col_buf = (int8_t *)buffer_a + aligned_rhs_cols * 2; + lhs_rows = 0; #endif // #if defined(ARM_MATH_MVEI) + } + filter_data_ptr += output_ch_per_group * rhs_cols; + bias_data_ptr += output_ch_per_group; + output_mult_ptr += output_ch_per_group; + output_shift_ptr += output_ch_per_group; } - /* Advance to the next batch */ input_data += (input_x * input_y * input_ch); output_data += (output_x * output_y * output_ch); diff --git a/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c b/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c index 07f29327..98f90585 100644 --- a/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c +++ b/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates + * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates * * SPDX-License-Identifier: 
Apache-2.0 * @@ -22,8 +22,8 @@ * Description: s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in * cmsis-nn to perform the convolution. * - * $Date: 8 March 2023 - * $Revision: V.2.4.0 + * $Date: 04 January 2024 + * $Revision: V.2.5.0 * * Target : Arm(R) M-Profile Architecture * @@ -60,7 +60,8 @@ arm_cmsis_nn_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx, int8_t *output_data) { if ((conv_params->padding.w == 0) && (conv_params->padding.h == 0) && (filter_dims->w == 1) && - (filter_dims->h == 1) && (conv_params->dilation.w == 1 && conv_params->dilation.h == 1)) + (filter_dims->h == 1) && (conv_params->dilation.w == 1 && conv_params->dilation.h == 1) && + (input_dims->c == filter_dims->c)) { if ((conv_params->stride.w == 1) && (conv_params->stride.h == 1)) { @@ -92,7 +93,7 @@ arm_cmsis_nn_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx, } } else if ((input_dims->h == 1) && conv_params->dilation.w == 1 && (filter_dims->h == 1) && - ((conv_params->stride.w * input_dims->c) % 4 == 0)) + ((conv_params->stride.w * input_dims->c) % 4 == 0) && (input_dims->c == filter_dims->c)) { return arm_convolve_1_x_n_s8(ctx, conv_params, diff --git a/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_row_offset_s8_s16.c b/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_row_offset_s8_s16.c new file mode 100644 index 00000000..7a400e12 --- /dev/null +++ b/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_row_offset_s8_s16.c @@ -0,0 +1,253 @@ +/* + * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------- + * Project: CMSIS NN Library + * Title: arm_nn_mat_mult_kernel_row_offset_s8_s16.c + * Description: Matrix-multiplication function for grouped convolution + * + * $Date: 04 January 2024 + * $Revision: V.1.0.0 + * + * Target : Arm(R) M-Profile Architecture + * -------------------------------------------------------------------- */ + +#include "arm_nnfunctions.h" +#include "arm_nnsupportfunctions.h" +/* + * Matrix-multiplication function for convolution with per-channel requantization, supporting an address offset between + * rows. + * + * Refer header file for details. 
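On the wrapper changes above: the added input_dims->c == filter_dims->c guards keep grouped cases out of the 1x1 and 1xN fast paths, so they always reach the generic arm_convolve_s8 path. A one-line restatement of that predicate (illustrative, not library code):

    #include <stdbool.h>

    /* Illustrative restatement of the new dispatch guard: CK != C_IN means grouped
     * convolution, which only the generic arm_convolve_s8 path handles. */
    static bool conv_is_grouped(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
    {
        return input_dims->c != filter_dims->c;
    }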
+ * + */ + +int8_t *arm_nn_mat_mult_kernel_row_offset_s8_s16(const int8_t *input_a, + const int16_t *input_b, + const uint16_t output_ch, + const int32_t *out_shift, + const int32_t *out_mult, + const int32_t out_offset, + const int16_t activation_min, + const int16_t activation_max, + const int32_t num_col_a, + const int32_t aligned_num_col_a, + const int32_t *const output_bias, + const int32_t row_address_offset, + int8_t *out_0) +{ + +#if !defined(ARM_MATH_MVEI) + /* set up the second output pointers */ + + int8_t *out_1 = out_0 + row_address_offset; + const int32_t *bias = output_bias; + + uint16_t row_count = output_ch / 2; + const int8_t *ip_a0 = input_a; + /* this loop over rows in A */ + while (row_count) + { + /* setup pointers for B */ + const int16_t *ip_b0 = input_b; + const int16_t *ip_b1 = ip_b0 + aligned_num_col_a; + + /* align the second pointer for A */ + const int8_t *ip_a1 = ip_a0 + num_col_a; + + int32_t ch_0_out_0 = 0; + int32_t ch_0_out_1 = 0; + int32_t ch_1_out_0 = 0; + int32_t ch_1_out_1 = 0; + /* Init accumulator with bias for channel N and N + 1 */ + if (bias) + { + ch_0_out_0 = *bias; + ch_0_out_1 = *bias++; + ch_1_out_0 = *bias; + ch_1_out_1 = *bias++; + } + + #if defined(ARM_MATH_DSP) + int32_t col_count = num_col_a / 4; + /* accumulate over the vector */ + while (col_count) + { + int32_t a01, a02, a11, a12; + int32_t b0 = arm_nn_read_q15x2_ia(&ip_b0); + int32_t b1 = arm_nn_read_q15x2_ia(&ip_b1); + + ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02); + ip_a1 = read_and_pad_reordered(ip_a1, &a11, &a12); + + ch_0_out_0 = SMLAD(a01, b0, ch_0_out_0); + ch_0_out_1 = SMLAD(a01, b1, ch_0_out_1); + ch_1_out_0 = SMLAD(a11, b0, ch_1_out_0); + ch_1_out_1 = SMLAD(a11, b1, ch_1_out_1); + + b0 = arm_nn_read_q15x2_ia(&ip_b0); + b1 = arm_nn_read_q15x2_ia(&ip_b1); + + ch_0_out_0 = SMLAD(a02, b0, ch_0_out_0); + ch_0_out_1 = SMLAD(a02, b1, ch_0_out_1); + ch_1_out_0 = SMLAD(a12, b0, ch_1_out_0); + ch_1_out_1 = SMLAD(a12, b1, ch_1_out_1); + + col_count--; + } /* while over col_count */ + + col_count = num_col_a & 0x3; + + #else + int32_t col_count = num_col_a; + #endif + while (col_count) + { + int8_t a0 = *ip_a0++; + int16_t b0 = *ip_b0++; + int8_t a1 = *ip_a1++; + int16_t b1 = *ip_b1++; + + ch_0_out_0 += a0 * b0; + ch_0_out_1 += a0 * b1; + ch_1_out_0 += a1 * b0; + ch_1_out_1 += a1 * b1; + col_count--; + } /* while over col_count */ + + ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift); + ch_0_out_0 += out_offset; + ch_0_out_0 = MAX(ch_0_out_0, activation_min); + ch_0_out_0 = MIN(ch_0_out_0, activation_max); + *out_0++ = (int8_t)ch_0_out_0; + + ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); + ch_0_out_1 += out_offset; + ch_0_out_1 = MAX(ch_0_out_1, activation_min); + ch_0_out_1 = MIN(ch_0_out_1, activation_max); + *out_1++ = (int8_t)ch_0_out_1; + out_mult++; + out_shift++; + + ch_1_out_0 = arm_nn_requantize(ch_1_out_0, *out_mult, *out_shift); + ch_1_out_0 += out_offset; + ch_1_out_0 = MAX(ch_1_out_0, activation_min); + ch_1_out_0 = MIN(ch_1_out_0, activation_max); + *out_0++ = (int8_t)ch_1_out_0; + + ch_1_out_1 = arm_nn_requantize(ch_1_out_1, *out_mult, *out_shift); + ch_1_out_1 += out_offset; + ch_1_out_1 = MAX(ch_1_out_1, activation_min); + ch_1_out_1 = MIN(ch_1_out_1, activation_max); + *out_1++ = (int8_t)ch_1_out_1; + out_mult++; + out_shift++; + + /* skip row */ + ip_a0 += num_col_a; + row_count--; + } + + /* compute the last odd numbered row if any */ + if (output_ch & 0x1) + { + /* setup pointers for B */ + const int16_t *ip_b0 = input_b; 
+ const int16_t *ip_b1 = ip_b0 + aligned_num_col_a; + + int32_t ch_0_out_0 = 0; + int32_t ch_0_out_1 = 0; + + /* load the bias */ + if (bias) + { + ch_0_out_0 = *bias; + ch_0_out_1 = *bias++; + } + + #if defined(ARM_MATH_DSP) + int32_t col_count = num_col_a >> 2; + while (col_count) + { + int32_t a01, a02; + int32_t b0 = arm_nn_read_q15x2_ia(&ip_b0); + int32_t b1 = arm_nn_read_q15x2_ia(&ip_b1); + + ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02); + + ch_0_out_0 = SMLAD(a01, b0, ch_0_out_0); + ch_0_out_1 = SMLAD(a01, b1, ch_0_out_1); + + b0 = arm_nn_read_q15x2_ia(&ip_b0); + b1 = arm_nn_read_q15x2_ia(&ip_b1); + ch_0_out_0 = SMLAD(a02, b0, ch_0_out_0); + ch_0_out_1 = SMLAD(a02, b1, ch_0_out_1); + + col_count--; + } + col_count = num_col_a & 0x3; + + #else + int32_t col_count = num_col_a; + #endif + while (col_count) + { + int8_t a0 = *ip_a0++; + int16_t b0 = *ip_b0++; + int16_t b1 = *ip_b1++; + + ch_0_out_0 += a0 * b0; + ch_0_out_1 += a0 * b1; + col_count--; + } + + ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift); + ch_0_out_0 += out_offset; + ch_0_out_0 = MAX(ch_0_out_0, activation_min); + ch_0_out_0 = MIN(ch_0_out_0, activation_max); + *out_0++ = (int8_t)ch_0_out_0; + + ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); + ch_0_out_1 += out_offset; + ch_0_out_1 = MAX(ch_0_out_1, activation_min); + ch_0_out_1 = MIN(ch_0_out_1, activation_max); + *out_1++ = (int8_t)ch_0_out_1; + out_mult++; + out_shift++; + } + + out_0 += 2 * row_address_offset - output_ch; + + /* return the new output pointer with offset */ + return out_0; +#else + (void)input_a; + (void)input_b; + (void)output_ch; + (void)out_shift; + (void)out_mult; + (void)out_offset; + (void)activation_min; + (void)activation_max; + (void)aligned_num_col_a, (void)num_col_a; + (void)output_bias; + (void)row_address_offset; + (void)out_0; + return NULL; +#endif +} diff --git a/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c b/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c index 13c28c59..8b93ca49 100644 --- a/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c +++ b/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates + * SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates * * SPDX-License-Identifier: Apache-2.0 * @@ -21,8 +21,8 @@ * Title: arm_nn_mat_mult_s8_nt_t_s8 * Description: Matrix multiplication support function with the right-hand-side (rhs) matrix transposed * - * $Date: 22 March 2023 - * $Revision: V.2.1.2 + * $Date: 04 January 2024 + * $Revision: V.3.0.0 * * Target : Arm(R) M-Profile Architecture * @@ -58,6 +58,7 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max, + const int32_t row_address_offset, const int32_t lhs_cols_offset) { @@ -140,12 +141,13 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs, res = vmaxq_s32(res, vdupq_n_s32(activation_min)); res = vminq_s32(res, vdupq_n_s32(activation_max)); - const uint32x4_t scatter_offset = {0, (uint32_t)rhs_rows, (uint32_t)rhs_rows * 2, (uint32_t)rhs_rows * 3}; + const uint32x4_t scatter_offset = { + 0, (uint32_t)row_address_offset, (uint32_t)row_address_offset * 2, (uint32_t)row_address_offset * 3}; vstrbq_scatter_offset_s32(dst, scatter_offset, res); dst++; } lhs += 4 * lhs_cols_offset; - dst += (3 * rhs_rows); + dst += 4 * row_address_offset - rhs_rows; } for (; i_items < lhs_rows; i_items++) @@ 
-217,9 +219,11 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs, acc_n0 = MIN(acc_n0, activation_max); *dst++ = (int8_t)acc_n0; } + dst += row_address_offset - rhs_rows; } #elif defined(ARM_MATH_DSP) + (void)row_address_offset; const int32_t rhs_off0 = rhs_cols - 4; const int32_t lhs_off0 = lhs_cols_offset - 4; @@ -618,6 +622,7 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs, } } #else + (void)row_address_offset; for (int32_t rhs_rows_idx = 0; rhs_rows_idx <= (rhs_rows - 2); rhs_rows_idx += 2) { const int8_t *lhs_ptr = &lhs[0]; diff --git a/Tests/UnitTest/CMakeLists.txt b/Tests/UnitTest/CMakeLists.txt index 3f24afe1..c6c0ef03 100644 --- a/Tests/UnitTest/CMakeLists.txt +++ b/Tests/UnitTest/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright 2019-2023 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: Copyright 2019-2024 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 # @@ -96,6 +96,7 @@ add_subdirectory(TestCases/test_arm_elementwise_mul_s8) add_subdirectory(TestCases/test_arm_fully_connected_s16) add_subdirectory(TestCases/test_arm_fully_connected_s8) add_subdirectory(TestCases/test_arm_fully_connected_s4) +add_subdirectory(TestCases/test_arm_grouped_convolve_s8) add_subdirectory(TestCases/test_arm_lstm_unidirectional_s16_s8) add_subdirectory(TestCases/test_arm_max_pool_s16) add_subdirectory(TestCases/test_arm_max_pool_s8) diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/biases_data.h new file mode 100644 index 00000000..35f54d09 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/biases_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int32_t grouped_conv_biases[1] = {-22573}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/config_data.h new file mode 100644 index 00000000..054820ba --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/config_data.h @@ -0,0 +1,24 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#define GROUPED_CONV_OUT_CH 1 +#define GROUPED_CONV_IN_CH 1 +#define GROUPED_CONV_INPUT_W 7 +#define GROUPED_CONV_INPUT_H 7 +#define GROUPED_CONV_DST_SIZE 16 +#define GROUPED_CONV_INPUT_SIZE 49 +#define GROUPED_CONV_OUT_ACTIVATION_MIN -128 +#define GROUPED_CONV_OUT_ACTIVATION_MAX 127 +#define GROUPED_CONV_INPUT_BATCHES 1 +#define GROUPED_CONV_FILTER_X 3 +#define GROUPED_CONV_FILTER_Y 3 +#define GROUPED_CONV_STRIDE_X 2 +#define GROUPED_CONV_STRIDE_Y 2 +#define GROUPED_CONV_PAD_X 1 +#define GROUPED_CONV_PAD_Y 1 +#define GROUPED_CONV_OUTPUT_W 4 +#define GROUPED_CONV_OUTPUT_H 4 +#define GROUPED_CONV_INPUT_OFFSET 128 +#define GROUPED_CONV_OUTPUT_OFFSET 127 +#define GROUPED_CONV_DILATION_X 1 +#define GROUPED_CONV_DILATION_Y 1 diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/input_data.h new file mode 100644 index 00000000..e684c4ed --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/input_data.h @@ -0,0 +1,9 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). 
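A worked view of the new row_address_offset argument in arm_nn_mat_mult_nt_t_s8 (and in the row-offset kernel above): the result for im2col row r and filter row c is written to dst[r * row_address_offset + c]. A hypothetical index helper, not part of the patch:

    /* Hypothetical helper: destination index of one output element under the new layout. */
    static int32_t dst_index(int32_t lhs_row, int32_t rhs_row, int32_t row_address_offset)
    {
        /* With row_address_offset == rhs_rows this reduces to the previous dense layout;
         * arm_convolve_s8 passes output_ch instead, so each group writes its
         * output_ch_per_group channels with the stride of the full output row (see the
         * scatter_offset and "dst += 4 * row_address_offset - rhs_rows" changes above). */
        return lhs_row * row_address_offset + rhs_row;
    }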
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int8_t grouped_conv_input[49] = {-97, 6, 19, -32, 76, 79, -74, 85, -66, -7, -29, 107, 118, + 65, 30, 45, -92, -123, -42, -25, -49, 74, -81, -2, -106, 3, + 69, -55, -13, -107, -59, -56, -30, 52, 124, 91, 90, 19, -71, + -47, 11, 113, 38, 77, -54, 24, 57, -90, 115}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_mult_data.h new file mode 100644 index 00000000..b897ee2a --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_mult_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int32_t grouped_conv_output_mult[1] = {1972783891}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_ref_data.h new file mode 100644 index 00000000..ff634014 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_ref_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int8_t grouped_conv_output_ref[16] = {112, 23, 7, 40, 48, 29, -15, 5, 28, 36, -12, 10, -7, -3, -16, -11}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_shift_data.h new file mode 100644 index 00000000..190c42cb --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_shift_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int32_t grouped_conv_output_shift[1] = {-9}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/test_data.h new file mode 100644 index 00000000..2c64febe --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/test_data.h @@ -0,0 +1,9 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#include "biases_data.h" +#include "config_data.h" +#include "input_data.h" +#include "output_mult_data.h" +#include "output_ref_data.h" +#include "output_shift_data.h" +#include "weights_data.h" diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/weights_data.h new file mode 100644 index 00000000..b00fa385 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/weights_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. 
+#pragma once +#include + +const int8_t grouped_conv_weights[9] = {-13, -73, -73, -113, -127, 4, -75, 117, -116}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/biases_data.h new file mode 100644 index 00000000..09b09752 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/biases_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int32_t grouped_conv_1_biases[6] = {0, 0, 0, 0, 0, 0}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/config_data.h new file mode 100644 index 00000000..8c0bb9c9 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/config_data.h @@ -0,0 +1,25 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#define GROUPED_CONV_1_OUT_CH 6 +#define GROUPED_CONV_1_IN_CH 2 +#define GROUPED_CONV_1_INPUT_W 5 +#define GROUPED_CONV_1_INPUT_H 5 +#define GROUPED_CONV_1_DST_SIZE 192 +#define GROUPED_CONV_1_INPUT_SIZE 50 +#define GROUPED_CONV_1_OUT_ACTIVATION_MIN -128 +#define GROUPED_CONV_1_OUT_ACTIVATION_MAX 127 +#define GROUPED_CONV_1_INPUT_BATCHES 2 +#define GROUPED_CONV_1_FILTER_X 2 +#define GROUPED_CONV_1_FILTER_Y 2 +#define GROUPED_CONV_1_STRIDE_X 1 +#define GROUPED_CONV_1_STRIDE_Y 1 +#define GROUPED_CONV_1_PAD_X 0 +#define GROUPED_CONV_1_PAD_Y 0 +#define GROUPED_CONV_1_OUTPUT_W 4 +#define GROUPED_CONV_1_OUTPUT_H 4 +#define GROUPED_CONV_1_INPUT_OFFSET 128 +#define GROUPED_CONV_1_OUTPUT_OFFSET -56 +#define GROUPED_CONV_1_DILATION_X 1 +#define GROUPED_CONV_1_DILATION_Y 1 +#define GROUPED_CONV_1_FILTER_CH 1 diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/input_data.h new file mode 100644 index 00000000..cd7ba022 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/input_data.h @@ -0,0 +1,11 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int8_t grouped_conv_1_input[100] = { + 34, -101, 76, -112, -64, -54, 24, 76, -103, 12, 81, -64, 124, 27, -88, 18, -98, -40, -104, 24, + -105, -114, -108, -39, 46, 124, -28, -17, -117, -91, 89, -25, -5, 23, -11, 86, -45, -125, 72, -35, + 48, -126, 25, -6, -37, -114, 95, -48, -107, 115, -109, 123, 80, 81, -51, -118, -113, -85, -3, -75, + 21, -65, -120, -2, -110, -109, -98, -99, -83, -116, -50, 76, 33, 33, 92, 61, -14, 24, -103, 84, + 115, 79, 15, -62, -50, -102, -108, -128, 125, 63, 110, 100, -71, -47, 24, 33, 69, -27, -81, 40}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_mult_data.h new file mode 100644 index 00000000..dff13443 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_mult_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. 
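For the grouped_conv_1 vectors above: IN_CH 2 with FILTER_CH 1 gives 2 groups, each producing OUT_CH / 2 = 3 output channels. The new unit test itself is not shown in this section; presumably it wires the macros into the dims roughly like this (sketch, assumed wiring only):

    /* Sketch of the assumed dimension wiring for the grouped_conv_1 case. */
    cmsis_nn_dims input_dims = {.n = GROUPED_CONV_1_INPUT_BATCHES,
                                .h = GROUPED_CONV_1_INPUT_H,
                                .w = GROUPED_CONV_1_INPUT_W,
                                .c = GROUPED_CONV_1_IN_CH};
    cmsis_nn_dims filter_dims = {.n = GROUPED_CONV_1_OUT_CH,
                                 .h = GROUPED_CONV_1_FILTER_Y,
                                 .w = GROUPED_CONV_1_FILTER_X,
                                 .c = GROUPED_CONV_1_FILTER_CH}; /* 1 != IN_CH => grouped */
    cmsis_nn_dims output_dims = {.n = GROUPED_CONV_1_INPUT_BATCHES,
                                 .h = GROUPED_CONV_1_OUTPUT_H,
                                 .w = GROUPED_CONV_1_OUTPUT_W,
                                 .c = GROUPED_CONV_1_OUT_CH};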
+#pragma once +#include + +const int32_t grouped_conv_1_output_mult[6] = {1438488183, 1655654472, 1389835298, 1454332884, 1200344887, 2074611644}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_ref_data.h new file mode 100644 index 00000000..8dd8212c --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_ref_data.h @@ -0,0 +1,15 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int8_t grouped_conv_1_output_ref[192] = { + -113, -39, 63, -87, -14, -19, -38, -1, 56, -33, 9, -18, -106, -62, -54, 11, 43, -19, -76, -43, -20, -76, + 51, -9, -128, -64, -57, -46, 14, -19, -109, -46, 28, -97, 61, 16, -35, -28, 26, -11, 38, -29, -42, -38, + -21, -2, 19, -35, -21, -21, 45, -47, 8, -14, -96, -56, -17, -17, 81, 16, -86, -36, 12, -2, 39, -52, + -69, -44, 36, -97, -12, -31, -90, -27, 52, -71, 23, -12, -78, -35, 14, 15, 43, -35, -95, -50, 33, -121, + -6, -39, -76, -30, -3, -82, 20, 9, -89, -49, -42, -64, 70, -8, -104, -48, -29, -60, -3, -56, -68, -50, + -33, -48, -35, -45, -96, -65, -56, -43, -29, -49, -71, -39, 33, -15, 39, -11, -43, -38, 49, -85, 19, -15, + -22, -22, 41, -38, 2, -22, -47, -38, -17, -78, 5, -9, -69, -28, 40, -1, 55, -31, -118, -44, -6, -17, + 34, -33, -100, -38, -11, -39, 19, -42, -97, -59, 28, -86, 59, 11, -74, -11, 48, -28, 34, -38, -98, -51, + 9, -88, -2, -18, -51, -33, 52, -41, -17, -37, -96, -47, -26, -19, 36, 1}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_shift_data.h new file mode 100644 index 00000000..c5689f9b --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_shift_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int32_t grouped_conv_1_output_shift[6] = {-8, -9, -8, -8, -8, -9}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/test_data.h new file mode 100644 index 00000000..2c64febe --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/test_data.h @@ -0,0 +1,9 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#include "biases_data.h" +#include "config_data.h" +#include "input_data.h" +#include "output_mult_data.h" +#include "output_ref_data.h" +#include "output_shift_data.h" +#include "weights_data.h" diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/weights_data.h new file mode 100644 index 00000000..af93bc4d --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/weights_data.h @@ -0,0 +1,7 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. 
+#pragma once +#include + +const int8_t grouped_conv_1_weights[24] = {-42, -127, 99, -38, 50, -72, 127, -34, 65, -75, 127, 100, + -77, 116, 127, -126, 73, 127, 51, 77, -4, 55, -16, 127}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/biases_data.h new file mode 100644 index 00000000..3ec466aa --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/biases_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int32_t grouped_conv_2_biases[2] = {-22972, -40991}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/config_data.h new file mode 100644 index 00000000..ca166e5b --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/config_data.h @@ -0,0 +1,25 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#define GROUPED_CONV_2_OUT_CH 2 +#define GROUPED_CONV_2_IN_CH 4 +#define GROUPED_CONV_2_INPUT_W 7 +#define GROUPED_CONV_2_INPUT_H 3 +#define GROUPED_CONV_2_DST_SIZE 28 +#define GROUPED_CONV_2_INPUT_SIZE 84 +#define GROUPED_CONV_2_OUT_ACTIVATION_MIN -128 +#define GROUPED_CONV_2_OUT_ACTIVATION_MAX 127 +#define GROUPED_CONV_2_INPUT_BATCHES 1 +#define GROUPED_CONV_2_FILTER_X 1 +#define GROUPED_CONV_2_FILTER_Y 2 +#define GROUPED_CONV_2_STRIDE_X 1 +#define GROUPED_CONV_2_STRIDE_Y 1 +#define GROUPED_CONV_2_PAD_X 0 +#define GROUPED_CONV_2_PAD_Y 0 +#define GROUPED_CONV_2_OUTPUT_W 7 +#define GROUPED_CONV_2_OUTPUT_H 2 +#define GROUPED_CONV_2_INPUT_OFFSET 128 +#define GROUPED_CONV_2_OUTPUT_OFFSET 127 +#define GROUPED_CONV_2_DILATION_X 1 +#define GROUPED_CONV_2_DILATION_Y 1 +#define GROUPED_CONV_2_FILTER_CH 2 diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/input_data.h new file mode 100644 index 00000000..e6292c97 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/input_data.h @@ -0,0 +1,10 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int8_t grouped_conv_2_input[84] = { + 84, -53, -94, -95, -64, -85, -94, 3, -21, -109, 123, -90, -108, -34, 97, -106, 104, 9, -106, -14, -110, + 116, 95, 8, 116, 77, -70, -51, -67, -84, 3, -10, -69, -92, 23, -116, 104, 86, 37, -97, -108, -108, + -90, 8, -56, 50, -48, -23, 102, -76, -73, 0, 5, 0, -8, -100, 112, 31, 41, 99, 116, 96, -46, + 93, -110, -96, -70, -61, 120, 62, 2, -70, -48, 56, -121, 117, -24, -21, 61, -104, 41, -77, -97, 99}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_mult_data.h new file mode 100644 index 00000000..4ba76479 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_mult_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. 
+#pragma once +#include + +const int32_t grouped_conv_2_output_mult[2] = {1191882470, 1352029555}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_ref_data.h new file mode 100644 index 00000000..a6626227 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_ref_data.h @@ -0,0 +1,7 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int8_t grouped_conv_2_output_ref[28] = {127, -128, 3, -82, 63, -27, -93, -61, 1, -123, 36, -26, 103, -86, + 70, -128, 9, -128, 59, -51, 15, -99, -128, -128, 127, -95, 127, -128}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_shift_data.h new file mode 100644 index 00000000..a1ce4b56 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_shift_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int32_t grouped_conv_2_output_shift[2] = {-6, -7}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/test_data.h new file mode 100644 index 00000000..2c64febe --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/test_data.h @@ -0,0 +1,9 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#include "biases_data.h" +#include "config_data.h" +#include "input_data.h" +#include "output_mult_data.h" +#include "output_ref_data.h" +#include "output_shift_data.h" +#include "weights_data.h" diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/weights_data.h new file mode 100644 index 00000000..0b6ef315 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/weights_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int8_t grouped_conv_2_weights[8] = {120, -52, 127, -120, 96, 67, -79, -127}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/biases_data.h new file mode 100644 index 00000000..3d1a303c --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/biases_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int32_t grouped_conv_3_biases[4] = {-16494, 1675, -6184, -27704}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/config_data.h new file mode 100644 index 00000000..0ce23d83 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/config_data.h @@ -0,0 +1,25 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). 
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#define GROUPED_CONV_3_OUT_CH 4 +#define GROUPED_CONV_3_IN_CH 2 +#define GROUPED_CONV_3_INPUT_W 3 +#define GROUPED_CONV_3_INPUT_H 2 +#define GROUPED_CONV_3_DST_SIZE 16 +#define GROUPED_CONV_3_INPUT_SIZE 12 +#define GROUPED_CONV_3_OUT_ACTIVATION_MIN -128 +#define GROUPED_CONV_3_OUT_ACTIVATION_MAX 127 +#define GROUPED_CONV_3_INPUT_BATCHES 2 +#define GROUPED_CONV_3_FILTER_X 3 +#define GROUPED_CONV_3_FILTER_Y 2 +#define GROUPED_CONV_3_STRIDE_X 2 +#define GROUPED_CONV_3_STRIDE_Y 2 +#define GROUPED_CONV_3_PAD_X 1 +#define GROUPED_CONV_3_PAD_Y 0 +#define GROUPED_CONV_3_OUTPUT_W 2 +#define GROUPED_CONV_3_OUTPUT_H 1 +#define GROUPED_CONV_3_INPUT_OFFSET 128 +#define GROUPED_CONV_3_OUTPUT_OFFSET 47 +#define GROUPED_CONV_3_DILATION_X 1 +#define GROUPED_CONV_3_DILATION_Y 1 +#define GROUPED_CONV_3_FILTER_CH 1 diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/input_data.h new file mode 100644 index 00000000..09c0bdfe --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/input_data.h @@ -0,0 +1,7 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int8_t grouped_conv_3_input[24] = {49, 51, -76, 16, 5, -65, 34, 38, -9, -95, -84, -104, + 41, -114, 85, -57, 58, -110, -3, -122, -115, -89, 60, 55}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_mult_data.h new file mode 100644 index 00000000..bf56c239 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_mult_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int32_t grouped_conv_3_output_mult[4] = {1096383366, 1906223722, 1992067095, 1833146972}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_ref_data.h new file mode 100644 index 00000000..aad049e5 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_ref_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int8_t grouped_conv_3_output_ref[16] = {-41, 43, -38, 16, -16, 77, 60, -6, -22, 30, 11, 7, -18, 93, 32, -7}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_shift_data.h new file mode 100644 index 00000000..1bccd965 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_shift_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. 
+#pragma once +#include + +const int32_t grouped_conv_3_output_shift[4] = {-8, -9, -9, -9}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/test_data.h new file mode 100644 index 00000000..2c64febe --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/test_data.h @@ -0,0 +1,9 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#include "biases_data.h" +#include "config_data.h" +#include "input_data.h" +#include "output_mult_data.h" +#include "output_ref_data.h" +#include "output_shift_data.h" +#include "weights_data.h" diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/weights_data.h new file mode 100644 index 00000000..9d7c6217 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/weights_data.h @@ -0,0 +1,7 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int8_t grouped_conv_3_weights[24] = {117, -127, 67, -4, -92, 52, 127, 22, -50, 63, -37, 8, + 127, -66, -106, 14, -56, -125, -16, 38, -12, -127, 4, 108}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/biases_data.h new file mode 100644 index 00000000..fa8fc17e --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/biases_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int32_t grouped_conv_4_biases[6] = {-14062, 34706, -14058, 6024, 3930, 9268}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/config_data.h new file mode 100644 index 00000000..3ab708f9 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/config_data.h @@ -0,0 +1,25 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. 
+#pragma once +#define GROUPED_CONV_4_OUT_CH 6 +#define GROUPED_CONV_4_IN_CH 3 +#define GROUPED_CONV_4_INPUT_W 9 +#define GROUPED_CONV_4_INPUT_H 9 +#define GROUPED_CONV_4_DST_SIZE 486 +#define GROUPED_CONV_4_INPUT_SIZE 243 +#define GROUPED_CONV_4_OUT_ACTIVATION_MIN -128 +#define GROUPED_CONV_4_OUT_ACTIVATION_MAX 127 +#define GROUPED_CONV_4_INPUT_BATCHES 1 +#define GROUPED_CONV_4_FILTER_X 2 +#define GROUPED_CONV_4_FILTER_Y 2 +#define GROUPED_CONV_4_STRIDE_X 1 +#define GROUPED_CONV_4_STRIDE_Y 1 +#define GROUPED_CONV_4_PAD_X 1 +#define GROUPED_CONV_4_PAD_Y 1 +#define GROUPED_CONV_4_OUTPUT_W 9 +#define GROUPED_CONV_4_OUTPUT_H 9 +#define GROUPED_CONV_4_INPUT_OFFSET 128 +#define GROUPED_CONV_4_OUTPUT_OFFSET -62 +#define GROUPED_CONV_4_DILATION_X 3 +#define GROUPED_CONV_4_DILATION_Y 3 +#define GROUPED_CONV_4_FILTER_CH 1 diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/input_data.h new file mode 100644 index 00000000..f1d86ff5 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/input_data.h @@ -0,0 +1,19 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int8_t grouped_conv_4_input[243] = { + 74, -104, -81, 46, -118, -58, -106, 95, 114, -72, 59, 89, 123, -2, -41, -104, 87, 23, 0, + 66, -119, -102, 103, 85, 35, 67, 59, -118, -108, -74, -67, -100, -55, 111, -58, -6, 89, -22, + -66, 45, -54, -62, -91, 87, 0, -122, 59, -78, 78, 59, -24, -46, 66, 125, 71, -15, 124, + 82, -21, 12, -119, 3, 126, 82, -29, 85, 14, -25, -91, 77, -23, -106, 107, -35, -14, 64, + 83, -39, 79, 29, 19, 92, 95, 91, 70, 21, 70, 38, 110, 49, 112, -41, -125, -97, 6, + -48, 36, -76, 48, -8, -79, 93, -33, -96, -61, 57, -90, -41, 36, -63, 92, -39, 76, -39, + 33, 65, -58, -13, -94, -91, 115, 104, -42, -11, -97, -66, -100, 82, 125, 67, 39, 75, -21, + 2, -53, 2, 120, 16, -39, 64, 70, -34, -52, 84, -114, 79, -80, 42, 86, 105, -53, 49, + -22, -37, -72, -115, -33, -99, 55, -57, 34, -126, 1, 123, 16, -61, -76, 25, 98, 111, 124, + -64, 97, 103, 5, 32, 76, -30, 6, -96, 106, -33, -36, -8, 56, -103, -61, -39, -41, -99, + 55, -112, 109, 106, -115, 58, 110, -55, -85, -81, -3, -20, -58, -21, 122, -16, 93, -37, 75, + -64, -90, -94, 47, -83, 15, -111, 97, -76, 69, 90, 91, -112, -87, -4, 18, -65, 79, 48, + -115, 19, -45, -31, -99, -38, -8, 29, -79, 75, 15, 10, -79, -46, 29}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_mult_data.h new file mode 100644 index 00000000..c58a42ae --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_mult_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int32_t grouped_conv_4_output_mult[6] = {1813678179, 1858473757, 1768903071, 1183163162, 1102339756, 1617679616}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_ref_data.h new file mode 100644 index 00000000..e9328247 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_ref_data.h @@ -0,0 +1,28 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). 
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int8_t grouped_conv_4_output_ref[486] = { + -83, -3, -62, -40, 5, -46, -77, 53, -44, -39, 20, -2, -93, 51, -45, -39, -32, -23, -47, 12, -40, -38, -24, + -3, -74, 57, -48, -40, -7, -10, -71, 40, -27, -32, -30, -41, -79, 54, -36, -35, -18, -43, -124, 47, -66, -46, + -43, -28, -117, 38, -42, -42, -45, -33, -52, 8, -14, -62, 49, -60, -30, 33, 1, -54, 29, -13, -65, 23, -14, + -49, 15, -4, -75, 41, 4, -5, 73, 21, -85, 51, -30, -17, 27, -6, -18, -25, -19, -38, 35, -29, -64, 45, + -39, -11, 50, 1, -78, 4, -73, 6, -31, -8, -96, 13, -76, 15, -21, 3, -44, 13, -42, -45, -7, -54, -79, + 43, -51, -53, -1, -3, -33, 27, 7, -29, 1, -22, -38, 6, -10, -49, 15, -20, -53, -7, -15, -29, 29, -31, + -43, 42, 18, -34, 36, -25, -73, 26, -28, -6, 50, -27, -89, 2, -40, 9, -23, 6, -75, 10, -49, 7, -21, + 7, -67, 3, -54, -61, 60, -61, -53, 3, 16, -9, 57, 13, -17, 1, 6, -13, 47, 12, -76, 25, -23, -13, + 29, 36, -16, -18, -19, -24, 10, -6, -57, 24, -6, -39, 39, -4, -32, -6, 2, -24, -3, -34, -51, -17, -72, + -22, -38, -25, -61, -10, -77, 10, -25, 1, -34, 15, -11, -64, 50, -56, -38, 3, 19, 24, 42, 20, -28, -7, + -27, -13, 54, 13, -62, 32, -3, 26, 45, 20, 5, -5, -15, -18, 47, -18, -73, 31, -15, 1, -1, -1, -46, + 3, -36, -29, 14, -14, -99, 30, -65, -33, -17, 9, -85, 9, -47, -34, -43, -31, -42, 13, -18, -58, -19, -52, + -42, -15, -34, -27, 14, -6, -75, 48, 9, -11, 5, -33, -29, 33, -11, 15, 29, -24, -49, -4, -11, -52, 42, + -33, -37, -7, -40, -3, 60, -23, -89, 39, -19, -44, -2, -2, -94, 12, -41, 15, -16, 10, -51, -23, -76, -2, + -11, 20, -73, 2, -53, -51, 34, -59, -83, 31, -8, 8, 37, 12, -93, 36, 16, -7, 51, -21, -50, 0, -30, + -45, 35, 12, -65, 18, -6, 17, -16, -9, -8, -9, -24, 20, 62, 1, -73, 12, -41, -15, 5, -12, -86, 10, + -52, -29, -48, -37, -99, 27, -55, -36, -17, 10, -76, 2, -52, -80, 10, -62, -55, -20, -52, -9, 23, -34, -65, + -10, -62, -55, 17, -31, -34, -33, -64, -1, -14, -3, -62, -7, -70, 0, -2, -10, -52, -19, -58, -29, -22, -12, + -62, -15, -71, -24, -28, -47, -36, -36, -84, -23, -42, -31, -60, -20, -83, 1, -51, -44, -77, 1, -52, -80, -36, + -52, -77, -6, -76, -5, -20, -52, -31, -34, -73, 6, -25, -52, -36, -24, -67, 2, 11, -47, -72, -7, -57, -62, + -22, -28, -61, -18, -79, -34, 4, -38, -31, -37, -64, -37, -22, -8, -66, -16, -83, 6, -46, -36, -77, -9, -84, + -40, -32, -16}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_shift_data.h new file mode 100644 index 00000000..4ebe6c11 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_shift_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int32_t grouped_conv_4_output_shift[6] = {-9, -9, -9, -8, -8, -9}; diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/test_data.h new file mode 100644 index 00000000..2c64febe --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/test_data.h @@ -0,0 +1,9 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. 
+#include "biases_data.h" +#include "config_data.h" +#include "input_data.h" +#include "output_mult_data.h" +#include "output_ref_data.h" +#include "output_shift_data.h" +#include "weights_data.h" diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/weights_data.h new file mode 100644 index 00000000..87cb7b8d --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/weights_data.h @@ -0,0 +1,7 @@ +// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1). +// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62. +#pragma once +#include + +const int8_t grouped_conv_4_weights[24] = {127, 26, -101, 118, -83, 14, 127, 39, 6, 84, 127, 107, + 127, -60, 15, 31, 63, 127, 49, 116, 127, -37, 120, 5}; diff --git a/Tests/UnitTest/TestCases/test_arm_convolve_1_x_n_s8/test_arm_convolve_1_x_n_s8.c b/Tests/UnitTest/TestCases/test_arm_convolve_1_x_n_s8/test_arm_convolve_1_x_n_s8.c index 1d9f9824..afe45fa3 100644 --- a/Tests/UnitTest/TestCases/test_arm_convolve_1_x_n_s8/test_arm_convolve_1_x_n_s8.c +++ b/Tests/UnitTest/TestCases/test_arm_convolve_1_x_n_s8/test_arm_convolve_1_x_n_s8.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright 2023 Arm Limited and/or its affiliates + * SPDX-FileCopyrightText: Copyright 2023-2024 Arm Limited and/or its affiliates * * SPDX-License-Identifier: Apache-2.0 * @@ -53,6 +53,7 @@ void conv_1_x_n_1_arm_convolve_s8(void) input_dims.c = CONV_1_X_N_1_IN_CH; filter_dims.w = CONV_1_X_N_1_FILTER_X; filter_dims.h = CONV_1_X_N_1_FILTER_Y; + filter_dims.c = CONV_1_X_N_1_IN_CH; output_dims.w = CONV_1_X_N_1_OUTPUT_W; output_dims.h = CONV_1_X_N_1_OUTPUT_H; output_dims.c = CONV_1_X_N_1_OUT_CH; @@ -144,6 +145,7 @@ void conv_1_x_n_2_arm_convolve_s8(void) input_dims.c = CONV_1_X_N_2_IN_CH; filter_dims.w = CONV_1_X_N_2_FILTER_X; filter_dims.h = CONV_1_X_N_2_FILTER_Y; + filter_dims.c = CONV_1_X_N_2_IN_CH; output_dims.w = CONV_1_X_N_2_OUTPUT_W; output_dims.h = CONV_1_X_N_2_OUTPUT_H; output_dims.c = CONV_1_X_N_2_OUT_CH; @@ -233,6 +235,7 @@ void conv_1_x_n_3_arm_convolve_s8(void) input_dims.c = CONV_1_X_N_3_IN_CH; filter_dims.w = CONV_1_X_N_3_FILTER_X; filter_dims.h = CONV_1_X_N_3_FILTER_Y; + filter_dims.c = CONV_1_X_N_3_IN_CH; output_dims.w = CONV_1_X_N_3_OUTPUT_W; output_dims.h = CONV_1_X_N_3_OUTPUT_H; output_dims.c = CONV_1_X_N_3_OUT_CH; @@ -324,6 +327,7 @@ void conv_1_x_n_4_arm_convolve_s8(void) input_dims.c = CONV_1_X_N_4_IN_CH; filter_dims.w = CONV_1_X_N_4_FILTER_X; filter_dims.h = CONV_1_X_N_4_FILTER_Y; + filter_dims.c = CONV_1_X_N_4_IN_CH; output_dims.w = CONV_1_X_N_4_OUTPUT_W; output_dims.h = CONV_1_X_N_4_OUTPUT_H; output_dims.c = CONV_1_X_N_4_OUT_CH; @@ -415,6 +419,7 @@ void conv_1_x_n_5_arm_convolve_s8(void) input_dims.c = CONV_1_X_N_5_IN_CH; filter_dims.w = CONV_1_X_N_5_FILTER_X; filter_dims.h = CONV_1_X_N_5_FILTER_Y; + filter_dims.c = CONV_1_X_N_5_IN_CH; output_dims.w = CONV_1_X_N_5_OUTPUT_W; output_dims.h = CONV_1_X_N_5_OUTPUT_H; output_dims.c = CONV_1_X_N_5_OUT_CH; @@ -502,6 +507,7 @@ void conv_1_x_n_6_arm_convolve_s8(void) input_dims.c = CONV_1_X_N_3_IN_CH; filter_dims.w = CONV_1_X_N_3_FILTER_X; filter_dims.h = CONV_1_X_N_3_FILTER_Y; + filter_dims.c = CONV_1_X_N_3_IN_CH; output_dims.w = CONV_1_X_N_3_OUTPUT_W; output_dims.h = CONV_1_X_N_3_OUTPUT_H; output_dims.c = CONV_1_X_N_3_OUT_CH; diff --git a/Tests/UnitTest/TestCases/test_arm_convolve_s8/test_arm_convolve_s8.c 
b/Tests/UnitTest/TestCases/test_arm_convolve_s8/test_arm_convolve_s8.c index 3d37c7ef..a6065946 100644 --- a/Tests/UnitTest/TestCases/test_arm_convolve_s8/test_arm_convolve_s8.c +++ b/Tests/UnitTest/TestCases/test_arm_convolve_s8/test_arm_convolve_s8.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates + * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates * * SPDX-License-Identifier: Apache-2.0 * @@ -61,6 +61,7 @@ void basic_arm_convolve_s8(void) input_dims.c = BASIC_IN_CH; filter_dims.w = BASIC_FILTER_X; filter_dims.h = BASIC_FILTER_Y; + filter_dims.c = BASIC_IN_CH; output_dims.w = BASIC_OUTPUT_W; output_dims.h = BASIC_OUTPUT_H; output_dims.c = BASIC_OUT_CH; @@ -155,6 +156,7 @@ void stride2pad1_arm_convolve_s8(void) input_dims.c = STRIDE2PAD1_IN_CH; filter_dims.w = STRIDE2PAD1_FILTER_X; filter_dims.h = STRIDE2PAD1_FILTER_Y; + filter_dims.c = STRIDE2PAD1_IN_CH; output_dims.w = STRIDE2PAD1_OUTPUT_W; output_dims.h = STRIDE2PAD1_OUTPUT_H; output_dims.c = STRIDE2PAD1_OUT_CH; @@ -248,6 +250,7 @@ void conv_2_arm_convolve_s8(void) input_dims.c = CONV_2_IN_CH; filter_dims.w = CONV_2_FILTER_X; filter_dims.h = CONV_2_FILTER_Y; + filter_dims.c = CONV_2_IN_CH; output_dims.w = CONV_2_OUTPUT_W; output_dims.h = CONV_2_OUTPUT_H; output_dims.c = CONV_2_OUT_CH; @@ -341,6 +344,7 @@ void conv_3_arm_convolve_s8(void) input_dims.c = CONV_3_IN_CH; filter_dims.w = CONV_3_FILTER_X; filter_dims.h = CONV_3_FILTER_Y; + filter_dims.c = CONV_3_IN_CH; output_dims.w = CONV_3_OUTPUT_W; output_dims.h = CONV_3_OUTPUT_H; output_dims.c = CONV_3_OUT_CH; @@ -434,6 +438,7 @@ void conv_4_arm_convolve_s8(void) input_dims.c = CONV_4_IN_CH; filter_dims.w = CONV_4_FILTER_X; filter_dims.h = CONV_4_FILTER_Y; + filter_dims.c = CONV_4_IN_CH; output_dims.w = CONV_4_OUTPUT_W; output_dims.h = CONV_4_OUTPUT_H; output_dims.c = CONV_4_OUT_CH; @@ -526,6 +531,7 @@ void conv_out_activation_arm_convolve_s8(void) input_dims.c = CONV_OUT_ACTIVATION_IN_CH; filter_dims.w = CONV_OUT_ACTIVATION_FILTER_X; filter_dims.h = CONV_OUT_ACTIVATION_FILTER_Y; + filter_dims.c = CONV_OUT_ACTIVATION_IN_CH; output_dims.w = CONV_OUT_ACTIVATION_OUTPUT_W; output_dims.h = CONV_OUT_ACTIVATION_OUTPUT_H; output_dims.c = CONV_OUT_ACTIVATION_OUT_CH; @@ -592,6 +598,7 @@ void conv_2x2_dilation_arm_convolve_s8(void) input_dims.c = CONV_2X2_DILATION_IN_CH; filter_dims.w = CONV_2X2_DILATION_FILTER_X; filter_dims.h = CONV_2X2_DILATION_FILTER_Y; + filter_dims.c = CONV_2X2_DILATION_IN_CH; output_dims.w = CONV_2X2_DILATION_OUTPUT_W; output_dims.h = CONV_2X2_DILATION_OUTPUT_H; output_dims.c = CONV_2X2_DILATION_OUT_CH; @@ -685,6 +692,7 @@ void conv_2x2_dilation_5x5_input_arm_convolve_s8(void) input_dims.c = CONV_2X2_DILATION_5X5_INPUT_IN_CH; filter_dims.w = CONV_2X2_DILATION_5X5_INPUT_FILTER_X; filter_dims.h = CONV_2X2_DILATION_5X5_INPUT_FILTER_Y; + filter_dims.c = CONV_2X2_DILATION_5X5_INPUT_IN_CH; output_dims.w = CONV_2X2_DILATION_5X5_INPUT_OUTPUT_W; output_dims.h = CONV_2X2_DILATION_5X5_INPUT_OUTPUT_H; output_dims.c = CONV_2X2_DILATION_5X5_INPUT_OUT_CH; @@ -776,6 +784,7 @@ void conv_3x3_dilation_5x5_input_arm_convolve_s8(void) input_dims.c = CONV_3X3_DILATION_5X5_INPUT_IN_CH; filter_dims.w = CONV_3X3_DILATION_5X5_INPUT_FILTER_X; filter_dims.h = CONV_3X3_DILATION_5X5_INPUT_FILTER_Y; + filter_dims.c = CONV_3X3_DILATION_5X5_INPUT_IN_CH; output_dims.w = CONV_3X3_DILATION_5X5_INPUT_OUTPUT_W; output_dims.h = CONV_3X3_DILATION_5X5_INPUT_OUTPUT_H; output_dims.c = CONV_3X3_DILATION_5X5_INPUT_OUT_CH; @@ -867,6 +876,7 
@@ void conv_2x3_dilation_arm_convolve_s8(void) input_dims.c = CONV_2X3_DILATION_IN_CH; filter_dims.w = CONV_2X3_DILATION_FILTER_X; filter_dims.h = CONV_2X3_DILATION_FILTER_Y; + filter_dims.c = CONV_2X3_DILATION_IN_CH; output_dims.w = CONV_2X3_DILATION_OUTPUT_W; output_dims.h = CONV_2X3_DILATION_OUTPUT_H; output_dims.c = CONV_2X3_DILATION_OUT_CH; @@ -958,6 +968,7 @@ void conv_3x2_dilation_arm_convolve_s8(void) input_dims.c = CONV_3X2_DILATION_IN_CH; filter_dims.w = CONV_3X2_DILATION_FILTER_X; filter_dims.h = CONV_3X2_DILATION_FILTER_Y; + filter_dims.c = CONV_3X2_DILATION_IN_CH; output_dims.w = CONV_3X2_DILATION_OUTPUT_W; output_dims.h = CONV_3X2_DILATION_OUTPUT_H; output_dims.c = CONV_3X2_DILATION_OUT_CH; @@ -1049,6 +1060,7 @@ void conv_dilation_golden_arm_convolve_s8(void) input_dims.c = CONV_DILATION_GOLDEN_IN_CH; filter_dims.w = CONV_DILATION_GOLDEN_FILTER_X; filter_dims.h = CONV_DILATION_GOLDEN_FILTER_Y; + filter_dims.c = CONV_DILATION_GOLDEN_IN_CH; output_dims.w = CONV_DILATION_GOLDEN_OUTPUT_W; output_dims.h = CONV_DILATION_GOLDEN_OUTPUT_H; output_dims.c = CONV_DILATION_GOLDEN_OUT_CH; @@ -1140,6 +1152,7 @@ void conv_5_arm_convolve_s8(void) input_dims.c = CONV_5_IN_CH; filter_dims.w = CONV_5_FILTER_X; filter_dims.h = CONV_5_FILTER_Y; + filter_dims.c = CONV_5_IN_CH; output_dims.w = CONV_5_OUTPUT_W; output_dims.h = CONV_5_OUTPUT_H; output_dims.c = CONV_5_OUT_CH; @@ -1221,6 +1234,7 @@ void buffer_size_arm_convolve_s8(void) input_dims.c = CONV_5_IN_CH; filter_dims.w = CONV_5_FILTER_X; filter_dims.h = CONV_5_FILTER_Y; + filter_dims.c = CONV_5_IN_CH; output_dims.w = CONV_5_OUTPUT_W; output_dims.h = CONV_5_OUTPUT_H; output_dims.c = CONV_5_OUT_CH; @@ -1258,6 +1272,7 @@ void buffer_size_mve_arm_convolve_s8(void) input_dims.c = CONV_5_IN_CH; filter_dims.w = CONV_5_FILTER_X; filter_dims.h = CONV_5_FILTER_Y; + filter_dims.c = CONV_5_IN_CH; output_dims.w = CONV_5_OUTPUT_W; output_dims.h = CONV_5_OUTPUT_H; output_dims.c = CONV_5_OUT_CH; @@ -1297,6 +1312,7 @@ void buffer_size_dsp_arm_convolve_s8(void) input_dims.c = CONV_5_IN_CH; filter_dims.w = CONV_5_FILTER_X; filter_dims.h = CONV_5_FILTER_Y; + filter_dims.c = CONV_5_IN_CH; output_dims.w = CONV_5_OUTPUT_W; output_dims.h = CONV_5_OUTPUT_H; output_dims.c = CONV_5_OUT_CH; diff --git a/Tests/UnitTest/TestCases/test_arm_ds_cnn_l_s8/test_arm_ds_cnn_l_s8.c b/Tests/UnitTest/TestCases/test_arm_ds_cnn_l_s8/test_arm_ds_cnn_l_s8.c index 1e6eafd9..f17a7f0e 100644 --- a/Tests/UnitTest/TestCases/test_arm_ds_cnn_l_s8/test_arm_ds_cnn_l_s8.c +++ b/Tests/UnitTest/TestCases/test_arm_ds_cnn_l_s8/test_arm_ds_cnn_l_s8.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright 2022-2023 Arm Limited and/or its affiliates + * SPDX-FileCopyrightText: Copyright 2022-2024 Arm Limited and/or its affiliates * * SPDX-License-Identifier: Apache-2.0 * @@ -58,6 +58,7 @@ int ds_cnn_l_s8_get_buffer_size(void) filter_dims.h = CONV_2D_1_FILTER_H; filter_dims.w = CONV_2D_1_FILTER_W; + filter_dims.c = CONV_2D_1_IN_CH; output_dims.n = input_dims.n; output_dims.h = CONV_2D_1_OUTPUT_H; @@ -170,6 +171,7 @@ void ds_cnn_l_s8_inference(void) conv_filter_dims.h = CONV_2D_1_FILTER_H; conv_filter_dims.w = CONV_2D_1_FILTER_W; + conv_filter_dims.c = CONV_2D_1_IN_CH; in_out_dim_1.n = in_out_dim_0.n; in_out_dim_1.h = CONV_2D_1_OUTPUT_H; @@ -237,6 +239,7 @@ void ds_cnn_l_s8_inference(void) in_out_dim_1.c = in_out_dim_0.c; conv_filter_dims.h = CONV_2D_3_FILTER_H; conv_filter_dims.w = CONV_2D_3_FILTER_W; + conv_filter_dims.c = CONV_2D_3_IN_CH; conv_params.padding.h = CONV_2D_3_PAD_H; conv_params.padding.w 
= CONV_2D_3_PAD_W; diff --git a/Tests/UnitTest/TestCases/test_arm_ds_cnn_s_s8/test_arm_ds_cnn_s_s8.c b/Tests/UnitTest/TestCases/test_arm_ds_cnn_s_s8/test_arm_ds_cnn_s_s8.c index 5862df7b..87f24e92 100644 --- a/Tests/UnitTest/TestCases/test_arm_ds_cnn_s_s8/test_arm_ds_cnn_s_s8.c +++ b/Tests/UnitTest/TestCases/test_arm_ds_cnn_s_s8/test_arm_ds_cnn_s_s8.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright 2022-2023 Arm Limited and/or its affiliates + * SPDX-FileCopyrightText: Copyright 2022-2024 Arm Limited and/or its affiliates * * SPDX-License-Identifier: Apache-2.0 * @@ -60,6 +60,7 @@ int ds_cnn_s_s8_get_buffer_size(void) filter_dims.h = CONV_2D_1_FILTER_H; filter_dims.w = CONV_2D_1_FILTER_W; + filter_dims.c = CONV_2D_1_IN_CH; output_dims.n = input_dims.n; output_dims.h = CONV_2D_1_OUTPUT_H; @@ -151,6 +152,7 @@ void ds_cnn_s_s8_inference(void) conv_filter_dims.h = CONV_2D_1_FILTER_H; conv_filter_dims.w = CONV_2D_1_FILTER_W; + conv_filter_dims.c = CONV_2D_1_IN_CH; in_out_dim_1.n = in_out_dim_0.n; in_out_dim_1.h = CONV_2D_1_OUTPUT_H; @@ -221,6 +223,7 @@ void ds_cnn_s_s8_inference(void) in_out_dim_1.c = in_out_dim_0.c; conv_filter_dims.h = CONV_2D_3_FILTER_H; conv_filter_dims.w = CONV_2D_3_FILTER_W; + conv_filter_dims.c = CONV_2D_3_IN_CH; conv_params.padding.h = CONV_2D_3_PAD_H; conv_params.padding.w = CONV_2D_3_PAD_W; diff --git a/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/CMakeLists.txt b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/CMakeLists.txt new file mode 100644 index 00000000..4363d841 --- /dev/null +++ b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/CMakeLists.txt @@ -0,0 +1,23 @@ +# +# Copyright (C) 2010-2024 Arm Limited or its affiliates. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +add_cmsis_nn_unit_test_executable(test_arm_grouped_convolve_s8) + +target_sources(test_arm_grouped_convolve_s8 PRIVATE + Unity/unity_test_arm_grouped_convolve_s8.c + Unity/TestRunner/unity_test_arm_grouped_convolve_s8_runner.c) diff --git a/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/Unity/unity_test_arm_grouped_convolve_s8.c b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/Unity/unity_test_arm_grouped_convolve_s8.c new file mode 100644 index 00000000..a915234a --- /dev/null +++ b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/Unity/unity_test_arm_grouped_convolve_s8.c @@ -0,0 +1,50 @@ +/* + * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "../test_arm_grouped_convolve_s8.c" +#include "unity.h" + +#ifdef USING_FVP_CORSTONE_300 +extern void uart_init(void); +#endif + +/* This function is called from the autogenerated file. + * The name must be exactly like this + */ +void setUp(void) +{ /* This is run before EACH TEST */ +#ifdef USING_FVP_CORSTONE_300 + uart_init(); +#endif +} + +/* This function is called from the autogenerated file. + * The name must be exactly like this + */ +void tearDown(void) {} + +void test_basic_arm_grouped_convolve_1_s8(void) { grouped_conv_arm_grouped_convolve_1_s8(); } +void test_basic_arm_grouped_convolve_2_s8(void) { grouped_conv_arm_grouped_convolve_2_s8(); } +void test_basic_arm_grouped_convolve_3_s8(void) { grouped_conv_arm_grouped_convolve_3_s8(); } +void test_basic_arm_grouped_convolve_4_s8(void) { grouped_conv_arm_grouped_convolve_4_s8(); } diff --git a/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/test_arm_grouped_convolve_s8.c b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/test_arm_grouped_convolve_s8.c new file mode 100644 index 00000000..1aaa6d10 --- /dev/null +++ b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/test_arm_grouped_convolve_s8.c @@ -0,0 +1,312 @@ +/* + * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */ + +#include <arm_nnfunctions.h> + +#include <stdlib.h> +#include <unity.h> + +#include "../TestData/grouped_conv_1/test_data.h" +#include "../TestData/grouped_conv_2/test_data.h" +#include "../TestData/grouped_conv_3/test_data.h" +#include "../TestData/grouped_conv_4/test_data.h" +#include "../Utils/validate.h" + +void grouped_conv_arm_grouped_convolve_1_s8(void) +{ + const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS; + int8_t output[GROUPED_CONV_1_DST_SIZE] = {0}; + + cmsis_nn_context ctx; + cmsis_nn_conv_params conv_params; + cmsis_nn_per_channel_quant_params quant_params; + cmsis_nn_dims input_dims; + cmsis_nn_dims filter_dims; + cmsis_nn_dims bias_dims; + cmsis_nn_dims output_dims; + + const int32_t *bias_data = grouped_conv_1_biases; + const int8_t *kernel_data = grouped_conv_1_weights; + const int8_t *input_data = grouped_conv_1_input; + const int8_t *output_ref = grouped_conv_1_output_ref; + const int32_t output_ref_size = GROUPED_CONV_1_DST_SIZE; + + input_dims.n = GROUPED_CONV_1_INPUT_BATCHES; + input_dims.w = GROUPED_CONV_1_INPUT_W; + input_dims.h = GROUPED_CONV_1_INPUT_H; + input_dims.c = GROUPED_CONV_1_IN_CH; + filter_dims.w = GROUPED_CONV_1_FILTER_X; + filter_dims.h = GROUPED_CONV_1_FILTER_Y; + filter_dims.c = GROUPED_CONV_1_FILTER_CH; + output_dims.w = GROUPED_CONV_1_OUTPUT_W; + output_dims.h = GROUPED_CONV_1_OUTPUT_H; + output_dims.c = GROUPED_CONV_1_OUT_CH; + + conv_params.padding.w = GROUPED_CONV_1_PAD_X; + conv_params.padding.h = GROUPED_CONV_1_PAD_Y; + conv_params.stride.w = GROUPED_CONV_1_STRIDE_X; + conv_params.stride.h = GROUPED_CONV_1_STRIDE_Y; + conv_params.dilation.w = GROUPED_CONV_1_DILATION_X; + conv_params.dilation.h = GROUPED_CONV_1_DILATION_Y; + + conv_params.input_offset = GROUPED_CONV_1_INPUT_OFFSET; + conv_params.output_offset = GROUPED_CONV_1_OUTPUT_OFFSET; + conv_params.activation.min = GROUPED_CONV_1_OUT_ACTIVATION_MIN; + conv_params.activation.max = GROUPED_CONV_1_OUT_ACTIVATION_MAX; + quant_params.multiplier = (int32_t *)grouped_conv_1_output_mult; + quant_params.shift = (int32_t *)grouped_conv_1_output_shift; + + int32_t buf_size = arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims); + ctx.buf = malloc(buf_size); + ctx.size = 0; + + arm_cmsis_nn_status result = arm_convolve_s8(&ctx, + &conv_params, + &quant_params, + &input_dims, + input_data, + &filter_dims, + kernel_data, + &bias_dims, + bias_data, + &output_dims, + output); + + if (ctx.buf) + { + // The caller is responsible to clear the scratch buffers for security reasons if applicable.
+ memset(ctx.buf, 0, buf_size); + free(ctx.buf); + } + TEST_ASSERT_EQUAL(expected, result); + TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size)); + memset(output, 0, sizeof(output)); +} + +void grouped_conv_arm_grouped_convolve_2_s8(void) +{ + const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS; + int8_t output[GROUPED_CONV_2_DST_SIZE] = {0}; + + cmsis_nn_context ctx; + cmsis_nn_conv_params conv_params; + cmsis_nn_per_channel_quant_params quant_params; + cmsis_nn_dims input_dims; + cmsis_nn_dims filter_dims; + cmsis_nn_dims bias_dims; + cmsis_nn_dims output_dims; + + const int32_t *bias_data = grouped_conv_2_biases; + const int8_t *kernel_data = grouped_conv_2_weights; + const int8_t *input_data = grouped_conv_2_input; + const int8_t *output_ref = grouped_conv_2_output_ref; + const int32_t output_ref_size = GROUPED_CONV_2_DST_SIZE; + + input_dims.n = GROUPED_CONV_2_INPUT_BATCHES; + input_dims.w = GROUPED_CONV_2_INPUT_W; + input_dims.h = GROUPED_CONV_2_INPUT_H; + input_dims.c = GROUPED_CONV_2_IN_CH; + filter_dims.w = GROUPED_CONV_2_FILTER_X; + filter_dims.h = GROUPED_CONV_2_FILTER_Y; + filter_dims.c = GROUPED_CONV_2_FILTER_CH; + output_dims.w = GROUPED_CONV_2_OUTPUT_W; + output_dims.h = GROUPED_CONV_2_OUTPUT_H; + output_dims.c = GROUPED_CONV_2_OUT_CH; + + conv_params.padding.w = GROUPED_CONV_2_PAD_X; + conv_params.padding.h = GROUPED_CONV_2_PAD_Y; + conv_params.stride.w = GROUPED_CONV_2_STRIDE_X; + conv_params.stride.h = GROUPED_CONV_2_STRIDE_Y; + conv_params.dilation.w = GROUPED_CONV_2_DILATION_X; + conv_params.dilation.h = GROUPED_CONV_2_DILATION_Y; + + conv_params.input_offset = GROUPED_CONV_2_INPUT_OFFSET; + conv_params.output_offset = GROUPED_CONV_2_OUTPUT_OFFSET; + conv_params.activation.min = GROUPED_CONV_2_OUT_ACTIVATION_MIN; + conv_params.activation.max = GROUPED_CONV_2_OUT_ACTIVATION_MAX; + quant_params.multiplier = (int32_t *)grouped_conv_2_output_mult; + quant_params.shift = (int32_t *)grouped_conv_2_output_shift; + + int32_t buf_size = arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims); + ctx.buf = malloc(buf_size); + ctx.size = 0; + + arm_cmsis_nn_status result = arm_convolve_s8(&ctx, + &conv_params, + &quant_params, + &input_dims, + input_data, + &filter_dims, + kernel_data, + &bias_dims, + bias_data, + &output_dims, + output); + + if (ctx.buf) + { + // The caller is responsible to clear the scratch buffers for security reasons if applicable. 
+ memset(ctx.buf, 0, buf_size); + free(ctx.buf); + } + TEST_ASSERT_EQUAL(expected, result); + TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size)); + memset(output, 0, sizeof(output)); +} + +void grouped_conv_arm_grouped_convolve_3_s8(void) +{ + const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS; + int8_t output[GROUPED_CONV_3_DST_SIZE] = {0}; + + cmsis_nn_context ctx; + cmsis_nn_conv_params conv_params; + cmsis_nn_per_channel_quant_params quant_params; + cmsis_nn_dims input_dims; + cmsis_nn_dims filter_dims; + cmsis_nn_dims bias_dims; + cmsis_nn_dims output_dims; + + const int32_t *bias_data = grouped_conv_3_biases; + const int8_t *kernel_data = grouped_conv_3_weights; + const int8_t *input_data = grouped_conv_3_input; + const int8_t *output_ref = grouped_conv_3_output_ref; + const int32_t output_ref_size = GROUPED_CONV_3_DST_SIZE; + + input_dims.n = GROUPED_CONV_3_INPUT_BATCHES; + input_dims.w = GROUPED_CONV_3_INPUT_W; + input_dims.h = GROUPED_CONV_3_INPUT_H; + input_dims.c = GROUPED_CONV_3_IN_CH; + filter_dims.w = GROUPED_CONV_3_FILTER_X; + filter_dims.h = GROUPED_CONV_3_FILTER_Y; + filter_dims.c = GROUPED_CONV_3_FILTER_CH; + output_dims.w = GROUPED_CONV_3_OUTPUT_W; + output_dims.h = GROUPED_CONV_3_OUTPUT_H; + output_dims.c = GROUPED_CONV_3_OUT_CH; + + conv_params.padding.w = GROUPED_CONV_3_PAD_X; + conv_params.padding.h = GROUPED_CONV_3_PAD_Y; + conv_params.stride.w = GROUPED_CONV_3_STRIDE_X; + conv_params.stride.h = GROUPED_CONV_3_STRIDE_Y; + conv_params.dilation.w = GROUPED_CONV_3_DILATION_X; + conv_params.dilation.h = GROUPED_CONV_3_DILATION_Y; + + conv_params.input_offset = GROUPED_CONV_3_INPUT_OFFSET; + conv_params.output_offset = GROUPED_CONV_3_OUTPUT_OFFSET; + conv_params.activation.min = GROUPED_CONV_3_OUT_ACTIVATION_MIN; + conv_params.activation.max = GROUPED_CONV_3_OUT_ACTIVATION_MAX; + quant_params.multiplier = (int32_t *)grouped_conv_3_output_mult; + quant_params.shift = (int32_t *)grouped_conv_3_output_shift; + + int32_t buf_size = arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims); + ctx.buf = malloc(buf_size); + ctx.size = 0; + + arm_cmsis_nn_status result = arm_convolve_s8(&ctx, + &conv_params, + &quant_params, + &input_dims, + input_data, + &filter_dims, + kernel_data, + &bias_dims, + bias_data, + &output_dims, + output); + + if (ctx.buf) + { + // The caller is responsible to clear the scratch buffers for security reasons if applicable. 
+ memset(ctx.buf, 0, buf_size); + free(ctx.buf); + } + TEST_ASSERT_EQUAL(expected, result); + TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size)); + memset(output, 0, sizeof(output)); +} + +void grouped_conv_arm_grouped_convolve_4_s8(void) +{ + const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS; + int8_t output[GROUPED_CONV_4_DST_SIZE] = {0}; + + cmsis_nn_context ctx; + cmsis_nn_conv_params conv_params; + cmsis_nn_per_channel_quant_params quant_params; + cmsis_nn_dims input_dims; + cmsis_nn_dims filter_dims; + cmsis_nn_dims bias_dims; + cmsis_nn_dims output_dims; + + const int32_t *bias_data = grouped_conv_4_biases; + const int8_t *kernel_data = grouped_conv_4_weights; + const int8_t *input_data = grouped_conv_4_input; + const int8_t *output_ref = grouped_conv_4_output_ref; + const int32_t output_ref_size = GROUPED_CONV_4_DST_SIZE; + + input_dims.n = GROUPED_CONV_4_INPUT_BATCHES; + input_dims.w = GROUPED_CONV_4_INPUT_W; + input_dims.h = GROUPED_CONV_4_INPUT_H; + input_dims.c = GROUPED_CONV_4_IN_CH; + filter_dims.w = GROUPED_CONV_4_FILTER_X; + filter_dims.h = GROUPED_CONV_4_FILTER_Y; + filter_dims.c = GROUPED_CONV_4_FILTER_CH; + output_dims.w = GROUPED_CONV_4_OUTPUT_W; + output_dims.h = GROUPED_CONV_4_OUTPUT_H; + output_dims.c = GROUPED_CONV_4_OUT_CH; + + conv_params.padding.w = GROUPED_CONV_4_PAD_X; + conv_params.padding.h = GROUPED_CONV_4_PAD_Y; + conv_params.stride.w = GROUPED_CONV_4_STRIDE_X; + conv_params.stride.h = GROUPED_CONV_4_STRIDE_Y; + conv_params.dilation.w = GROUPED_CONV_4_DILATION_X; + conv_params.dilation.h = GROUPED_CONV_4_DILATION_Y; + + conv_params.input_offset = GROUPED_CONV_4_INPUT_OFFSET; + conv_params.output_offset = GROUPED_CONV_4_OUTPUT_OFFSET; + conv_params.activation.min = GROUPED_CONV_4_OUT_ACTIVATION_MIN; + conv_params.activation.max = GROUPED_CONV_4_OUT_ACTIVATION_MAX; + quant_params.multiplier = (int32_t *)grouped_conv_4_output_mult; + quant_params.shift = (int32_t *)grouped_conv_4_output_shift; + + int32_t buf_size = arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims); + ctx.buf = malloc(buf_size); + ctx.size = 0; + + arm_cmsis_nn_status result = arm_convolve_s8(&ctx, + &conv_params, + &quant_params, + &input_dims, + input_data, + &filter_dims, + kernel_data, + &bias_dims, + bias_data, + &output_dims, + output); + + if (ctx.buf) + { + // The caller is responsible to clear the scratch buffers for security reasons if applicable. 
+ memset(ctx.buf, 0, buf_size); + free(ctx.buf); + } + TEST_ASSERT_EQUAL(expected, result); + TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size)); + memset(output, 0, sizeof(output)); +} \ No newline at end of file diff --git a/Tests/UnitTest/conv_settings.py b/Tests/UnitTest/conv_settings.py index 9b41a90d..f16c688b 100644 --- a/Tests/UnitTest/conv_settings.py +++ b/Tests/UnitTest/conv_settings.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 # @@ -38,6 +38,7 @@ def __init__(self, w_y=3, stride_x=2, stride_y=2, + groups=1, pad=True, randmin=TestSettings.INT8_MIN, randmax=TestSettings.INT8_MAX, @@ -84,11 +85,23 @@ def __init__(self, int4_weights=int4_weights) self.scaling_factors = [] + self.groups = groups if self.test_type == 'depthwise_conv': self.channel_multiplier = self.output_ch // self.input_ch if self.output_ch % self.input_ch != 0: raise RuntimeError("out channel ({}) is not multiple of in channel ({})".format(out_ch, in_ch)) + if groups != 1: + raise RuntimeError("ERROR: Groups cannot be used for depthwise convolution") + + self.filter_ch = in_ch // groups + if in_ch % groups != 0: + print(in_ch) + print(groups) + raise RuntimeError("ERROR: Number of input channels must be an even multiple of groups") + if out_ch % groups != 0: + raise RuntimeError("ERROR: Number of output channels must be an even multiple of groups") + else: self.channel_multiplier = 0 @@ -113,6 +126,8 @@ def write_c_config_header(self) -> None: f.write("#define {}_OUTPUT_OFFSET {}\n".format(prefix, self.output_zero_point)) f.write("#define {}_DILATION_X {}\n".format(prefix, self.dilation_x)) f.write("#define {}_DILATION_Y {}\n".format(prefix, self.dilation_y)) + if self.groups != 1: + f.write("#define {}_FILTER_CH {}\n".format(prefix, self.filter_ch)) if self.test_type == 'transpose_conv': f.write("#define {}_PAD_X_WITH_OFFSET {}\n".format(prefix, self.pad_x_with_offset)) f.write("#define {}_PAD_Y_WITH_OFFSET {}\n".format(prefix, self.pad_y_with_offset)) @@ -279,7 +294,7 @@ def generate_data(self, input_data=None, weights=None, biases=None) -> None: if self.test_type == 'transpose_conv': weight_shape = [self.filter_y, self.filter_x, out_channel, self.input_ch] else: - weight_shape = [self.filter_y, self.filter_x, self.input_ch, out_channel] + weight_shape = [self.filter_y, self.filter_x, self.filter_ch, out_channel] if weights is not None: weights = tf.reshape(weights, weight_shape) @@ -301,7 +316,8 @@ def generate_data(self, input_data=None, weights=None, biases=None) -> None: strides=(self.stride_y, self.stride_x), padding=self.padding, input_shape=input_shape[1:], - dilation_rate=(self.dilation_y, self.dilation_x)) + dilation_rate=(self.dilation_y, self.dilation_x), + groups=self.groups) model.add(conv_layer) conv_layer.set_weights([weights, biases]) elif self.test_type == 'depthwise_conv': diff --git a/Tests/UnitTest/generate_test_data.py b/Tests/UnitTest/generate_test_data.py index 9f8ba15a..b82c53d7 100755 --- a/Tests/UnitTest/generate_test_data.py +++ b/Tests/UnitTest/generate_test_data.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -# SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 # @@ -642,7 +642,86 @@ def load_testdata_sets(regenerate_input, regenerate_weights, 
regenerate_biases, int16xint8=True, dilation_x=2, interpreter=interpreter) - dataset = 'basic_int4' + dataset = 'grouped_conv_1' + testdata_sets[dataset] = ConvSettings(dataset, + type_of_test, + regenerate_weights, + regenerate_input, + regenerate_biases, + schema_file, + in_ch=2, + out_ch=6, + groups=2, + x_in=5, + y_in=5, + w_x=2, + w_y=2, + generate_bias=False, + stride_x=1, + stride_y=1, + pad=False, + batches=2, + interpreter=interpreter) + dataset = 'grouped_conv_2' + testdata_sets[dataset] = ConvSettings(dataset, + type_of_test, + regenerate_weights, + regenerate_input, + regenerate_biases, + schema_file, + in_ch=4, + out_ch=2, + groups=2, + x_in=7, + y_in=3, + w_x=1, + w_y=2, + generate_bias=True, + stride_x=1, + stride_y=1, + pad=False, + interpreter=interpreter) + dataset = 'grouped_conv_3' + testdata_sets[dataset] = ConvSettings(dataset, + type_of_test, + regenerate_weights, + regenerate_input, + regenerate_biases, + schema_file, + in_ch=2, + out_ch=4, + groups=2, + x_in=3, + y_in=2, + w_x=3, + w_y=2, + generate_bias=True, + stride_x=2, + stride_y=2, + pad=True, + batches=2, + interpreter=interpreter) + dataset = 'grouped_conv_4' + testdata_sets[dataset] = ConvSettings(dataset, + type_of_test, + regenerate_weights, + regenerate_input, + regenerate_biases, + schema_file, + in_ch=3, + out_ch=6, + groups=3, + x_in=9, + y_in=9, + w_x=2, + w_y=2, + generate_bias=True, + stride_x=1, + stride_y=1, + dilation_x=3, + dilation_y=3, + pad=True, + interpreter=interpreter) dataset = 'basic_int4' testdata_sets[dataset] = ConvSettings(dataset, type_of_test, regenerate_weights,