diff --git a/Include/arm_nnfunctions.h b/Include/arm_nnfunctions.h
index d8062f80..ade3417c 100644
--- a/Include/arm_nnfunctions.h
+++ b/Include/arm_nnfunctions.h
@@ -21,8 +21,8 @@
* Title: arm_nnfunctions.h
* Description: Public header file for CMSIS NN Library
*
- * $Date: 9 January 2024
- * $Revision: V.12.5.1
+ * $Date: 11 January 2024
+ * $Revision: V.12.6.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
@@ -365,8 +365,10 @@ arm_cmsis_nn_status arm_convolve_s4(const cmsis_nn_context *ctx,
* It contains the multiplier and shift values to be applied to each output channel
* @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
* @param[in] input_data Input (activation) data pointer. Data type: int8
- * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
- * spatial filter dimensions
+ * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, CK] where HK and WK are the
+ * spatial filter dimensions and CK is the number of filter channels. CK != C_IN is
+ * used for grouped convolution, in which case the required conditions are
+ * C_IN = N * CK and C_OUT = N * M for N groups of size M.
* @param[in] filter_data Filter data pointer. Data type: int8
* @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
* @param[in] bias_data Optional bias data pointer. Data type: int32
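
For reference, a minimal sketch (not part of the patch) of how the dimensions relate in a
grouped call, assuming the standard cmsis_nn_dims field order {n, h, w, c}; the concrete
sizes are illustrative only.

    /* C_IN = 4 input channels with CK = 2 filter channels gives N = 2 groups;  */
    /* C_OUT = 6 output channels gives M = 3 filters per group (C_OUT = N * M). */
    cmsis_nn_dims input_dims  = {.n = 1, .h = 8, .w = 8, .c = 4}; /* [N, H, W, C_IN]              */
    cmsis_nn_dims filter_dims = {.n = 6, .h = 3, .w = 3, .c = 2}; /* [C_OUT, HK, WK, CK]          */
    cmsis_nn_dims output_dims = {.n = 1, .h = 6, .w = 6, .c = 6}; /* 3x3 kernel, no pad, stride 1 */
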
diff --git a/Include/arm_nnsupportfunctions.h b/Include/arm_nnsupportfunctions.h
index 9c0ebda5..20cbfd38 100644
--- a/Include/arm_nnsupportfunctions.h
+++ b/Include/arm_nnsupportfunctions.h
@@ -21,8 +21,8 @@
* Title: arm_nnsupportfunctions.h
* Description: Public header file of support functions for CMSIS NN Library
*
- * $Date: 9 January 2024
- * $Revision: V.17.6.3
+ * $Date: 11 January 2024
+ * $Revision: V.17.7.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
@@ -423,6 +423,7 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s4(const int8_t *lhs,
* @param[in] dst_offset Offset to be applied the output result
* @param[in] activation_min Minimum value to clamp down the output. Range : int8
* @param[in] activation_max Maximum value to clamp up the output. Range : int8
+ * @param[in] row_address_offset Address offset between rows in output. NOTE: Only used for MVEI extension.
* @param[in] lhs_cols_offset Column offset between subsequent lhs_rows
*
* @return The function returns ARM_CMSIS_NN_SUCCESS
@@ -441,6 +442,7 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs,
const int32_t dst_offset,
const int32_t activation_min,
const int32_t activation_max,
+ const int32_t row_address_offset,
const int32_t lhs_cols_offset);
/**
@@ -1006,6 +1008,47 @@ int8_t *arm_nn_mat_mult_kernel_s8_s16(const int8_t *input_a,
const int32_t *const output_bias,
int8_t *out_0);
+/**
+ * @brief Matrix-multiplication function for convolution with per-channel requantization, supporting an address offset
+ * between rows.
+ * @param[in] input_a pointer to operand A
+ * @param[in] input_b pointer to operand B, which always consists of 2 vectors.
+ * @param[in] output_ch number of rows of A
+ * @param[in] out_shift pointer to per output channel requantization shift parameter.
+ * @param[in] out_mult pointer to per output channel requantization multiplier parameter.
+ * @param[in] out_offset output tensor offset.
+ * @param[in] activation_min minimum value to clamp the output to. Range : int8
+ * @param[in] activation_max maximum value to clamp the output to. Range : int8
+ * @param[in] num_col_a number of columns of A
+ * @param[in] aligned_num_col_a number of columns of A aligned by 4
+ * @param[in] output_bias per output channel bias. Range : int32
+ * @param[in] row_address_offset address offset between rows in the output
+ * @param[in,out] out_0 pointer to output
+ * @return The function returns one of the following two values:
+ * 1. The incremented output pointer, for a successful operation, or
+ * 2. NULL, if the implementation is not available.
+ *
+ * @details This function does the matrix multiplication of the weight matrix for all output channels
+ * with 2 columns from im2col and produces two elements per output channel. The outputs are
+ * clamped to the range provided by activation min and max.
+ *
+ * This function is slightly less performant than arm_nn_mat_mult_kernel_s8_s16, but adds support for
+ * grouped convolution. Supported framework: TensorFlow Lite micro.
+ */
+int8_t *arm_nn_mat_mult_kernel_row_offset_s8_s16(const int8_t *input_a,
+ const int16_t *input_b,
+ const uint16_t output_ch,
+ const int32_t *out_shift,
+ const int32_t *out_mult,
+ const int32_t out_offset,
+ const int16_t activation_min,
+ const int16_t activation_max,
+ const int32_t num_col_a,
+ const int32_t aligned_num_col_a,
+ const int32_t *const output_bias,
+ const int32_t row_address_offset,
+ int8_t *out_0);
+
/**
* @brief Common softmax function for s8 input and s8 or s16 output
* @param[in] input Pointer to the input tensor
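
The new row_address_offset argument generalises the output addressing: existing callers pass
rhs_rows, which reproduces the previous behaviour, while the grouped path in arm_convolve_s8
passes the full output channel count so that each group's rows land in their own slot of the
NHWC output. A worked sketch with illustrative numbers, assuming one group of a two-group
convolution:

    /* rhs_rows = output_ch_per_group = 3, row_address_offset = output_ch = 6.
     * In the MVE path each scatter writes dst[0], dst[6], dst[12], dst[18] for the
     * current channel, one channel per dst++. After the 3 channels of a 4-row tile,
     * dst += 4 * 6 - 3 = 21, i.e. 24 bytes past the tile start: the same group's
     * channels of the next 4 output pixels. With row_address_offset == rhs_rows this
     * reduces to the previous dst += 3 * rhs_rows step.
     */
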
diff --git a/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c b/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
index 9e2aeb05..a255fe5e 100644
--- a/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
+++ b/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -21,8 +21,8 @@
* Title: arm_convolve_1_x_n_s8.c
* Description: s8 version of 1xN convolution using symmetric quantization.
*
- * $Date: 8 March 2023
- * $Revision: V.3.4.0
+ * $Date: 04 January 2024
+ * $Revision: V.3.5.0
*
* Target : Arm(R) M-Profile Architecture
*
@@ -30,7 +30,6 @@
#include "arm_nnfunctions.h"
#include "arm_nnsupportfunctions.h"
-
/**
* @ingroup Public
*/
@@ -149,6 +148,7 @@ arm_cmsis_nn_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
conv_params->output_offset,
conv_params->activation.min,
conv_params->activation.max,
+ rhs_rows,
lhs_offset);
output_data += lhs_rows * rhs_rows;
diff --git a/Source/ConvolutionFunctions/arm_convolve_1x1_s8.c b/Source/ConvolutionFunctions/arm_convolve_1x1_s8.c
index 7f3030eb..7cebffc0 100644
--- a/Source/ConvolutionFunctions/arm_convolve_1x1_s8.c
+++ b/Source/ConvolutionFunctions/arm_convolve_1x1_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2022-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2022-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -21,8 +21,8 @@
* Title: arm_convolve_1x1_s8.c
* Description: Generic s8 version of 1x1 convolution
*
- * $Date: 20 January 2023
- * $Revision: V.1.0.1
+ * $Date: 04 January 2024
+ * $Revision: V.1.1.0
*
* Target : Arm(R) M-Profile Architecture
*
@@ -96,6 +96,7 @@ arm_cmsis_nn_status arm_convolve_1x1_s8(const cmsis_nn_context *ctx,
conv_params->output_offset,
conv_params->activation.min,
conv_params->activation.max,
+ rhs_rows,
rhs_cols * stride_w);
if (result != ARM_CMSIS_NN_SUCCESS)
{
diff --git a/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c b/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
index e832d0b3..8a400f2b 100644
--- a/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
+++ b/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -21,8 +21,8 @@
* Title: arm_convolve_1x1_s8_fast.c
* Description: Fast s8 version of 1x1 convolution (non-square shape)
*
- * $Date: 30 October 2023
- * $Revision: V.3.4.0
+ * $Date: 04 January 2024
+ * $Revision: V.3.5.0
*
* Target : Arm(R) M-Profile Architecture
*
@@ -86,6 +86,7 @@ arm_cmsis_nn_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx,
conv_params->output_offset,
conv_params->activation.min,
conv_params->activation.max,
+ rhs_rows,
rhs_cols);
/* Return to application */
diff --git a/Source/ConvolutionFunctions/arm_convolve_s8.c b/Source/ConvolutionFunctions/arm_convolve_s8.c
index 0b14eaf4..8c4ac67a 100644
--- a/Source/ConvolutionFunctions/arm_convolve_s8.c
+++ b/Source/ConvolutionFunctions/arm_convolve_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -21,8 +21,8 @@
* Title: arm_convolve_s8.c
* Description: s8 version of convolution using symmetric quantization.
*
- * $Date: 08 June 2023
- * $Revision: V.3.5.0
+ * $Date: 04 January 2024
+ * $Revision: V.3.6.0
*
* Target : Arm(R) M-Profile Architecture
*
@@ -30,7 +30,6 @@
#include "arm_nnfunctions.h"
#include "arm_nnsupportfunctions.h"
-
/**
* @ingroup Public
*/
@@ -73,6 +72,7 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
const uint16_t input_ch = input_dims->c;
const uint16_t kernel_x = filter_dims->w;
const uint16_t kernel_y = filter_dims->h;
+ const uint16_t kernel_ch = filter_dims->c;
const uint16_t output_x = output_dims->w;
const uint16_t output_y = output_dims->h;
const uint16_t output_ch = output_dims->c;
@@ -86,20 +86,26 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
const int32_t out_offset = conv_params->output_offset;
const int32_t out_activation_min = conv_params->activation.min;
const int32_t out_activation_max = conv_params->activation.max;
- const int32_t rhs_cols = kernel_x * kernel_y * input_ch;
const int32_t input_offset = conv_params->input_offset;
+ const int32_t groups = input_ch / kernel_ch;
+ const int32_t rhs_cols = kernel_x * kernel_y * kernel_ch;
+ const int32_t output_ch_per_group = output_ch / groups;
+
int32_t *output_mult = quant_params->multiplier;
int32_t *output_shift = quant_params->shift;
+ if (input_ch % groups != 0 || output_ch % groups != 0)
+ {
+ return ARM_CMSIS_NN_ARG_ERROR;
+ }
+
int i_batch;
for (i_batch = 0; i_batch < input_batches; i_batch++)
{
-
#if defined(ARM_MATH_MVEI)
/* Generate up to four columns from the input tensor a GEMM computation */
int8_t *im2col_buf = (int8_t *)buffer_a;
- const int32_t rhs_rows = output_dims->c;
#else
const int32_t remainder = rhs_cols % 4;
const int32_t aligned_rhs_cols = remainder != 0 ? rhs_cols + 4 - remainder : rhs_cols;
@@ -108,175 +114,221 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
int8_t *im2col_buf = (int8_t *)buffer_a + aligned_rhs_cols * 2;
int16_t *im2col_buf_start_s16 = buffer_a;
#endif
- int8_t *out = output_data;
int32_t lhs_rows = 0;
+ const int8_t *filter_data_ptr = &filter_data[0];
+ const int32_t *bias_data_ptr = &bias_data[0];
+ const int32_t *output_mult_ptr = &output_mult[0];
+ const int32_t *output_shift_ptr = &output_shift[0];
+
/* This part implements the im2col function */
- for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
+ for (int32_t i_group = 0; i_group < groups; i_group++)
{
- for (int i_out_x = 0; i_out_x < output_x; i_out_x++)
+ int8_t *out = output_data + i_group * output_ch_per_group;
+ for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
{
- const int32_t base_idx_x = stride_x * i_out_x - pad_x;
- const int32_t base_idx_y = stride_y * i_out_y - pad_y;
-
- for (int32_t i_ker_y = 0; i_ker_y < kernel_y; i_ker_y++)
+ for (int i_out_x = 0; i_out_x < output_x; i_out_x++)
{
- for (int32_t i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++)
- {
- const int32_t k_y = base_idx_y + dilation_y * i_ker_y;
- const int32_t k_x = base_idx_x + dilation_x * i_ker_x;
+ const int32_t base_idx_x = stride_x * i_out_x - pad_x;
+ const int32_t base_idx_y = stride_y * i_out_y - pad_y;
- if (k_y < 0 || k_y >= input_y || k_x < 0 || k_x >= input_x)
- {
- arm_memset_s8(im2col_buf, (int8_t)-input_offset, sizeof(int8_t) * input_ch);
- }
- else
+ for (int32_t i_ker_y = 0; i_ker_y < kernel_y; i_ker_y++)
+ {
+ for (int32_t i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++)
{
- arm_memcpy_s8(im2col_buf, input_data + (k_y * input_x + k_x) * input_ch, input_ch);
+ const int32_t k_y = base_idx_y + dilation_y * i_ker_y;
+ const int32_t k_x = base_idx_x + dilation_x * i_ker_x;
+
+ if (k_y < 0 || k_y >= input_y || k_x < 0 || k_x >= input_x)
+ {
+ arm_memset_s8(im2col_buf, (int8_t)-input_offset, sizeof(int8_t) * kernel_ch);
+ }
+ else
+ {
+ arm_memcpy_s8(im2col_buf,
+ input_data + (k_y * input_x + k_x) * input_ch + i_group * kernel_ch,
+ sizeof(int8_t) * kernel_ch);
+ }
+ im2col_buf += kernel_ch;
}
- im2col_buf += input_ch;
}
- }
- lhs_rows++;
+ lhs_rows++;
#if defined(ARM_MATH_MVEI)
- /* Computation is filed for every 4 columns */
- if (lhs_rows == 4)
- {
- arm_nn_mat_mult_nt_t_s8((int8_t *)buffer_a,
- filter_data,
- bias_data,
- out,
- output_mult,
- output_shift,
- lhs_rows,
- rhs_rows,
- rhs_cols,
- input_offset,
- out_offset,
- out_activation_min,
- out_activation_max,
- rhs_cols);
- out += lhs_rows * rhs_rows;
-
- lhs_rows = 0;
- im2col_buf = (int8_t *)buffer_a;
- }
+
+ /* Computation is filed for every 4 columns */
+            /* Computation is performed for every 4 columns */
+ {
+ arm_nn_mat_mult_nt_t_s8((int8_t *)buffer_a,
+ filter_data_ptr,
+ bias_data_ptr,
+ out,
+ output_mult_ptr,
+ output_shift_ptr,
+ lhs_rows,
+ output_ch_per_group,
+ rhs_cols,
+ input_offset,
+ out_offset,
+ out_activation_min,
+ out_activation_max,
+ output_ch,
+ rhs_cols);
+
+ out += lhs_rows * output_ch;
+
+ lhs_rows = 0;
+ im2col_buf = (int8_t *)buffer_a;
+ }
#else
#if defined(ARM_MATH_DSP)
- /* Copy one column with input offset and no ordering */
- arm_s8_to_s16_unordered_with_offset(
- im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset);
+ /* Copy one column with input offset and no ordering */
+ arm_s8_to_s16_unordered_with_offset(
+ im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset);
#else
- arm_q7_to_q15_with_offset(im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset);
+
+ arm_q7_to_q15_with_offset(
+ im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset);
+
#endif
- im2col_buf_start_s16 += aligned_rhs_cols;
+ im2col_buf_start_s16 += aligned_rhs_cols;
- if (lhs_rows == 2)
- {
- out = arm_nn_mat_mult_kernel_s8_s16(filter_data,
- buffer_a,
- output_ch,
- output_shift,
- output_mult,
- out_offset,
- out_activation_min,
- out_activation_max,
- rhs_cols,
- aligned_rhs_cols,
- bias_data,
- out);
-
- /* counter reset */
- im2col_buf_start_s16 = buffer_a;
- im2col_buf = (int8_t *)buffer_a + aligned_rhs_cols * 2;
- lhs_rows = 0;
- }
+ if (lhs_rows == 2)
+ {
+ if (groups > 1)
+ {
+ out = arm_nn_mat_mult_kernel_row_offset_s8_s16(filter_data_ptr,
+ buffer_a,
+ output_ch_per_group,
+ output_shift_ptr,
+ output_mult_ptr,
+ out_offset,
+ out_activation_min,
+ out_activation_max,
+ rhs_cols,
+ aligned_rhs_cols,
+ bias_data_ptr,
+ output_ch,
+ out);
+ }
+ else
+ {
+ out = arm_nn_mat_mult_kernel_s8_s16(filter_data_ptr,
+ buffer_a,
+ output_ch_per_group,
+ output_shift_ptr,
+ output_mult_ptr,
+ out_offset,
+ out_activation_min,
+ out_activation_max,
+ rhs_cols,
+ aligned_rhs_cols,
+ bias_data_ptr,
+ out);
+ }
+
+ /* counter reset */
+ im2col_buf_start_s16 = buffer_a;
+ im2col_buf = (int8_t *)buffer_a + aligned_rhs_cols * 2;
+ lhs_rows = 0;
+ }
#endif
+ }
}
if (out == NULL)
{
return ARM_CMSIS_NN_NO_IMPL_ERROR;
}
- }
- /* Handle left over columns */
- if (lhs_rows != 0)
- {
+ /* Handle left over columns */
+ if (lhs_rows != 0)
+ {
#if defined(ARM_MATH_MVEI)
- arm_nn_mat_mult_nt_t_s8((int8_t *)buffer_a,
- filter_data,
- bias_data,
- out,
- output_mult,
- output_shift,
- lhs_rows,
- rhs_rows,
- rhs_cols,
- input_offset,
- out_offset,
- out_activation_min,
- out_activation_max,
- rhs_cols);
- out += lhs_rows * rhs_rows;
- lhs_rows = 0;
- im2col_buf = (int8_t *)buffer_a;
+ arm_nn_mat_mult_nt_t_s8((int8_t *)buffer_a,
+ filter_data_ptr,
+ bias_data_ptr,
+ out,
+ output_mult_ptr,
+ output_shift_ptr,
+ lhs_rows,
+ output_ch_per_group,
+ rhs_cols,
+ input_offset,
+ out_offset,
+ out_activation_min,
+ out_activation_max,
+ output_ch,
+ rhs_cols);
+
+ out += lhs_rows * output_ch;
+ lhs_rows = 0;
+ im2col_buf = (int8_t *)buffer_a;
#else // #if defined(ARM_MATH_MVEI)
- const int8_t *ker_a = filter_data;
- int i;
+ const int8_t *ker_a = filter_data_ptr;
+ int i;
- for (i = 0; i < output_ch; i++)
- {
- /* Load the accumulator with bias first */
- int32_t sum = 0;
- if (bias_data)
+ for (i = 0; i < output_ch_per_group; i++)
{
- sum = bias_data[i];
- }
+ /* Load the accumulator with bias first */
+ int32_t sum = 0;
+ if (bias_data_ptr)
+ {
+ sum = bias_data_ptr[i];
+ }
- const int16_t *ip_as_col = buffer_a;
+ const int16_t *ip_as_col = buffer_a;
#if defined(ARM_MATH_DSP)
- /* 4 multiply and accumulates are done in one loop. */
- uint16_t col_count = rhs_cols / 4;
- while (col_count)
- {
- int32_t ker_a1, ker_a2;
- int32_t ip_b1, ip_b2;
+ /* 4 multiply and accumulates are done in one loop. */
+ uint16_t col_count = rhs_cols / 4;
+ while (col_count)
+ {
+ int32_t ker_a1, ker_a2;
+ int32_t ip_b1, ip_b2;
- ker_a = read_and_pad_reordered(ker_a, &ker_a1, &ker_a2);
+ ker_a = read_and_pad_reordered(ker_a, &ker_a1, &ker_a2);
- ip_b1 = arm_nn_read_q15x2_ia(&ip_as_col);
- sum = SMLAD(ker_a1, ip_b1, sum);
- ip_b2 = arm_nn_read_q15x2_ia(&ip_as_col);
- sum = SMLAD(ker_a2, ip_b2, sum);
+ ip_b1 = arm_nn_read_q15x2_ia(&ip_as_col);
+ sum = SMLAD(ker_a1, ip_b1, sum);
+ ip_b2 = arm_nn_read_q15x2_ia(&ip_as_col);
+ sum = SMLAD(ker_a2, ip_b2, sum);
- col_count--;
- }
- /* Handle left over mac */
- col_count = rhs_cols & 0x3;
+ col_count--;
+ }
+ /* Handle left over mac */
+ col_count = rhs_cols & 0x3;
#else
- uint16_t col_count = rhs_cols;
+ uint16_t col_count = rhs_cols;
+
#endif
- while (col_count)
- {
- int8_t ker_a1 = *ker_a++;
- int16_t ip_b1 = *ip_as_col++;
- sum += ker_a1 * ip_b1;
- col_count--;
+ while (col_count)
+ {
+ int8_t ker_a1 = *ker_a++;
+ int16_t ip_b1 = *ip_as_col++;
+
+ sum += ker_a1 * ip_b1;
+ col_count--;
+ }
+
+ sum = arm_nn_requantize(sum, output_mult_ptr[i], output_shift_ptr[i]);
+ sum += out_offset;
+ sum = MAX(sum, out_activation_min);
+ sum = MIN(sum, out_activation_max);
+ *out++ = (int8_t)sum;
}
- sum = arm_nn_requantize(sum, output_mult[i], output_shift[i]);
- sum += out_offset;
- sum = MAX(sum, out_activation_min);
- sum = MIN(sum, out_activation_max);
- *out++ = (int8_t)sum;
- }
+ im2col_buf_start_s16 = buffer_a;
+ im2col_buf = (int8_t *)buffer_a + aligned_rhs_cols * 2;
+ lhs_rows = 0;
#endif // #if defined(ARM_MATH_MVEI)
+ }
+ filter_data_ptr += output_ch_per_group * rhs_cols;
+ bias_data_ptr += output_ch_per_group;
+ output_mult_ptr += output_ch_per_group;
+ output_shift_ptr += output_ch_per_group;
}
-
/* Advance to the next batch */
input_data += (input_x * input_y * input_ch);
output_data += (output_x * output_y * output_ch);
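
To make the per-group pointer stepping concrete, a sketch (illustration only) using the sizes
of the grouped_conv_1 test added below: input_ch = 2, kernel_ch = 1, output_ch = 6, 2x2 kernel.

    /* groups = input_ch / kernel_ch = 2, output_ch_per_group = 6 / 2 = 3,
     * rhs_cols = kernel_x * kernel_y * kernel_ch = 2 * 2 * 1 = 4.
     *
     * i_group = 0: out = output_data + 0, fills channels 0..2 of every output pixel
     * i_group = 1: out = output_data + 3, fills channels 3..5 of every output pixel
     * After each group: filter_data_ptr += 3 * 4, and the bias, multiplier and shift
     * pointers each advance by output_ch_per_group = 3.
     */
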
diff --git a/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c b/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c
index 07f29327..98f90585 100644
--- a/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c
+++ b/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -22,8 +22,8 @@
* Description: s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in
* cmsis-nn to perform the convolution.
*
- * $Date: 8 March 2023
- * $Revision: V.2.4.0
+ * $Date: 04 January 2024
+ * $Revision: V.2.5.0
*
* Target : Arm(R) M-Profile Architecture
*
@@ -60,7 +60,8 @@ arm_cmsis_nn_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx,
int8_t *output_data)
{
if ((conv_params->padding.w == 0) && (conv_params->padding.h == 0) && (filter_dims->w == 1) &&
- (filter_dims->h == 1) && (conv_params->dilation.w == 1 && conv_params->dilation.h == 1))
+ (filter_dims->h == 1) && (conv_params->dilation.w == 1 && conv_params->dilation.h == 1) &&
+ (input_dims->c == filter_dims->c))
{
if ((conv_params->stride.w == 1) && (conv_params->stride.h == 1))
{
@@ -92,7 +93,7 @@ arm_cmsis_nn_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx,
}
}
else if ((input_dims->h == 1) && conv_params->dilation.w == 1 && (filter_dims->h == 1) &&
- ((conv_params->stride.w * input_dims->c) % 4 == 0))
+ ((conv_params->stride.w * input_dims->c) % 4 == 0) && (input_dims->c == filter_dims->c))
{
return arm_convolve_1_x_n_s8(ctx,
conv_params,
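
In short, the added input_dims->c == filter_dims->c checks keep the 1x1 and 1xN fast paths
restricted to ordinary convolutions; a sketch of the resulting dispatch (the grouped case is
served by the generic arm_convolve_s8 branch of the wrapper, which lies outside this hunk):

    /* filter_dims->c == input_dims->c  -> non-grouped, the fast paths above may apply
     * filter_dims->c != input_dims->c  -> grouped convolution, generic arm_convolve_s8
     */
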
diff --git a/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_row_offset_s8_s16.c b/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_row_offset_s8_s16.c
new file mode 100644
index 00000000..7a400e12
--- /dev/null
+++ b/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_row_offset_s8_s16.c
@@ -0,0 +1,253 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project: CMSIS NN Library
+ * Title: arm_nn_mat_mult_kernel_row_offset_s8_s16.c
+ * Description: Matrix-multiplication function for grouped convolution
+ *
+ * $Date: 04 January 2024
+ * $Revision: V.1.0.0
+ *
+ * Target : Arm(R) M-Profile Architecture
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+/*
+ * Matrix-multiplication function for convolution with per-channel requantization, supporting an address offset between
+ * rows.
+ *
+ * Refer header file for details.
+ *
+ */
+
+int8_t *arm_nn_mat_mult_kernel_row_offset_s8_s16(const int8_t *input_a,
+ const int16_t *input_b,
+ const uint16_t output_ch,
+ const int32_t *out_shift,
+ const int32_t *out_mult,
+ const int32_t out_offset,
+ const int16_t activation_min,
+ const int16_t activation_max,
+ const int32_t num_col_a,
+ const int32_t aligned_num_col_a,
+ const int32_t *const output_bias,
+ const int32_t row_address_offset,
+ int8_t *out_0)
+{
+
+#if !defined(ARM_MATH_MVEI)
+ /* set up the second output pointers */
+
+ int8_t *out_1 = out_0 + row_address_offset;
+ const int32_t *bias = output_bias;
+
+ uint16_t row_count = output_ch / 2;
+ const int8_t *ip_a0 = input_a;
+ /* this loop over rows in A */
+ while (row_count)
+ {
+ /* setup pointers for B */
+ const int16_t *ip_b0 = input_b;
+ const int16_t *ip_b1 = ip_b0 + aligned_num_col_a;
+
+ /* align the second pointer for A */
+ const int8_t *ip_a1 = ip_a0 + num_col_a;
+
+ int32_t ch_0_out_0 = 0;
+ int32_t ch_0_out_1 = 0;
+ int32_t ch_1_out_0 = 0;
+ int32_t ch_1_out_1 = 0;
+ /* Init accumulator with bias for channel N and N + 1 */
+ if (bias)
+ {
+ ch_0_out_0 = *bias;
+ ch_0_out_1 = *bias++;
+ ch_1_out_0 = *bias;
+ ch_1_out_1 = *bias++;
+ }
+
+ #if defined(ARM_MATH_DSP)
+ int32_t col_count = num_col_a / 4;
+ /* accumulate over the vector */
+ while (col_count)
+ {
+ int32_t a01, a02, a11, a12;
+ int32_t b0 = arm_nn_read_q15x2_ia(&ip_b0);
+ int32_t b1 = arm_nn_read_q15x2_ia(&ip_b1);
+
+ ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02);
+ ip_a1 = read_and_pad_reordered(ip_a1, &a11, &a12);
+
+ ch_0_out_0 = SMLAD(a01, b0, ch_0_out_0);
+ ch_0_out_1 = SMLAD(a01, b1, ch_0_out_1);
+ ch_1_out_0 = SMLAD(a11, b0, ch_1_out_0);
+ ch_1_out_1 = SMLAD(a11, b1, ch_1_out_1);
+
+ b0 = arm_nn_read_q15x2_ia(&ip_b0);
+ b1 = arm_nn_read_q15x2_ia(&ip_b1);
+
+ ch_0_out_0 = SMLAD(a02, b0, ch_0_out_0);
+ ch_0_out_1 = SMLAD(a02, b1, ch_0_out_1);
+ ch_1_out_0 = SMLAD(a12, b0, ch_1_out_0);
+ ch_1_out_1 = SMLAD(a12, b1, ch_1_out_1);
+
+ col_count--;
+ } /* while over col_count */
+
+ col_count = num_col_a & 0x3;
+
+ #else
+ int32_t col_count = num_col_a;
+ #endif
+ while (col_count)
+ {
+ int8_t a0 = *ip_a0++;
+ int16_t b0 = *ip_b0++;
+ int8_t a1 = *ip_a1++;
+ int16_t b1 = *ip_b1++;
+
+ ch_0_out_0 += a0 * b0;
+ ch_0_out_1 += a0 * b1;
+ ch_1_out_0 += a1 * b0;
+ ch_1_out_1 += a1 * b1;
+ col_count--;
+ } /* while over col_count */
+
+ ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift);
+ ch_0_out_0 += out_offset;
+ ch_0_out_0 = MAX(ch_0_out_0, activation_min);
+ ch_0_out_0 = MIN(ch_0_out_0, activation_max);
+ *out_0++ = (int8_t)ch_0_out_0;
+
+ ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift);
+ ch_0_out_1 += out_offset;
+ ch_0_out_1 = MAX(ch_0_out_1, activation_min);
+ ch_0_out_1 = MIN(ch_0_out_1, activation_max);
+ *out_1++ = (int8_t)ch_0_out_1;
+ out_mult++;
+ out_shift++;
+
+ ch_1_out_0 = arm_nn_requantize(ch_1_out_0, *out_mult, *out_shift);
+ ch_1_out_0 += out_offset;
+ ch_1_out_0 = MAX(ch_1_out_0, activation_min);
+ ch_1_out_0 = MIN(ch_1_out_0, activation_max);
+ *out_0++ = (int8_t)ch_1_out_0;
+
+ ch_1_out_1 = arm_nn_requantize(ch_1_out_1, *out_mult, *out_shift);
+ ch_1_out_1 += out_offset;
+ ch_1_out_1 = MAX(ch_1_out_1, activation_min);
+ ch_1_out_1 = MIN(ch_1_out_1, activation_max);
+ *out_1++ = (int8_t)ch_1_out_1;
+ out_mult++;
+ out_shift++;
+
+ /* skip row */
+ ip_a0 += num_col_a;
+ row_count--;
+ }
+
+ /* compute the last odd numbered row if any */
+ if (output_ch & 0x1)
+ {
+ /* setup pointers for B */
+ const int16_t *ip_b0 = input_b;
+ const int16_t *ip_b1 = ip_b0 + aligned_num_col_a;
+
+ int32_t ch_0_out_0 = 0;
+ int32_t ch_0_out_1 = 0;
+
+ /* load the bias */
+ if (bias)
+ {
+ ch_0_out_0 = *bias;
+ ch_0_out_1 = *bias++;
+ }
+
+ #if defined(ARM_MATH_DSP)
+ int32_t col_count = num_col_a >> 2;
+ while (col_count)
+ {
+ int32_t a01, a02;
+ int32_t b0 = arm_nn_read_q15x2_ia(&ip_b0);
+ int32_t b1 = arm_nn_read_q15x2_ia(&ip_b1);
+
+ ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02);
+
+ ch_0_out_0 = SMLAD(a01, b0, ch_0_out_0);
+ ch_0_out_1 = SMLAD(a01, b1, ch_0_out_1);
+
+ b0 = arm_nn_read_q15x2_ia(&ip_b0);
+ b1 = arm_nn_read_q15x2_ia(&ip_b1);
+ ch_0_out_0 = SMLAD(a02, b0, ch_0_out_0);
+ ch_0_out_1 = SMLAD(a02, b1, ch_0_out_1);
+
+ col_count--;
+ }
+ col_count = num_col_a & 0x3;
+
+ #else
+ int32_t col_count = num_col_a;
+ #endif
+ while (col_count)
+ {
+ int8_t a0 = *ip_a0++;
+ int16_t b0 = *ip_b0++;
+ int16_t b1 = *ip_b1++;
+
+ ch_0_out_0 += a0 * b0;
+ ch_0_out_1 += a0 * b1;
+ col_count--;
+ }
+
+ ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift);
+ ch_0_out_0 += out_offset;
+ ch_0_out_0 = MAX(ch_0_out_0, activation_min);
+ ch_0_out_0 = MIN(ch_0_out_0, activation_max);
+ *out_0++ = (int8_t)ch_0_out_0;
+
+ ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift);
+ ch_0_out_1 += out_offset;
+ ch_0_out_1 = MAX(ch_0_out_1, activation_min);
+ ch_0_out_1 = MIN(ch_0_out_1, activation_max);
+ *out_1++ = (int8_t)ch_0_out_1;
+ out_mult++;
+ out_shift++;
+ }
+
+ out_0 += 2 * row_address_offset - output_ch;
+
+ /* return the new output pointer with offset */
+ return out_0;
+#else
+ (void)input_a;
+ (void)input_b;
+ (void)output_ch;
+ (void)out_shift;
+ (void)out_mult;
+ (void)out_offset;
+ (void)activation_min;
+ (void)activation_max;
+    (void)aligned_num_col_a;
+    (void)num_col_a;
+ (void)output_bias;
+ (void)row_address_offset;
+ (void)out_0;
+ return NULL;
+#endif
+}
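
The pointer adjustment at the end of the non-MVE path is easiest to see with numbers
(illustrative only, matching a two-group layout where this kernel handles one group):

    /* output_ch = output_ch_per_group = 3, row_address_offset = 6 (total channels).
     * The row loops advance out_0 by 3 (one value per channel of the first column),
     * so out_0 += 2 * 6 - 3 leaves it 2 * row_address_offset = 12 bytes past where it
     * started: the same group's channel block two output pixels further on, since the
     * kernel consumes two im2col columns per call.
     */
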
diff --git a/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c b/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c
index 13c28c59..8b93ca49 100644
--- a/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c
+++ b/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -21,8 +21,8 @@
* Title: arm_nn_mat_mult_s8_nt_t_s8
* Description: Matrix multiplication support function with the right-hand-side (rhs) matrix transposed
*
- * $Date: 22 March 2023
- * $Revision: V.2.1.2
+ * $Date: 04 January 2024
+ * $Revision: V.3.0.0
*
* Target : Arm(R) M-Profile Architecture
*
@@ -58,6 +58,7 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs,
const int32_t dst_offset,
const int32_t activation_min,
const int32_t activation_max,
+ const int32_t row_address_offset,
const int32_t lhs_cols_offset)
{
@@ -140,12 +141,13 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs,
res = vmaxq_s32(res, vdupq_n_s32(activation_min));
res = vminq_s32(res, vdupq_n_s32(activation_max));
- const uint32x4_t scatter_offset = {0, (uint32_t)rhs_rows, (uint32_t)rhs_rows * 2, (uint32_t)rhs_rows * 3};
+ const uint32x4_t scatter_offset = {
+ 0, (uint32_t)row_address_offset, (uint32_t)row_address_offset * 2, (uint32_t)row_address_offset * 3};
vstrbq_scatter_offset_s32(dst, scatter_offset, res);
dst++;
}
lhs += 4 * lhs_cols_offset;
- dst += (3 * rhs_rows);
+ dst += 4 * row_address_offset - rhs_rows;
}
for (; i_items < lhs_rows; i_items++)
@@ -217,9 +219,11 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs,
acc_n0 = MIN(acc_n0, activation_max);
*dst++ = (int8_t)acc_n0;
}
+ dst += row_address_offset - rhs_rows;
}
#elif defined(ARM_MATH_DSP)
+ (void)row_address_offset;
const int32_t rhs_off0 = rhs_cols - 4;
const int32_t lhs_off0 = lhs_cols_offset - 4;
@@ -618,6 +622,7 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs,
}
}
#else
+ (void)row_address_offset;
for (int32_t rhs_rows_idx = 0; rhs_rows_idx <= (rhs_rows - 2); rhs_rows_idx += 2)
{
const int8_t *lhs_ptr = &lhs[0];
diff --git a/Tests/UnitTest/CMakeLists.txt b/Tests/UnitTest/CMakeLists.txt
index 3f24afe1..c6c0ef03 100644
--- a/Tests/UnitTest/CMakeLists.txt
+++ b/Tests/UnitTest/CMakeLists.txt
@@ -1,5 +1,5 @@
#
-# SPDX-FileCopyrightText: Copyright 2019-2023 Arm Limited and/or its affiliates
+# SPDX-FileCopyrightText: Copyright 2019-2024 Arm Limited and/or its affiliates
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -96,6 +96,7 @@ add_subdirectory(TestCases/test_arm_elementwise_mul_s8)
add_subdirectory(TestCases/test_arm_fully_connected_s16)
add_subdirectory(TestCases/test_arm_fully_connected_s8)
add_subdirectory(TestCases/test_arm_fully_connected_s4)
+add_subdirectory(TestCases/test_arm_grouped_convolve_s8)
add_subdirectory(TestCases/test_arm_lstm_unidirectional_s16_s8)
add_subdirectory(TestCases/test_arm_max_pool_s16)
add_subdirectory(TestCases/test_arm_max_pool_s8)
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/biases_data.h
new file mode 100644
index 00000000..35f54d09
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/biases_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_biases[1] = {-22573};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/config_data.h
new file mode 100644
index 00000000..054820ba
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/config_data.h
@@ -0,0 +1,24 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#define GROUPED_CONV_OUT_CH 1
+#define GROUPED_CONV_IN_CH 1
+#define GROUPED_CONV_INPUT_W 7
+#define GROUPED_CONV_INPUT_H 7
+#define GROUPED_CONV_DST_SIZE 16
+#define GROUPED_CONV_INPUT_SIZE 49
+#define GROUPED_CONV_OUT_ACTIVATION_MIN -128
+#define GROUPED_CONV_OUT_ACTIVATION_MAX 127
+#define GROUPED_CONV_INPUT_BATCHES 1
+#define GROUPED_CONV_FILTER_X 3
+#define GROUPED_CONV_FILTER_Y 3
+#define GROUPED_CONV_STRIDE_X 2
+#define GROUPED_CONV_STRIDE_Y 2
+#define GROUPED_CONV_PAD_X 1
+#define GROUPED_CONV_PAD_Y 1
+#define GROUPED_CONV_OUTPUT_W 4
+#define GROUPED_CONV_OUTPUT_H 4
+#define GROUPED_CONV_INPUT_OFFSET 128
+#define GROUPED_CONV_OUTPUT_OFFSET 127
+#define GROUPED_CONV_DILATION_X 1
+#define GROUPED_CONV_DILATION_Y 1
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/input_data.h
new file mode 100644
index 00000000..e684c4ed
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/input_data.h
@@ -0,0 +1,9 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_input[49] = {-97, 6, 19, -32, 76, 79, -74, 85, -66, -7, -29, 107, 118,
+ 65, 30, 45, -92, -123, -42, -25, -49, 74, -81, -2, -106, 3,
+ 69, -55, -13, -107, -59, -56, -30, 52, 124, 91, 90, 19, -71,
+ -47, 11, 113, 38, 77, -54, 24, 57, -90, 115};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_mult_data.h
new file mode 100644
index 00000000..b897ee2a
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_mult_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_output_mult[1] = {1972783891};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_ref_data.h
new file mode 100644
index 00000000..ff634014
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_ref_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_output_ref[16] = {112, 23, 7, 40, 48, 29, -15, 5, 28, 36, -12, 10, -7, -3, -16, -11};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_shift_data.h
new file mode 100644
index 00000000..190c42cb
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/output_shift_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_output_shift[1] = {-9};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/test_data.h
new file mode 100644
index 00000000..2c64febe
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/test_data.h
@@ -0,0 +1,9 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv/weights_data.h
new file mode 100644
index 00000000..b00fa385
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv/weights_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_weights[9] = {-13, -73, -73, -113, -127, 4, -75, 117, -116};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/biases_data.h
new file mode 100644
index 00000000..09b09752
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/biases_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_1_biases[6] = {0, 0, 0, 0, 0, 0};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/config_data.h
new file mode 100644
index 00000000..8c0bb9c9
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/config_data.h
@@ -0,0 +1,25 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#define GROUPED_CONV_1_OUT_CH 6
+#define GROUPED_CONV_1_IN_CH 2
+#define GROUPED_CONV_1_INPUT_W 5
+#define GROUPED_CONV_1_INPUT_H 5
+#define GROUPED_CONV_1_DST_SIZE 192
+#define GROUPED_CONV_1_INPUT_SIZE 50
+#define GROUPED_CONV_1_OUT_ACTIVATION_MIN -128
+#define GROUPED_CONV_1_OUT_ACTIVATION_MAX 127
+#define GROUPED_CONV_1_INPUT_BATCHES 2
+#define GROUPED_CONV_1_FILTER_X 2
+#define GROUPED_CONV_1_FILTER_Y 2
+#define GROUPED_CONV_1_STRIDE_X 1
+#define GROUPED_CONV_1_STRIDE_Y 1
+#define GROUPED_CONV_1_PAD_X 0
+#define GROUPED_CONV_1_PAD_Y 0
+#define GROUPED_CONV_1_OUTPUT_W 4
+#define GROUPED_CONV_1_OUTPUT_H 4
+#define GROUPED_CONV_1_INPUT_OFFSET 128
+#define GROUPED_CONV_1_OUTPUT_OFFSET -56
+#define GROUPED_CONV_1_DILATION_X 1
+#define GROUPED_CONV_1_DILATION_Y 1
+#define GROUPED_CONV_1_FILTER_CH 1
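
The generated macros are typically consumed by a unit test that is not part of this excerpt;
a hedged sketch of how they would map onto the public API, assuming the standard
cmsis_nn_conv_params and cmsis_nn_dims layouts:

    const cmsis_nn_conv_params conv_params = {
        .input_offset = GROUPED_CONV_1_INPUT_OFFSET,
        .output_offset = GROUPED_CONV_1_OUTPUT_OFFSET,
        .stride = {GROUPED_CONV_1_STRIDE_X, GROUPED_CONV_1_STRIDE_Y},
        .padding = {GROUPED_CONV_1_PAD_X, GROUPED_CONV_1_PAD_Y},
        .dilation = {GROUPED_CONV_1_DILATION_X, GROUPED_CONV_1_DILATION_Y},
        .activation = {GROUPED_CONV_1_OUT_ACTIVATION_MIN, GROUPED_CONV_1_OUT_ACTIVATION_MAX},
    };
    const cmsis_nn_dims input_dims = {GROUPED_CONV_1_INPUT_BATCHES, GROUPED_CONV_1_INPUT_H,
                                      GROUPED_CONV_1_INPUT_W, GROUPED_CONV_1_IN_CH};
    const cmsis_nn_dims filter_dims = {GROUPED_CONV_1_OUT_CH, GROUPED_CONV_1_FILTER_Y,
                                       GROUPED_CONV_1_FILTER_X, GROUPED_CONV_1_FILTER_CH};
    const cmsis_nn_dims output_dims = {GROUPED_CONV_1_INPUT_BATCHES, GROUPED_CONV_1_OUTPUT_H,
                                       GROUPED_CONV_1_OUTPUT_W, GROUPED_CONV_1_OUT_CH};
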
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/input_data.h
new file mode 100644
index 00000000..cd7ba022
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/input_data.h
@@ -0,0 +1,11 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_1_input[100] = {
+ 34, -101, 76, -112, -64, -54, 24, 76, -103, 12, 81, -64, 124, 27, -88, 18, -98, -40, -104, 24,
+ -105, -114, -108, -39, 46, 124, -28, -17, -117, -91, 89, -25, -5, 23, -11, 86, -45, -125, 72, -35,
+ 48, -126, 25, -6, -37, -114, 95, -48, -107, 115, -109, 123, 80, 81, -51, -118, -113, -85, -3, -75,
+ 21, -65, -120, -2, -110, -109, -98, -99, -83, -116, -50, 76, 33, 33, 92, 61, -14, 24, -103, 84,
+ 115, 79, 15, -62, -50, -102, -108, -128, 125, 63, 110, 100, -71, -47, 24, 33, 69, -27, -81, 40};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_mult_data.h
new file mode 100644
index 00000000..dff13443
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_mult_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_1_output_mult[6] = {1438488183, 1655654472, 1389835298, 1454332884, 1200344887, 2074611644};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_ref_data.h
new file mode 100644
index 00000000..8dd8212c
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_ref_data.h
@@ -0,0 +1,15 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_1_output_ref[192] = {
+ -113, -39, 63, -87, -14, -19, -38, -1, 56, -33, 9, -18, -106, -62, -54, 11, 43, -19, -76, -43, -20, -76,
+ 51, -9, -128, -64, -57, -46, 14, -19, -109, -46, 28, -97, 61, 16, -35, -28, 26, -11, 38, -29, -42, -38,
+ -21, -2, 19, -35, -21, -21, 45, -47, 8, -14, -96, -56, -17, -17, 81, 16, -86, -36, 12, -2, 39, -52,
+ -69, -44, 36, -97, -12, -31, -90, -27, 52, -71, 23, -12, -78, -35, 14, 15, 43, -35, -95, -50, 33, -121,
+ -6, -39, -76, -30, -3, -82, 20, 9, -89, -49, -42, -64, 70, -8, -104, -48, -29, -60, -3, -56, -68, -50,
+ -33, -48, -35, -45, -96, -65, -56, -43, -29, -49, -71, -39, 33, -15, 39, -11, -43, -38, 49, -85, 19, -15,
+ -22, -22, 41, -38, 2, -22, -47, -38, -17, -78, 5, -9, -69, -28, 40, -1, 55, -31, -118, -44, -6, -17,
+ 34, -33, -100, -38, -11, -39, 19, -42, -97, -59, 28, -86, 59, 11, -74, -11, 48, -28, 34, -38, -98, -51,
+ 9, -88, -2, -18, -51, -33, 52, -41, -17, -37, -96, -47, -26, -19, 36, 1};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_shift_data.h
new file mode 100644
index 00000000..c5689f9b
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/output_shift_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_1_output_shift[6] = {-8, -9, -8, -8, -8, -9};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/test_data.h
new file mode 100644
index 00000000..2c64febe
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/test_data.h
@@ -0,0 +1,9 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_1/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/weights_data.h
new file mode 100644
index 00000000..af93bc4d
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_1/weights_data.h
@@ -0,0 +1,7 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_1_weights[24] = {-42, -127, 99, -38, 50, -72, 127, -34, 65, -75, 127, 100,
+ -77, 116, 127, -126, 73, 127, 51, 77, -4, 55, -16, 127};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/biases_data.h
new file mode 100644
index 00000000..3ec466aa
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/biases_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_2_biases[2] = {-22972, -40991};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/config_data.h
new file mode 100644
index 00000000..ca166e5b
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/config_data.h
@@ -0,0 +1,25 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#define GROUPED_CONV_2_OUT_CH 2
+#define GROUPED_CONV_2_IN_CH 4
+#define GROUPED_CONV_2_INPUT_W 7
+#define GROUPED_CONV_2_INPUT_H 3
+#define GROUPED_CONV_2_DST_SIZE 28
+#define GROUPED_CONV_2_INPUT_SIZE 84
+#define GROUPED_CONV_2_OUT_ACTIVATION_MIN -128
+#define GROUPED_CONV_2_OUT_ACTIVATION_MAX 127
+#define GROUPED_CONV_2_INPUT_BATCHES 1
+#define GROUPED_CONV_2_FILTER_X 1
+#define GROUPED_CONV_2_FILTER_Y 2
+#define GROUPED_CONV_2_STRIDE_X 1
+#define GROUPED_CONV_2_STRIDE_Y 1
+#define GROUPED_CONV_2_PAD_X 0
+#define GROUPED_CONV_2_PAD_Y 0
+#define GROUPED_CONV_2_OUTPUT_W 7
+#define GROUPED_CONV_2_OUTPUT_H 2
+#define GROUPED_CONV_2_INPUT_OFFSET 128
+#define GROUPED_CONV_2_OUTPUT_OFFSET 127
+#define GROUPED_CONV_2_DILATION_X 1
+#define GROUPED_CONV_2_DILATION_Y 1
+#define GROUPED_CONV_2_FILTER_CH 2
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/input_data.h
new file mode 100644
index 00000000..e6292c97
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/input_data.h
@@ -0,0 +1,10 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_2_input[84] = {
+ 84, -53, -94, -95, -64, -85, -94, 3, -21, -109, 123, -90, -108, -34, 97, -106, 104, 9, -106, -14, -110,
+ 116, 95, 8, 116, 77, -70, -51, -67, -84, 3, -10, -69, -92, 23, -116, 104, 86, 37, -97, -108, -108,
+ -90, 8, -56, 50, -48, -23, 102, -76, -73, 0, 5, 0, -8, -100, 112, 31, 41, 99, 116, 96, -46,
+ 93, -110, -96, -70, -61, 120, 62, 2, -70, -48, 56, -121, 117, -24, -21, 61, -104, 41, -77, -97, 99};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_mult_data.h
new file mode 100644
index 00000000..4ba76479
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_mult_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_2_output_mult[2] = {1191882470, 1352029555};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_ref_data.h
new file mode 100644
index 00000000..a6626227
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_ref_data.h
@@ -0,0 +1,7 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_2_output_ref[28] = {127, -128, 3, -82, 63, -27, -93, -61, 1, -123, 36, -26, 103, -86,
+ 70, -128, 9, -128, 59, -51, 15, -99, -128, -128, 127, -95, 127, -128};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_shift_data.h
new file mode 100644
index 00000000..a1ce4b56
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/output_shift_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_2_output_shift[2] = {-6, -7};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/test_data.h
new file mode 100644
index 00000000..2c64febe
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/test_data.h
@@ -0,0 +1,9 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_2/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/weights_data.h
new file mode 100644
index 00000000..0b6ef315
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_2/weights_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_2_weights[8] = {120, -52, 127, -120, 96, 67, -79, -127};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/biases_data.h
new file mode 100644
index 00000000..3d1a303c
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/biases_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_3_biases[4] = {-16494, 1675, -6184, -27704};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/config_data.h
new file mode 100644
index 00000000..0ce23d83
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/config_data.h
@@ -0,0 +1,25 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#define GROUPED_CONV_3_OUT_CH 4
+#define GROUPED_CONV_3_IN_CH 2
+#define GROUPED_CONV_3_INPUT_W 3
+#define GROUPED_CONV_3_INPUT_H 2
+#define GROUPED_CONV_3_DST_SIZE 16
+#define GROUPED_CONV_3_INPUT_SIZE 12
+#define GROUPED_CONV_3_OUT_ACTIVATION_MIN -128
+#define GROUPED_CONV_3_OUT_ACTIVATION_MAX 127
+#define GROUPED_CONV_3_INPUT_BATCHES 2
+#define GROUPED_CONV_3_FILTER_X 3
+#define GROUPED_CONV_3_FILTER_Y 2
+#define GROUPED_CONV_3_STRIDE_X 2
+#define GROUPED_CONV_3_STRIDE_Y 2
+#define GROUPED_CONV_3_PAD_X 1
+#define GROUPED_CONV_3_PAD_Y 0
+#define GROUPED_CONV_3_OUTPUT_W 2
+#define GROUPED_CONV_3_OUTPUT_H 1
+#define GROUPED_CONV_3_INPUT_OFFSET 128
+#define GROUPED_CONV_3_OUTPUT_OFFSET 47
+#define GROUPED_CONV_3_DILATION_X 1
+#define GROUPED_CONV_3_DILATION_Y 1
+#define GROUPED_CONV_3_FILTER_CH 1
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/input_data.h
new file mode 100644
index 00000000..09c0bdfe
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/input_data.h
@@ -0,0 +1,7 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_3_input[24] = {49, 51, -76, 16, 5, -65, 34, 38, -9, -95, -84, -104,
+ 41, -114, 85, -57, 58, -110, -3, -122, -115, -89, 60, 55};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_mult_data.h
new file mode 100644
index 00000000..bf56c239
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_mult_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_3_output_mult[4] = {1096383366, 1906223722, 1992067095, 1833146972};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_ref_data.h
new file mode 100644
index 00000000..aad049e5
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_ref_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_3_output_ref[16] = {-41, 43, -38, 16, -16, 77, 60, -6, -22, 30, 11, 7, -18, 93, 32, -7};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_shift_data.h
new file mode 100644
index 00000000..1bccd965
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/output_shift_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_3_output_shift[4] = {-8, -9, -9, -9};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/test_data.h
new file mode 100644
index 00000000..2c64febe
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/test_data.h
@@ -0,0 +1,9 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_3/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/weights_data.h
new file mode 100644
index 00000000..9d7c6217
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_3/weights_data.h
@@ -0,0 +1,7 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_3_weights[24] = {117, -127, 67, -4, -92, 52, 127, 22, -50, 63, -37, 8,
+ 127, -66, -106, 14, -56, -125, -16, 38, -12, -127, 4, 108};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/biases_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/biases_data.h
new file mode 100644
index 00000000..fa8fc17e
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/biases_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_4_biases[6] = {-14062, 34706, -14058, 6024, 3930, 9268};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/config_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/config_data.h
new file mode 100644
index 00000000..3ab708f9
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/config_data.h
@@ -0,0 +1,25 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#define GROUPED_CONV_4_OUT_CH 6
+#define GROUPED_CONV_4_IN_CH 3
+#define GROUPED_CONV_4_INPUT_W 9
+#define GROUPED_CONV_4_INPUT_H 9
+#define GROUPED_CONV_4_DST_SIZE 486
+#define GROUPED_CONV_4_INPUT_SIZE 243
+#define GROUPED_CONV_4_OUT_ACTIVATION_MIN -128
+#define GROUPED_CONV_4_OUT_ACTIVATION_MAX 127
+#define GROUPED_CONV_4_INPUT_BATCHES 1
+#define GROUPED_CONV_4_FILTER_X 2
+#define GROUPED_CONV_4_FILTER_Y 2
+#define GROUPED_CONV_4_STRIDE_X 1
+#define GROUPED_CONV_4_STRIDE_Y 1
+#define GROUPED_CONV_4_PAD_X 1
+#define GROUPED_CONV_4_PAD_Y 1
+#define GROUPED_CONV_4_OUTPUT_W 9
+#define GROUPED_CONV_4_OUTPUT_H 9
+#define GROUPED_CONV_4_INPUT_OFFSET 128
+#define GROUPED_CONV_4_OUTPUT_OFFSET -62
+#define GROUPED_CONV_4_DILATION_X 3
+#define GROUPED_CONV_4_DILATION_Y 3
+#define GROUPED_CONV_4_FILTER_CH 1
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/input_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/input_data.h
new file mode 100644
index 00000000..f1d86ff5
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/input_data.h
@@ -0,0 +1,19 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_4_input[243] = {
+ 74, -104, -81, 46, -118, -58, -106, 95, 114, -72, 59, 89, 123, -2, -41, -104, 87, 23, 0,
+ 66, -119, -102, 103, 85, 35, 67, 59, -118, -108, -74, -67, -100, -55, 111, -58, -6, 89, -22,
+ -66, 45, -54, -62, -91, 87, 0, -122, 59, -78, 78, 59, -24, -46, 66, 125, 71, -15, 124,
+ 82, -21, 12, -119, 3, 126, 82, -29, 85, 14, -25, -91, 77, -23, -106, 107, -35, -14, 64,
+ 83, -39, 79, 29, 19, 92, 95, 91, 70, 21, 70, 38, 110, 49, 112, -41, -125, -97, 6,
+ -48, 36, -76, 48, -8, -79, 93, -33, -96, -61, 57, -90, -41, 36, -63, 92, -39, 76, -39,
+ 33, 65, -58, -13, -94, -91, 115, 104, -42, -11, -97, -66, -100, 82, 125, 67, 39, 75, -21,
+ 2, -53, 2, 120, 16, -39, 64, 70, -34, -52, 84, -114, 79, -80, 42, 86, 105, -53, 49,
+ -22, -37, -72, -115, -33, -99, 55, -57, 34, -126, 1, 123, 16, -61, -76, 25, 98, 111, 124,
+ -64, 97, 103, 5, 32, 76, -30, 6, -96, 106, -33, -36, -8, 56, -103, -61, -39, -41, -99,
+ 55, -112, 109, 106, -115, 58, 110, -55, -85, -81, -3, -20, -58, -21, 122, -16, 93, -37, 75,
+ -64, -90, -94, 47, -83, 15, -111, 97, -76, 69, 90, 91, -112, -87, -4, 18, -65, 79, 48,
+ -115, 19, -45, -31, -99, -38, -8, 29, -79, 75, 15, 10, -79, -46, 29};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_mult_data.h
new file mode 100644
index 00000000..c58a42ae
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_mult_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_4_output_mult[6] = {1813678179, 1858473757, 1768903071, 1183163162, 1102339756, 1617679616};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_ref_data.h
new file mode 100644
index 00000000..e9328247
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_ref_data.h
@@ -0,0 +1,28 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_4_output_ref[486] = {
+ -83, -3, -62, -40, 5, -46, -77, 53, -44, -39, 20, -2, -93, 51, -45, -39, -32, -23, -47, 12, -40, -38, -24,
+ -3, -74, 57, -48, -40, -7, -10, -71, 40, -27, -32, -30, -41, -79, 54, -36, -35, -18, -43, -124, 47, -66, -46,
+ -43, -28, -117, 38, -42, -42, -45, -33, -52, 8, -14, -62, 49, -60, -30, 33, 1, -54, 29, -13, -65, 23, -14,
+ -49, 15, -4, -75, 41, 4, -5, 73, 21, -85, 51, -30, -17, 27, -6, -18, -25, -19, -38, 35, -29, -64, 45,
+ -39, -11, 50, 1, -78, 4, -73, 6, -31, -8, -96, 13, -76, 15, -21, 3, -44, 13, -42, -45, -7, -54, -79,
+ 43, -51, -53, -1, -3, -33, 27, 7, -29, 1, -22, -38, 6, -10, -49, 15, -20, -53, -7, -15, -29, 29, -31,
+ -43, 42, 18, -34, 36, -25, -73, 26, -28, -6, 50, -27, -89, 2, -40, 9, -23, 6, -75, 10, -49, 7, -21,
+ 7, -67, 3, -54, -61, 60, -61, -53, 3, 16, -9, 57, 13, -17, 1, 6, -13, 47, 12, -76, 25, -23, -13,
+ 29, 36, -16, -18, -19, -24, 10, -6, -57, 24, -6, -39, 39, -4, -32, -6, 2, -24, -3, -34, -51, -17, -72,
+ -22, -38, -25, -61, -10, -77, 10, -25, 1, -34, 15, -11, -64, 50, -56, -38, 3, 19, 24, 42, 20, -28, -7,
+ -27, -13, 54, 13, -62, 32, -3, 26, 45, 20, 5, -5, -15, -18, 47, -18, -73, 31, -15, 1, -1, -1, -46,
+ 3, -36, -29, 14, -14, -99, 30, -65, -33, -17, 9, -85, 9, -47, -34, -43, -31, -42, 13, -18, -58, -19, -52,
+ -42, -15, -34, -27, 14, -6, -75, 48, 9, -11, 5, -33, -29, 33, -11, 15, 29, -24, -49, -4, -11, -52, 42,
+ -33, -37, -7, -40, -3, 60, -23, -89, 39, -19, -44, -2, -2, -94, 12, -41, 15, -16, 10, -51, -23, -76, -2,
+ -11, 20, -73, 2, -53, -51, 34, -59, -83, 31, -8, 8, 37, 12, -93, 36, 16, -7, 51, -21, -50, 0, -30,
+ -45, 35, 12, -65, 18, -6, 17, -16, -9, -8, -9, -24, 20, 62, 1, -73, 12, -41, -15, 5, -12, -86, 10,
+ -52, -29, -48, -37, -99, 27, -55, -36, -17, 10, -76, 2, -52, -80, 10, -62, -55, -20, -52, -9, 23, -34, -65,
+ -10, -62, -55, 17, -31, -34, -33, -64, -1, -14, -3, -62, -7, -70, 0, -2, -10, -52, -19, -58, -29, -22, -12,
+ -62, -15, -71, -24, -28, -47, -36, -36, -84, -23, -42, -31, -60, -20, -83, 1, -51, -44, -77, 1, -52, -80, -36,
+ -52, -77, -6, -76, -5, -20, -52, -31, -34, -73, 6, -25, -52, -36, -24, -67, 2, 11, -47, -72, -7, -57, -62,
+ -22, -28, -61, -18, -79, -34, 4, -38, -31, -37, -64, -37, -22, -8, -66, -16, -83, 6, -46, -36, -77, -9, -84,
+ -40, -32, -16};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_shift_data.h
new file mode 100644
index 00000000..4ebe6c11
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/output_shift_data.h
@@ -0,0 +1,6 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int32_t grouped_conv_4_output_shift[6] = {-9, -9, -9, -8, -8, -9};
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/test_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/test_data.h
new file mode 100644
index 00000000..2c64febe
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/test_data.h
@@ -0,0 +1,9 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#include "biases_data.h"
+#include "config_data.h"
+#include "input_data.h"
+#include "output_mult_data.h"
+#include "output_ref_data.h"
+#include "output_shift_data.h"
+#include "weights_data.h"
diff --git a/Tests/UnitTest/TestCases/TestData/grouped_conv_4/weights_data.h b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/weights_data.h
new file mode 100644
index 00000000..87cb7b8d
--- /dev/null
+++ b/Tests/UnitTest/TestCases/TestData/grouped_conv_4/weights_data.h
@@ -0,0 +1,7 @@
+// Generated by test_settings.py using tensorflow version 2.13.0 (Keras version 2.13.1).
+// Interpreter from tensorflow version 2.13.0 and revision upstream/v2.13.0-0-g1cb1a030a62.
+#pragma once
+#include <stdint.h>
+
+const int8_t grouped_conv_4_weights[24] = {127, 26, -101, 118, -83, 14, 127, 39, 6, 84, 127, 107,
+ 127, -60, 15, 31, 63, 127, 49, 116, 127, -37, 120, 5};
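As a quick sanity check on the generated data above: with GROUPED_CONV_4_FILTER_CH = IN_CH / groups = 1, the weight tensor holds C_OUT * HK * WK * CK = 6 * 2 * 2 * 1 = 24 values, which matches the array size. The snippet below is an illustrative check only, not part of the patch; the helper name is made up and the include paths assume the unit-test directory layout.

/* Illustrative only -- not part of the patch. Checks the grouped_conv_4
 * weight count against the [C_OUT, HK, WK, CK] layout from config_data.h. */
#include <assert.h>
#include <stdint.h>

#include "../TestData/grouped_conv_4/config_data.h"
#include "../TestData/grouped_conv_4/weights_data.h"

static void grouped_conv_4_weight_count_check(void)
{
    const int32_t expected = GROUPED_CONV_4_OUT_CH * GROUPED_CONV_4_FILTER_Y * GROUPED_CONV_4_FILTER_X *
                             GROUPED_CONV_4_FILTER_CH; /* 6 * 2 * 2 * 1 = 24 */
    assert(expected == (int32_t)(sizeof(grouped_conv_4_weights) / sizeof(grouped_conv_4_weights[0])));
}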
diff --git a/Tests/UnitTest/TestCases/test_arm_convolve_1_x_n_s8/test_arm_convolve_1_x_n_s8.c b/Tests/UnitTest/TestCases/test_arm_convolve_1_x_n_s8/test_arm_convolve_1_x_n_s8.c
index 1d9f9824..afe45fa3 100644
--- a/Tests/UnitTest/TestCases/test_arm_convolve_1_x_n_s8/test_arm_convolve_1_x_n_s8.c
+++ b/Tests/UnitTest/TestCases/test_arm_convolve_1_x_n_s8/test_arm_convolve_1_x_n_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2023-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -53,6 +53,7 @@ void conv_1_x_n_1_arm_convolve_s8(void)
input_dims.c = CONV_1_X_N_1_IN_CH;
filter_dims.w = CONV_1_X_N_1_FILTER_X;
filter_dims.h = CONV_1_X_N_1_FILTER_Y;
+ filter_dims.c = CONV_1_X_N_1_IN_CH;
output_dims.w = CONV_1_X_N_1_OUTPUT_W;
output_dims.h = CONV_1_X_N_1_OUTPUT_H;
output_dims.c = CONV_1_X_N_1_OUT_CH;
@@ -144,6 +145,7 @@ void conv_1_x_n_2_arm_convolve_s8(void)
input_dims.c = CONV_1_X_N_2_IN_CH;
filter_dims.w = CONV_1_X_N_2_FILTER_X;
filter_dims.h = CONV_1_X_N_2_FILTER_Y;
+ filter_dims.c = CONV_1_X_N_2_IN_CH;
output_dims.w = CONV_1_X_N_2_OUTPUT_W;
output_dims.h = CONV_1_X_N_2_OUTPUT_H;
output_dims.c = CONV_1_X_N_2_OUT_CH;
@@ -233,6 +235,7 @@ void conv_1_x_n_3_arm_convolve_s8(void)
input_dims.c = CONV_1_X_N_3_IN_CH;
filter_dims.w = CONV_1_X_N_3_FILTER_X;
filter_dims.h = CONV_1_X_N_3_FILTER_Y;
+ filter_dims.c = CONV_1_X_N_3_IN_CH;
output_dims.w = CONV_1_X_N_3_OUTPUT_W;
output_dims.h = CONV_1_X_N_3_OUTPUT_H;
output_dims.c = CONV_1_X_N_3_OUT_CH;
@@ -324,6 +327,7 @@ void conv_1_x_n_4_arm_convolve_s8(void)
input_dims.c = CONV_1_X_N_4_IN_CH;
filter_dims.w = CONV_1_X_N_4_FILTER_X;
filter_dims.h = CONV_1_X_N_4_FILTER_Y;
+ filter_dims.c = CONV_1_X_N_4_IN_CH;
output_dims.w = CONV_1_X_N_4_OUTPUT_W;
output_dims.h = CONV_1_X_N_4_OUTPUT_H;
output_dims.c = CONV_1_X_N_4_OUT_CH;
@@ -415,6 +419,7 @@ void conv_1_x_n_5_arm_convolve_s8(void)
input_dims.c = CONV_1_X_N_5_IN_CH;
filter_dims.w = CONV_1_X_N_5_FILTER_X;
filter_dims.h = CONV_1_X_N_5_FILTER_Y;
+ filter_dims.c = CONV_1_X_N_5_IN_CH;
output_dims.w = CONV_1_X_N_5_OUTPUT_W;
output_dims.h = CONV_1_X_N_5_OUTPUT_H;
output_dims.c = CONV_1_X_N_5_OUT_CH;
@@ -502,6 +507,7 @@ void conv_1_x_n_6_arm_convolve_s8(void)
input_dims.c = CONV_1_X_N_3_IN_CH;
filter_dims.w = CONV_1_X_N_3_FILTER_X;
filter_dims.h = CONV_1_X_N_3_FILTER_Y;
+ filter_dims.c = CONV_1_X_N_3_IN_CH;
output_dims.w = CONV_1_X_N_3_OUTPUT_W;
output_dims.h = CONV_1_X_N_3_OUTPUT_H;
output_dims.c = CONV_1_X_N_3_OUT_CH;
diff --git a/Tests/UnitTest/TestCases/test_arm_convolve_s8/test_arm_convolve_s8.c b/Tests/UnitTest/TestCases/test_arm_convolve_s8/test_arm_convolve_s8.c
index 3d37c7ef..a6065946 100644
--- a/Tests/UnitTest/TestCases/test_arm_convolve_s8/test_arm_convolve_s8.c
+++ b/Tests/UnitTest/TestCases/test_arm_convolve_s8/test_arm_convolve_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -61,6 +61,7 @@ void basic_arm_convolve_s8(void)
input_dims.c = BASIC_IN_CH;
filter_dims.w = BASIC_FILTER_X;
filter_dims.h = BASIC_FILTER_Y;
+ filter_dims.c = BASIC_IN_CH;
output_dims.w = BASIC_OUTPUT_W;
output_dims.h = BASIC_OUTPUT_H;
output_dims.c = BASIC_OUT_CH;
@@ -155,6 +156,7 @@ void stride2pad1_arm_convolve_s8(void)
input_dims.c = STRIDE2PAD1_IN_CH;
filter_dims.w = STRIDE2PAD1_FILTER_X;
filter_dims.h = STRIDE2PAD1_FILTER_Y;
+ filter_dims.c = STRIDE2PAD1_IN_CH;
output_dims.w = STRIDE2PAD1_OUTPUT_W;
output_dims.h = STRIDE2PAD1_OUTPUT_H;
output_dims.c = STRIDE2PAD1_OUT_CH;
@@ -248,6 +250,7 @@ void conv_2_arm_convolve_s8(void)
input_dims.c = CONV_2_IN_CH;
filter_dims.w = CONV_2_FILTER_X;
filter_dims.h = CONV_2_FILTER_Y;
+ filter_dims.c = CONV_2_IN_CH;
output_dims.w = CONV_2_OUTPUT_W;
output_dims.h = CONV_2_OUTPUT_H;
output_dims.c = CONV_2_OUT_CH;
@@ -341,6 +344,7 @@ void conv_3_arm_convolve_s8(void)
input_dims.c = CONV_3_IN_CH;
filter_dims.w = CONV_3_FILTER_X;
filter_dims.h = CONV_3_FILTER_Y;
+ filter_dims.c = CONV_3_IN_CH;
output_dims.w = CONV_3_OUTPUT_W;
output_dims.h = CONV_3_OUTPUT_H;
output_dims.c = CONV_3_OUT_CH;
@@ -434,6 +438,7 @@ void conv_4_arm_convolve_s8(void)
input_dims.c = CONV_4_IN_CH;
filter_dims.w = CONV_4_FILTER_X;
filter_dims.h = CONV_4_FILTER_Y;
+ filter_dims.c = CONV_4_IN_CH;
output_dims.w = CONV_4_OUTPUT_W;
output_dims.h = CONV_4_OUTPUT_H;
output_dims.c = CONV_4_OUT_CH;
@@ -526,6 +531,7 @@ void conv_out_activation_arm_convolve_s8(void)
input_dims.c = CONV_OUT_ACTIVATION_IN_CH;
filter_dims.w = CONV_OUT_ACTIVATION_FILTER_X;
filter_dims.h = CONV_OUT_ACTIVATION_FILTER_Y;
+ filter_dims.c = CONV_OUT_ACTIVATION_IN_CH;
output_dims.w = CONV_OUT_ACTIVATION_OUTPUT_W;
output_dims.h = CONV_OUT_ACTIVATION_OUTPUT_H;
output_dims.c = CONV_OUT_ACTIVATION_OUT_CH;
@@ -592,6 +598,7 @@ void conv_2x2_dilation_arm_convolve_s8(void)
input_dims.c = CONV_2X2_DILATION_IN_CH;
filter_dims.w = CONV_2X2_DILATION_FILTER_X;
filter_dims.h = CONV_2X2_DILATION_FILTER_Y;
+ filter_dims.c = CONV_2X2_DILATION_IN_CH;
output_dims.w = CONV_2X2_DILATION_OUTPUT_W;
output_dims.h = CONV_2X2_DILATION_OUTPUT_H;
output_dims.c = CONV_2X2_DILATION_OUT_CH;
@@ -685,6 +692,7 @@ void conv_2x2_dilation_5x5_input_arm_convolve_s8(void)
input_dims.c = CONV_2X2_DILATION_5X5_INPUT_IN_CH;
filter_dims.w = CONV_2X2_DILATION_5X5_INPUT_FILTER_X;
filter_dims.h = CONV_2X2_DILATION_5X5_INPUT_FILTER_Y;
+ filter_dims.c = CONV_2X2_DILATION_5X5_INPUT_IN_CH;
output_dims.w = CONV_2X2_DILATION_5X5_INPUT_OUTPUT_W;
output_dims.h = CONV_2X2_DILATION_5X5_INPUT_OUTPUT_H;
output_dims.c = CONV_2X2_DILATION_5X5_INPUT_OUT_CH;
@@ -776,6 +784,7 @@ void conv_3x3_dilation_5x5_input_arm_convolve_s8(void)
input_dims.c = CONV_3X3_DILATION_5X5_INPUT_IN_CH;
filter_dims.w = CONV_3X3_DILATION_5X5_INPUT_FILTER_X;
filter_dims.h = CONV_3X3_DILATION_5X5_INPUT_FILTER_Y;
+ filter_dims.c = CONV_3X3_DILATION_5X5_INPUT_IN_CH;
output_dims.w = CONV_3X3_DILATION_5X5_INPUT_OUTPUT_W;
output_dims.h = CONV_3X3_DILATION_5X5_INPUT_OUTPUT_H;
output_dims.c = CONV_3X3_DILATION_5X5_INPUT_OUT_CH;
@@ -867,6 +876,7 @@ void conv_2x3_dilation_arm_convolve_s8(void)
input_dims.c = CONV_2X3_DILATION_IN_CH;
filter_dims.w = CONV_2X3_DILATION_FILTER_X;
filter_dims.h = CONV_2X3_DILATION_FILTER_Y;
+ filter_dims.c = CONV_2X3_DILATION_IN_CH;
output_dims.w = CONV_2X3_DILATION_OUTPUT_W;
output_dims.h = CONV_2X3_DILATION_OUTPUT_H;
output_dims.c = CONV_2X3_DILATION_OUT_CH;
@@ -958,6 +968,7 @@ void conv_3x2_dilation_arm_convolve_s8(void)
input_dims.c = CONV_3X2_DILATION_IN_CH;
filter_dims.w = CONV_3X2_DILATION_FILTER_X;
filter_dims.h = CONV_3X2_DILATION_FILTER_Y;
+ filter_dims.c = CONV_3X2_DILATION_IN_CH;
output_dims.w = CONV_3X2_DILATION_OUTPUT_W;
output_dims.h = CONV_3X2_DILATION_OUTPUT_H;
output_dims.c = CONV_3X2_DILATION_OUT_CH;
@@ -1049,6 +1060,7 @@ void conv_dilation_golden_arm_convolve_s8(void)
input_dims.c = CONV_DILATION_GOLDEN_IN_CH;
filter_dims.w = CONV_DILATION_GOLDEN_FILTER_X;
filter_dims.h = CONV_DILATION_GOLDEN_FILTER_Y;
+ filter_dims.c = CONV_DILATION_GOLDEN_IN_CH;
output_dims.w = CONV_DILATION_GOLDEN_OUTPUT_W;
output_dims.h = CONV_DILATION_GOLDEN_OUTPUT_H;
output_dims.c = CONV_DILATION_GOLDEN_OUT_CH;
@@ -1140,6 +1152,7 @@ void conv_5_arm_convolve_s8(void)
input_dims.c = CONV_5_IN_CH;
filter_dims.w = CONV_5_FILTER_X;
filter_dims.h = CONV_5_FILTER_Y;
+ filter_dims.c = CONV_5_IN_CH;
output_dims.w = CONV_5_OUTPUT_W;
output_dims.h = CONV_5_OUTPUT_H;
output_dims.c = CONV_5_OUT_CH;
@@ -1221,6 +1234,7 @@ void buffer_size_arm_convolve_s8(void)
input_dims.c = CONV_5_IN_CH;
filter_dims.w = CONV_5_FILTER_X;
filter_dims.h = CONV_5_FILTER_Y;
+ filter_dims.c = CONV_5_IN_CH;
output_dims.w = CONV_5_OUTPUT_W;
output_dims.h = CONV_5_OUTPUT_H;
output_dims.c = CONV_5_OUT_CH;
@@ -1258,6 +1272,7 @@ void buffer_size_mve_arm_convolve_s8(void)
input_dims.c = CONV_5_IN_CH;
filter_dims.w = CONV_5_FILTER_X;
filter_dims.h = CONV_5_FILTER_Y;
+ filter_dims.c = CONV_5_IN_CH;
output_dims.w = CONV_5_OUTPUT_W;
output_dims.h = CONV_5_OUTPUT_H;
output_dims.c = CONV_5_OUT_CH;
@@ -1297,6 +1312,7 @@ void buffer_size_dsp_arm_convolve_s8(void)
input_dims.c = CONV_5_IN_CH;
filter_dims.w = CONV_5_FILTER_X;
filter_dims.h = CONV_5_FILTER_Y;
+ filter_dims.c = CONV_5_IN_CH;
output_dims.w = CONV_5_OUTPUT_W;
output_dims.h = CONV_5_OUTPUT_H;
output_dims.c = CONV_5_OUT_CH;
diff --git a/Tests/UnitTest/TestCases/test_arm_ds_cnn_l_s8/test_arm_ds_cnn_l_s8.c b/Tests/UnitTest/TestCases/test_arm_ds_cnn_l_s8/test_arm_ds_cnn_l_s8.c
index 1e6eafd9..f17a7f0e 100644
--- a/Tests/UnitTest/TestCases/test_arm_ds_cnn_l_s8/test_arm_ds_cnn_l_s8.c
+++ b/Tests/UnitTest/TestCases/test_arm_ds_cnn_l_s8/test_arm_ds_cnn_l_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2022-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2022-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -58,6 +58,7 @@ int ds_cnn_l_s8_get_buffer_size(void)
filter_dims.h = CONV_2D_1_FILTER_H;
filter_dims.w = CONV_2D_1_FILTER_W;
+ filter_dims.c = CONV_2D_1_IN_CH;
output_dims.n = input_dims.n;
output_dims.h = CONV_2D_1_OUTPUT_H;
@@ -170,6 +171,7 @@ void ds_cnn_l_s8_inference(void)
conv_filter_dims.h = CONV_2D_1_FILTER_H;
conv_filter_dims.w = CONV_2D_1_FILTER_W;
+ conv_filter_dims.c = CONV_2D_1_IN_CH;
in_out_dim_1.n = in_out_dim_0.n;
in_out_dim_1.h = CONV_2D_1_OUTPUT_H;
@@ -237,6 +239,7 @@ void ds_cnn_l_s8_inference(void)
in_out_dim_1.c = in_out_dim_0.c;
conv_filter_dims.h = CONV_2D_3_FILTER_H;
conv_filter_dims.w = CONV_2D_3_FILTER_W;
+ conv_filter_dims.c = CONV_2D_3_IN_CH;
conv_params.padding.h = CONV_2D_3_PAD_H;
conv_params.padding.w = CONV_2D_3_PAD_W;
diff --git a/Tests/UnitTest/TestCases/test_arm_ds_cnn_s_s8/test_arm_ds_cnn_s_s8.c b/Tests/UnitTest/TestCases/test_arm_ds_cnn_s_s8/test_arm_ds_cnn_s_s8.c
index 5862df7b..87f24e92 100644
--- a/Tests/UnitTest/TestCases/test_arm_ds_cnn_s_s8/test_arm_ds_cnn_s_s8.c
+++ b/Tests/UnitTest/TestCases/test_arm_ds_cnn_s_s8/test_arm_ds_cnn_s_s8.c
@@ -1,5 +1,5 @@
/*
- * SPDX-FileCopyrightText: Copyright 2022-2023 Arm Limited and/or its affiliates
+ * SPDX-FileCopyrightText: Copyright 2022-2024 Arm Limited and/or its affiliates
*
* SPDX-License-Identifier: Apache-2.0
*
@@ -60,6 +60,7 @@ int ds_cnn_s_s8_get_buffer_size(void)
filter_dims.h = CONV_2D_1_FILTER_H;
filter_dims.w = CONV_2D_1_FILTER_W;
+ filter_dims.c = CONV_2D_1_IN_CH;
output_dims.n = input_dims.n;
output_dims.h = CONV_2D_1_OUTPUT_H;
@@ -151,6 +152,7 @@ void ds_cnn_s_s8_inference(void)
conv_filter_dims.h = CONV_2D_1_FILTER_H;
conv_filter_dims.w = CONV_2D_1_FILTER_W;
+ conv_filter_dims.c = CONV_2D_1_IN_CH;
in_out_dim_1.n = in_out_dim_0.n;
in_out_dim_1.h = CONV_2D_1_OUTPUT_H;
@@ -221,6 +223,7 @@ void ds_cnn_s_s8_inference(void)
in_out_dim_1.c = in_out_dim_0.c;
conv_filter_dims.h = CONV_2D_3_FILTER_H;
conv_filter_dims.w = CONV_2D_3_FILTER_W;
+ conv_filter_dims.c = CONV_2D_3_IN_CH;
conv_params.padding.h = CONV_2D_3_PAD_H;
conv_params.padding.w = CONV_2D_3_PAD_W;
diff --git a/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/CMakeLists.txt b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/CMakeLists.txt
new file mode 100644
index 00000000..4363d841
--- /dev/null
+++ b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/CMakeLists.txt
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2010-2024 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_grouped_convolve_s8)
+
+target_sources(test_arm_grouped_convolve_s8 PRIVATE
+ Unity/unity_test_arm_grouped_convolve_s8.c
+ Unity/TestRunner/unity_test_arm_grouped_convolve_s8_runner.c)
diff --git a/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/Unity/unity_test_arm_grouped_convolve_s8.c b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/Unity/unity_test_arm_grouped_convolve_s8.c
new file mode 100644
index 00000000..a915234a
--- /dev/null
+++ b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/Unity/unity_test_arm_grouped_convolve_s8.c
@@ -0,0 +1,50 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_grouped_convolve_s8.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* This function is called from the autogenerated file.
+ * The name must be exactly like this
+ */
+void setUp(void)
+{ /* This is run before EACH TEST */
+#ifdef USING_FVP_CORSTONE_300
+ uart_init();
+#endif
+}
+
+/* This function is called from the autogenerated file.
+ * The name must be exactly like this
+ */
+void tearDown(void) {}
+
+void test_basic_arm_grouped_convolve_1_s8(void) { grouped_conv_arm_grouped_convolve_1_s8(); }
+void test_basic_arm_grouped_convolve_2_s8(void) { grouped_conv_arm_grouped_convolve_2_s8(); }
+void test_basic_arm_grouped_convolve_3_s8(void) { grouped_conv_arm_grouped_convolve_3_s8(); }
+void test_basic_arm_grouped_convolve_4_s8(void) { grouped_conv_arm_grouped_convolve_4_s8(); }
diff --git a/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/test_arm_grouped_convolve_s8.c b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/test_arm_grouped_convolve_s8.c
new file mode 100644
index 00000000..1aaa6d10
--- /dev/null
+++ b/Tests/UnitTest/TestCases/test_arm_grouped_convolve_s8/test_arm_grouped_convolve_s8.c
@@ -0,0 +1,312 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <arm_nnfunctions.h>
+
+#include <stdlib.h>
+#include <unity.h>
+
+#include "../TestData/grouped_conv_1/test_data.h"
+#include "../TestData/grouped_conv_2/test_data.h"
+#include "../TestData/grouped_conv_3/test_data.h"
+#include "../TestData/grouped_conv_4/test_data.h"
+#include "../Utils/validate.h"
+
+void grouped_conv_arm_grouped_convolve_1_s8(void)
+{
+ const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+ int8_t output[GROUPED_CONV_1_DST_SIZE] = {0};
+
+ cmsis_nn_context ctx;
+ cmsis_nn_conv_params conv_params;
+ cmsis_nn_per_channel_quant_params quant_params;
+ cmsis_nn_dims input_dims;
+ cmsis_nn_dims filter_dims;
+ cmsis_nn_dims bias_dims;
+ cmsis_nn_dims output_dims;
+
+ const int32_t *bias_data = grouped_conv_1_biases;
+ const int8_t *kernel_data = grouped_conv_1_weights;
+ const int8_t *input_data = grouped_conv_1_input;
+ const int8_t *output_ref = grouped_conv_1_output_ref;
+ const int32_t output_ref_size = GROUPED_CONV_1_DST_SIZE;
+
+ input_dims.n = GROUPED_CONV_1_INPUT_BATCHES;
+ input_dims.w = GROUPED_CONV_1_INPUT_W;
+ input_dims.h = GROUPED_CONV_1_INPUT_H;
+ input_dims.c = GROUPED_CONV_1_IN_CH;
+ filter_dims.w = GROUPED_CONV_1_FILTER_X;
+ filter_dims.h = GROUPED_CONV_1_FILTER_Y;
+ filter_dims.c = GROUPED_CONV_1_FILTER_CH;
+ output_dims.w = GROUPED_CONV_1_OUTPUT_W;
+ output_dims.h = GROUPED_CONV_1_OUTPUT_H;
+ output_dims.c = GROUPED_CONV_1_OUT_CH;
+
+ conv_params.padding.w = GROUPED_CONV_1_PAD_X;
+ conv_params.padding.h = GROUPED_CONV_1_PAD_Y;
+ conv_params.stride.w = GROUPED_CONV_1_STRIDE_X;
+ conv_params.stride.h = GROUPED_CONV_1_STRIDE_Y;
+ conv_params.dilation.w = GROUPED_CONV_1_DILATION_X;
+ conv_params.dilation.h = GROUPED_CONV_1_DILATION_Y;
+
+ conv_params.input_offset = GROUPED_CONV_1_INPUT_OFFSET;
+ conv_params.output_offset = GROUPED_CONV_1_OUTPUT_OFFSET;
+ conv_params.activation.min = GROUPED_CONV_1_OUT_ACTIVATION_MIN;
+ conv_params.activation.max = GROUPED_CONV_1_OUT_ACTIVATION_MAX;
+ quant_params.multiplier = (int32_t *)grouped_conv_1_output_mult;
+ quant_params.shift = (int32_t *)grouped_conv_1_output_shift;
+
+ int32_t buf_size = arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims);
+ ctx.buf = malloc(buf_size);
+ ctx.size = 0;
+
+ arm_cmsis_nn_status result = arm_convolve_s8(&ctx,
+ &conv_params,
+ &quant_params,
+ &input_dims,
+ input_data,
+ &filter_dims,
+ kernel_data,
+ &bias_dims,
+ bias_data,
+ &output_dims,
+ output);
+
+ if (ctx.buf)
+ {
+ // The caller is responsible to clear the scratch buffers for security reasons if applicable.
+ memset(ctx.buf, 0, buf_size);
+ free(ctx.buf);
+ }
+ TEST_ASSERT_EQUAL(expected, result);
+ TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size));
+ memset(output, 0, sizeof(output));
+}
+
+void grouped_conv_arm_grouped_convolve_2_s8(void)
+{
+ const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+ int8_t output[GROUPED_CONV_2_DST_SIZE] = {0};
+
+ cmsis_nn_context ctx;
+ cmsis_nn_conv_params conv_params;
+ cmsis_nn_per_channel_quant_params quant_params;
+ cmsis_nn_dims input_dims;
+ cmsis_nn_dims filter_dims;
+ cmsis_nn_dims bias_dims;
+ cmsis_nn_dims output_dims;
+
+ const int32_t *bias_data = grouped_conv_2_biases;
+ const int8_t *kernel_data = grouped_conv_2_weights;
+ const int8_t *input_data = grouped_conv_2_input;
+ const int8_t *output_ref = grouped_conv_2_output_ref;
+ const int32_t output_ref_size = GROUPED_CONV_2_DST_SIZE;
+
+ input_dims.n = GROUPED_CONV_2_INPUT_BATCHES;
+ input_dims.w = GROUPED_CONV_2_INPUT_W;
+ input_dims.h = GROUPED_CONV_2_INPUT_H;
+ input_dims.c = GROUPED_CONV_2_IN_CH;
+ filter_dims.w = GROUPED_CONV_2_FILTER_X;
+ filter_dims.h = GROUPED_CONV_2_FILTER_Y;
+ filter_dims.c = GROUPED_CONV_2_FILTER_CH;
+ output_dims.w = GROUPED_CONV_2_OUTPUT_W;
+ output_dims.h = GROUPED_CONV_2_OUTPUT_H;
+ output_dims.c = GROUPED_CONV_2_OUT_CH;
+
+ conv_params.padding.w = GROUPED_CONV_2_PAD_X;
+ conv_params.padding.h = GROUPED_CONV_2_PAD_Y;
+ conv_params.stride.w = GROUPED_CONV_2_STRIDE_X;
+ conv_params.stride.h = GROUPED_CONV_2_STRIDE_Y;
+ conv_params.dilation.w = GROUPED_CONV_2_DILATION_X;
+ conv_params.dilation.h = GROUPED_CONV_2_DILATION_Y;
+
+ conv_params.input_offset = GROUPED_CONV_2_INPUT_OFFSET;
+ conv_params.output_offset = GROUPED_CONV_2_OUTPUT_OFFSET;
+ conv_params.activation.min = GROUPED_CONV_2_OUT_ACTIVATION_MIN;
+ conv_params.activation.max = GROUPED_CONV_2_OUT_ACTIVATION_MAX;
+ quant_params.multiplier = (int32_t *)grouped_conv_2_output_mult;
+ quant_params.shift = (int32_t *)grouped_conv_2_output_shift;
+
+ int32_t buf_size = arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims);
+ ctx.buf = malloc(buf_size);
+ ctx.size = 0;
+
+ arm_cmsis_nn_status result = arm_convolve_s8(&ctx,
+ &conv_params,
+ &quant_params,
+ &input_dims,
+ input_data,
+ &filter_dims,
+ kernel_data,
+ &bias_dims,
+ bias_data,
+ &output_dims,
+ output);
+
+ if (ctx.buf)
+ {
+ // The caller is responsible to clear the scratch buffers for security reasons if applicable.
+ memset(ctx.buf, 0, buf_size);
+ free(ctx.buf);
+ }
+ TEST_ASSERT_EQUAL(expected, result);
+ TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size));
+ memset(output, 0, sizeof(output));
+}
+
+void grouped_conv_arm_grouped_convolve_3_s8(void)
+{
+ const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+ int8_t output[GROUPED_CONV_3_DST_SIZE] = {0};
+
+ cmsis_nn_context ctx;
+ cmsis_nn_conv_params conv_params;
+ cmsis_nn_per_channel_quant_params quant_params;
+ cmsis_nn_dims input_dims;
+ cmsis_nn_dims filter_dims;
+ cmsis_nn_dims bias_dims;
+ cmsis_nn_dims output_dims;
+
+ const int32_t *bias_data = grouped_conv_3_biases;
+ const int8_t *kernel_data = grouped_conv_3_weights;
+ const int8_t *input_data = grouped_conv_3_input;
+ const int8_t *output_ref = grouped_conv_3_output_ref;
+ const int32_t output_ref_size = GROUPED_CONV_3_DST_SIZE;
+
+ input_dims.n = GROUPED_CONV_3_INPUT_BATCHES;
+ input_dims.w = GROUPED_CONV_3_INPUT_W;
+ input_dims.h = GROUPED_CONV_3_INPUT_H;
+ input_dims.c = GROUPED_CONV_3_IN_CH;
+ filter_dims.w = GROUPED_CONV_3_FILTER_X;
+ filter_dims.h = GROUPED_CONV_3_FILTER_Y;
+ filter_dims.c = GROUPED_CONV_3_FILTER_CH;
+ output_dims.w = GROUPED_CONV_3_OUTPUT_W;
+ output_dims.h = GROUPED_CONV_3_OUTPUT_H;
+ output_dims.c = GROUPED_CONV_3_OUT_CH;
+
+ conv_params.padding.w = GROUPED_CONV_3_PAD_X;
+ conv_params.padding.h = GROUPED_CONV_3_PAD_Y;
+ conv_params.stride.w = GROUPED_CONV_3_STRIDE_X;
+ conv_params.stride.h = GROUPED_CONV_3_STRIDE_Y;
+ conv_params.dilation.w = GROUPED_CONV_3_DILATION_X;
+ conv_params.dilation.h = GROUPED_CONV_3_DILATION_Y;
+
+ conv_params.input_offset = GROUPED_CONV_3_INPUT_OFFSET;
+ conv_params.output_offset = GROUPED_CONV_3_OUTPUT_OFFSET;
+ conv_params.activation.min = GROUPED_CONV_3_OUT_ACTIVATION_MIN;
+ conv_params.activation.max = GROUPED_CONV_3_OUT_ACTIVATION_MAX;
+ quant_params.multiplier = (int32_t *)grouped_conv_3_output_mult;
+ quant_params.shift = (int32_t *)grouped_conv_3_output_shift;
+
+ int32_t buf_size = arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims);
+ ctx.buf = malloc(buf_size);
+ ctx.size = 0;
+
+ arm_cmsis_nn_status result = arm_convolve_s8(&ctx,
+ &conv_params,
+ &quant_params,
+ &input_dims,
+ input_data,
+ &filter_dims,
+ kernel_data,
+ &bias_dims,
+ bias_data,
+ &output_dims,
+ output);
+
+ if (ctx.buf)
+ {
+ // The caller is responsible to clear the scratch buffers for security reasons if applicable.
+ memset(ctx.buf, 0, buf_size);
+ free(ctx.buf);
+ }
+ TEST_ASSERT_EQUAL(expected, result);
+ TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size));
+ memset(output, 0, sizeof(output));
+}
+
+void grouped_conv_arm_grouped_convolve_4_s8(void)
+{
+ const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+ int8_t output[GROUPED_CONV_4_DST_SIZE] = {0};
+
+ cmsis_nn_context ctx;
+ cmsis_nn_conv_params conv_params;
+ cmsis_nn_per_channel_quant_params quant_params;
+ cmsis_nn_dims input_dims;
+ cmsis_nn_dims filter_dims;
+ cmsis_nn_dims bias_dims;
+ cmsis_nn_dims output_dims;
+
+ const int32_t *bias_data = grouped_conv_4_biases;
+ const int8_t *kernel_data = grouped_conv_4_weights;
+ const int8_t *input_data = grouped_conv_4_input;
+ const int8_t *output_ref = grouped_conv_4_output_ref;
+ const int32_t output_ref_size = GROUPED_CONV_4_DST_SIZE;
+
+ input_dims.n = GROUPED_CONV_4_INPUT_BATCHES;
+ input_dims.w = GROUPED_CONV_4_INPUT_W;
+ input_dims.h = GROUPED_CONV_4_INPUT_H;
+ input_dims.c = GROUPED_CONV_4_IN_CH;
+ filter_dims.w = GROUPED_CONV_4_FILTER_X;
+ filter_dims.h = GROUPED_CONV_4_FILTER_Y;
+ filter_dims.c = GROUPED_CONV_4_FILTER_CH;
+ output_dims.w = GROUPED_CONV_4_OUTPUT_W;
+ output_dims.h = GROUPED_CONV_4_OUTPUT_H;
+ output_dims.c = GROUPED_CONV_4_OUT_CH;
+
+ conv_params.padding.w = GROUPED_CONV_4_PAD_X;
+ conv_params.padding.h = GROUPED_CONV_4_PAD_Y;
+ conv_params.stride.w = GROUPED_CONV_4_STRIDE_X;
+ conv_params.stride.h = GROUPED_CONV_4_STRIDE_Y;
+ conv_params.dilation.w = GROUPED_CONV_4_DILATION_X;
+ conv_params.dilation.h = GROUPED_CONV_4_DILATION_Y;
+
+ conv_params.input_offset = GROUPED_CONV_4_INPUT_OFFSET;
+ conv_params.output_offset = GROUPED_CONV_4_OUTPUT_OFFSET;
+ conv_params.activation.min = GROUPED_CONV_4_OUT_ACTIVATION_MIN;
+ conv_params.activation.max = GROUPED_CONV_4_OUT_ACTIVATION_MAX;
+ quant_params.multiplier = (int32_t *)grouped_conv_4_output_mult;
+ quant_params.shift = (int32_t *)grouped_conv_4_output_shift;
+
+ int32_t buf_size = arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims);
+ ctx.buf = malloc(buf_size);
+ ctx.size = 0;
+
+ arm_cmsis_nn_status result = arm_convolve_s8(&ctx,
+ &conv_params,
+ &quant_params,
+ &input_dims,
+ input_data,
+ &filter_dims,
+ kernel_data,
+ &bias_dims,
+ bias_data,
+ &output_dims,
+ output);
+
+ if (ctx.buf)
+ {
+ // The caller is responsible to clear the scratch buffers for security reasons if applicable.
+ memset(ctx.buf, 0, buf_size);
+ free(ctx.buf);
+ }
+ TEST_ASSERT_EQUAL(expected, result);
+ TEST_ASSERT_TRUE(validate(output, output_ref, output_ref_size));
+ memset(output, 0, sizeof(output));
+}
\ No newline at end of file
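The tests above call arm_convolve_s8 with filter_dims.c set to the per-group channel count (GROUPED_CONV_x_FILTER_CH) rather than to input_dims.c. A minimal sketch of the dimension relationship they rely on follows; the helper name and the assert-based checks are illustrative only and not part of the patch.

/* Illustrative only -- not part of the patch. For grouped convolution the
 * filter channel count CK is the per-group input channel count, so
 * C_IN = groups * CK and C_OUT must split evenly across the groups. */
#include <assert.h>
#include <stdint.h>

#include "arm_nn_types.h"

static int32_t grouped_conv_num_groups(const cmsis_nn_dims *input_dims,
                                       const cmsis_nn_dims *filter_dims,
                                       const cmsis_nn_dims *output_dims)
{
    assert(filter_dims->c > 0 && (input_dims->c % filter_dims->c) == 0);
    const int32_t groups = input_dims->c / filter_dims->c;
    assert((output_dims->c % groups) == 0);
    return groups; /* e.g. grouped_conv_4: 3 / 1 = 3 groups, 6 / 3 = 2 filters per group */
}

Setting filter_dims.c == input_dims.c reduces this to the ordinary (single-group) convolution path, which is why the existing convolve tests earlier in this patch now also assign filter_dims.c explicitly.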
diff --git a/Tests/UnitTest/conv_settings.py b/Tests/UnitTest/conv_settings.py
index 9b41a90d..f16c688b 100644
--- a/Tests/UnitTest/conv_settings.py
+++ b/Tests/UnitTest/conv_settings.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+# SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -38,6 +38,7 @@ def __init__(self,
w_y=3,
stride_x=2,
stride_y=2,
+ groups=1,
pad=True,
randmin=TestSettings.INT8_MIN,
randmax=TestSettings.INT8_MAX,
@@ -84,11 +85,23 @@ def __init__(self,
int4_weights=int4_weights)
self.scaling_factors = []
+ self.groups = groups
if self.test_type == 'depthwise_conv':
self.channel_multiplier = self.output_ch // self.input_ch
if self.output_ch % self.input_ch != 0:
raise RuntimeError("out channel ({}) is not multiple of in channel ({})".format(out_ch, in_ch))
+ if groups != 1:
+ raise RuntimeError("ERROR: Groups cannot be used for depthwise convolution")
+
+ self.filter_ch = in_ch // groups
+ if in_ch % groups != 0:
+ raise RuntimeError("ERROR: Number of input channels ({}) must be a multiple of groups ({})".format(in_ch, groups))
+ if out_ch % groups != 0:
+ raise RuntimeError("ERROR: Number of output channels ({}) must be a multiple of groups ({})".format(out_ch, groups))
+
else:
self.channel_multiplier = 0
@@ -113,6 +126,8 @@ def write_c_config_header(self) -> None:
f.write("#define {}_OUTPUT_OFFSET {}\n".format(prefix, self.output_zero_point))
f.write("#define {}_DILATION_X {}\n".format(prefix, self.dilation_x))
f.write("#define {}_DILATION_Y {}\n".format(prefix, self.dilation_y))
+ if self.groups != 1:
+ f.write("#define {}_FILTER_CH {}\n".format(prefix, self.filter_ch))
if self.test_type == 'transpose_conv':
f.write("#define {}_PAD_X_WITH_OFFSET {}\n".format(prefix, self.pad_x_with_offset))
f.write("#define {}_PAD_Y_WITH_OFFSET {}\n".format(prefix, self.pad_y_with_offset))
@@ -279,7 +294,7 @@ def generate_data(self, input_data=None, weights=None, biases=None) -> None:
if self.test_type == 'transpose_conv':
weight_shape = [self.filter_y, self.filter_x, out_channel, self.input_ch]
else:
- weight_shape = [self.filter_y, self.filter_x, self.input_ch, out_channel]
+ weight_shape = [self.filter_y, self.filter_x, self.filter_ch, out_channel]
if weights is not None:
weights = tf.reshape(weights, weight_shape)
@@ -301,7 +316,8 @@ def generate_data(self, input_data=None, weights=None, biases=None) -> None:
strides=(self.stride_y, self.stride_x),
padding=self.padding,
input_shape=input_shape[1:],
- dilation_rate=(self.dilation_y, self.dilation_x))
+ dilation_rate=(self.dilation_y, self.dilation_x),
+ groups=self.groups)
model.add(conv_layer)
conv_layer.set_weights([weights, biases])
elif self.test_type == 'depthwise_conv':
diff --git a/Tests/UnitTest/generate_test_data.py b/Tests/UnitTest/generate_test_data.py
index 9f8ba15a..b82c53d7 100755
--- a/Tests/UnitTest/generate_test_data.py
+++ b/Tests/UnitTest/generate_test_data.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
#
-# SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates
+# SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -642,7 +642,86 @@ def load_testdata_sets(regenerate_input, regenerate_weights, regenerate_biases,
int16xint8=True,
dilation_x=2,
interpreter=interpreter)
- dataset = 'basic_int4'
+ dataset = 'grouped_conv_1'
+ testdata_sets[dataset] = ConvSettings(dataset,
+ type_of_test,
+ regenerate_weights,
+ regenerate_input,
+ regenerate_biases,
+ schema_file,
+ in_ch=2,
+ out_ch=6,
+ groups=2,
+ x_in=5,
+ y_in=5,
+ w_x=2,
+ w_y=2,
+ generate_bias=False,
+ stride_x=1,
+ stride_y=1,
+ pad=False,
+ batches=2,
+ interpreter=interpreter)
+ dataset = 'grouped_conv_2'
+ testdata_sets[dataset] = ConvSettings(dataset,
+ type_of_test,
+ regenerate_weights,
+ regenerate_input,
+ regenerate_biases,
+ schema_file,
+ in_ch=4,
+ out_ch=2,
+ groups=2,
+ x_in=7,
+ y_in=3,
+ w_x=1,
+ w_y=2,
+ generate_bias=True,
+ stride_x=1,
+ stride_y=1,
+ pad=False,
+ interpreter=interpreter)
+ dataset = 'grouped_conv_3'
+ testdata_sets[dataset] = ConvSettings(dataset,
+ type_of_test,
+ regenerate_weights,
+ regenerate_input,
+ regenerate_biases,
+ schema_file,
+ in_ch=2,
+ out_ch=4,
+ groups=2,
+ x_in=3,
+ y_in=2,
+ w_x=3,
+ w_y=2,
+ generate_bias=True,
+ stride_x=2,
+ stride_y=2,
+ pad=True,
+ batches=2,
+ interpreter=interpreter)
+ dataset = 'grouped_conv_4'
+ testdata_sets[dataset] = ConvSettings(dataset,
+ type_of_test,
+ regenerate_weights,
+ regenerate_input,
+ regenerate_biases,
+ schema_file,
+ in_ch=3,
+ out_ch=6,
+ groups=3,
+ x_in=9,
+ y_in=9,
+ w_x=2,
+ w_y=2,
+ generate_bias=True,
+ stride_x=1,
+ stride_y=1,
+ dilation_x=3,
+ dilation_y=3,
+ pad=True,
+ interpreter=interpreter)
+ dataset = 'basic_int4'
testdata_sets[dataset] = ConvSettings(dataset,
type_of_test,
regenerate_weights,