Skip to content

Commit

Permalink
Transposed convolution improvements
Browse files Browse the repository at this point in the history
- New more efficient algorithm for strides <= 2 and large input channels
- Minimizes scratch buffer needed for original algorithm

Change-Id: I79cb20bd7298cbb3b9d2ed27ab1a954e1f4b906c
  • Loading branch information
AdrianLundell committed Nov 8, 2024
1 parent c47e857 commit b6178f8
Show file tree
Hide file tree
Showing 84 changed files with 2,884 additions and 1,284 deletions.
2 changes: 2 additions & 0 deletions ARM.CMSIS-NN.pdsc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
<file category="source" name="Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_transpose_conv_get_buffer_sizes_s8.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_transpose_conv_s8.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_transpose_conv_wrapper_s8.c"/>
<file category="source" name="Source/ConcatenationFunctions/arm_concatenation_s8_x.c"/>
<file category="source" name="Source/ConcatenationFunctions/arm_concatenation_s8_w.c"/>
<file category="source" name="Source/ConcatenationFunctions/arm_concatenation_s8_y.c"/>
Expand Down Expand Up @@ -121,6 +122,7 @@
<file category="source" name="Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nntables.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_transpose_conv_row_s8_s32.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s4.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s4.c"/>
Expand Down
83 changes: 75 additions & 8 deletions Include/arm_nnfunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* Title: arm_nnfunctions.h
* Description: Public header file for CMSIS NN Library
*
* $Date: 23 October 2024
* $Revision: V.17.3.0
* $Date: 04 November 2024
* $Revision: V.18.0.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
Expand Down Expand Up @@ -415,6 +415,8 @@ arm_cmsis_nn_status arm_convolve_even_s4(const cmsis_nn_context *ctx,
* @param[in] filter_data Filter data pointer. Data type: int8
* @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
* @param[in] bias_data Optional bias data pointer. Data type: int32
* @param[in] upscale_dims Inserts zeroes to upscale the input in h/w dimensions if set to 2. This is used for
* transposed convolution.
* @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
* @param[out] output_data Output data pointer. Data type: int8
*
Expand All @@ -436,6 +438,7 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
const int8_t *filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *bias_data,
const cmsis_nn_dims *upscale_dims,
const cmsis_nn_dims *output_dims,
int8_t *output_data);

Expand All @@ -461,6 +464,54 @@ int32_t arm_convolve_s4_get_buffer_size(const cmsis_nn_dims *input_dims, const c
*/
int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);

/**
* @brief Wrapper to select optimal transposed convolution algorithm depending on parameters.
* @param[in, out] ctx Function context that contains the additional buffer if required by the
* function.
* arm_transpose_conv_s8_get_buffer_size will return the buffer_size if required.
* The caller is expected to clear the buffer, if applicable, for security
* reasons.
* @param[in, out] output_ctx Temporary scratch buffer.
* The required size is: output width * output height * output channel * 4
* The caller is expected to clear the buffer, if applicable, for security
* reasons.
* @param[in] transpose_conv_params Convolution parameters (e.g. strides, dilations, pads,...).
* Range of transpose_conv_params->input_offset : [-127, 128]
* Range of transpose_conv_params->output_offset : [-128, 127]
* @param[in] quant_params Per-channel quantization info.
* It contains the multiplier and shift values to be applied to each out channel.
* @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
* @param[in] input_data Input (activation) data pointer. Data type: int8
* @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
* spatial filter dimensions
* @param[in] filter_data Filter data pointer. Data type: int8
* @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
* @param[in] bias_data Optional bias data pointer. Data type: int32
* @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
* @param[out] output_data Output data pointer. Data type: int8
* @return The function returns either
* <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail, or
* <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
*
* @details
* 1. Supported framework: TensorFlow Lite micro
* 2. Additional memory is required for optimization. Refer to arguments 'ctx' and 'output_ctx' for details.
*
*/
arm_cmsis_nn_status arm_transpose_conv_wrapper_s8(const cmsis_nn_context *ctx,
const cmsis_nn_context *output_ctx,
const cmsis_nn_transpose_conv_params *transpose_conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const int8_t *input_data,
const cmsis_nn_dims *filter_dims,
const int8_t *filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *bias_data,
const cmsis_nn_dims *output_dims,
int8_t *output_data);

/**
* @brief Basic s8 transpose convolution function
* @param[in, out] ctx Function context that contains the additional buffer if required by the
Expand Down Expand Up @@ -510,19 +561,35 @@ arm_cmsis_nn_status arm_transpose_conv_s8(const cmsis_nn_context *ctx,
int8_t *output_data);

/**
* @brief Get the required buffer size for s8 transpose conv function
* @brief Get the required buffer size for ctx in s8 transpose conv function
*
* @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
* @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
* are the spatial filter dimensions
* @param[in] out_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
* @param[in] transposed_conv_params Transposed convolution parameters
* @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
* @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
* are the spatial filter dimensions
* @param[in] out_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
* @return The function returns required buffer size(bytes)
*
*/
int32_t arm_transpose_conv_s8_get_buffer_size(const cmsis_nn_dims *input_dims,
int32_t arm_transpose_conv_s8_get_buffer_size(const cmsis_nn_transpose_conv_params *transposed_conv_params,
const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims,
const cmsis_nn_dims *out_dims);

/**
* @brief Get the required buffer size for output_ctx in s8 transpose conv function
*
* @param[in] transposed_conv_params Transposed convolution parameters
* @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
* @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
* are the spatial filter dimensions
* @return The function returns required buffer size(bytes)
*
*/
int32_t arm_transpose_conv_s8_get_reverse_conv_buffer_size(const cmsis_nn_transpose_conv_params *transposed_conv_params,
const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims);

/**
* @brief Get size of additional buffer required by arm_transpose_conv_s8() for processors with DSP extension.
* Refer to arm_transpose_conv_s8_get_buffer_size() for function argument details.
Expand Down
49 changes: 47 additions & 2 deletions Include/arm_nnsupportfunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* Title: arm_nnsupportfunctions.h
* Description: Public header file of support functions for CMSIS NN Library
*
* $Date: 08 November 2024
* $Revision: V.22.6.1
* $Date: 08 Nov 2024
* $Revision: V.22.7.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
Expand Down Expand Up @@ -72,6 +72,10 @@ extern "C" {
// to not lose precision.
#define MAX_COL_COUNT (512)

// CMSIS-NN has two implementations of the transpose conv operator, selected depending on the number of input
// channels. This is based on heuristics and may be fine-tuned depending on other parameters of the operator.
#define REVERSE_TCOL_EFFICIENT_THRESHOLD (16)

// Threshold for number of output channels that decide whether to convert a depthwise conv to a
// regular conv operation when number of input channels is one.
// Only applicable for processors with MVE extension.
Expand Down Expand Up @@ -1014,6 +1018,47 @@ int16_t *arm_nn_depthwise_conv_nt_t_s16(const int16_t *lhs,
const int64_t *const output_bias,
int16_t *out);

/**
* @brief Row of s8 scalars multiplied with an s8 matrix and accumulated into an s32 rolling scratch buffer.
* Helper function for transposed convolution.
*
* @param[in] lhs Input left-hand side scalars
* @param[in] rhs Input right-hand side matrix
* @param[out] output_start Output buffer start
* @param[in] output_index Output buffer current index
* @param[in] output_max Output buffer size
* @param[in] rhs_rows Number of rows in rhs matrix
* @param[in] rhs_cols Number of columns in rhs matrix
* @param[in] input_channels Number of input channels
* @param[in] output_channels Number of output channels
* @param[in] lhs_offset Offset added to lhs before multiplication
* @param[in] row_offset Address offset between each row of data output
* @param[in] input_x Length of lhs scalar row.
* @param[in] stride_x Address offset between each scalar-matrix multiplication result.
* @param[in] skip_row_top Skip rows on top of the filter, used for padding.
* @param[in] skip_row_bottom Skip rows in the bottom of the filter, used for padding.
*
* @return The function returns ARM_CMSIS_NN_SUCCESS
*
* @note Rolling buffer refers to how the function wraps around the scratch buffer, e.g. it starts writing at
* [output_start + output_index], writes to [output_start + output_max] and then continues at [output_start] again.
*/
arm_cmsis_nn_status arm_nn_transpose_conv_row_s8_s32(const int8_t *lhs,
const int8_t *rhs,
int32_t *output_start,
const int32_t output_index,
const int32_t output_max,
const int32_t rhs_rows,
const int32_t rhs_cols,
const int32_t input_channels,
const int32_t output_channels,
const int32_t lhs_offset,
const int32_t row_offset,
const int32_t input_x,
const int32_t stride_x,
const int32_t skip_row_top,
const int32_t skip_row_bottom);

/**
@brief Read 2 s16 elements and post increment pointer.
@param[in] in_q15 Pointer to pointer that holds address of input.
Expand Down
6 changes: 4 additions & 2 deletions Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* Title: arm_convolve_1_x_n_s8.c
* Description: s8 version of 1xN convolution using symmetric quantization.
*
* $Date: 19 March 2024
* $Revision: V.3.6.0
* $Date: 04 November 2024
* $Revision: V.3.6.1
*
* Target : Arm(R) M-Profile Architecture
*
Expand Down Expand Up @@ -107,6 +107,7 @@ arm_cmsis_nn_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
filter_data,
bias_dims,
bias_data,
NULL,
output_dims,
output_data);
}
Expand Down Expand Up @@ -219,6 +220,7 @@ arm_cmsis_nn_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
filter_data,
bias_dims,
bias_data,
NULL,
output_dims,
output_data);

Expand Down
Loading

0 comments on commit b6178f8

Please sign in to comment.