From 5072f1dab5c1d6ead6cb749d23d24fad2bef89e0 Mon Sep 17 00:00:00 2001 From: Adrian Lundell Date: Tue, 19 Mar 2024 09:33:09 +0100 Subject: [PATCH] Add support for int16 unidirectional lstm Change-Id: I1ca5cf4f0778a119bf0fd2fa6e1daadf16d53e83 --- ARM.CMSIS-NN.pdsc | 6 + Include/arm_nn_types.h | 14 +- Include/arm_nnfunctions.h | 87 ++- Include/arm_nnsupportfunctions.h | 109 ++- .../arm_elementwise_mul_s16_batch_offset.c | 166 +++++ Source/FullyConnectedFunctions/CMakeLists.txt | 6 +- .../arm_vector_sum_s8_s64.c | 156 +++++ Source/LSTMFunctions/CMakeLists.txt | 3 +- .../arm_lstm_unidirectional_s16.c | 95 +++ .../arm_nn_lstm_calculate_gate_s16.c | 96 +++ .../NNSupportFunctions/arm_nn_lstm_step_s16.c | 112 ++++ .../arm_nn_vec_mat_mul_result_acc_s16.c | 362 ++++++++++ Tests/UnitTest/CMakeLists.txt | 1 + .../{test_arm_lstm_s16.json => lstm_s16.json} | 2 +- .../JsonTemplates/lstm_s16_tm.json | 620 ++++++++++++++++++ .../UnitTest/RefactoredTestGen/Lib/op_lstm.py | 14 +- .../RefactoredTestGen/Lib/op_utils.py | 2 +- Tests/UnitTest/RefactoredTestGen/Lib/test.py | 35 +- .../RefactoredTestGen/Lib/test_plan.py | 21 +- .../RefactoredTestGen/generate_test_data.py | 7 +- .../UnitTest/RefactoredTestGen/test_plan.json | 6 +- .../TestData/lstm_1_s16/cell_gate_bias.h | 7 + .../lstm_1_s16/cell_gate_hidden_weights.h | 12 + .../lstm_1_s16/cell_gate_input_weights.h | 19 + .../TestData/lstm_1_s16/config_data.h | 34 + .../TestData/lstm_1_s16/forget_gate_bias.h | 6 + .../lstm_1_s16/forget_gate_hidden_weights.h | 12 + .../lstm_1_s16/forget_gate_input_weights.h | 19 + .../TestCases/TestData/lstm_1_s16/input.h | 22 + .../TestData/lstm_1_s16/input_gate_bias.h | 6 + .../lstm_1_s16/input_gate_hidden_weights.h | 12 + .../lstm_1_s16/input_gate_input_weights.h | 19 + .../TestCases/TestData/lstm_1_s16/output.h | 13 + .../TestData/lstm_1_s16/output_gate_bias.h | 7 + .../lstm_1_s16/output_gate_hidden_weights.h | 12 + .../lstm_1_s16/output_gate_input_weights.h | 18 + 
.../TestCases/TestData/lstm_1_s16/test_data.h | 15 + .../TestData/lstm_2_s16/cell_gate_bias.h | 6 + .../lstm_2_s16/cell_gate_hidden_weights.h | 9 + .../lstm_2_s16/cell_gate_input_weights.h | 8 + .../TestData/lstm_2_s16/config_data.h | 34 + .../TestData/lstm_2_s16/forget_gate_bias.h | 6 + .../lstm_2_s16/forget_gate_hidden_weights.h | 9 + .../lstm_2_s16/forget_gate_input_weights.h | 8 + .../TestCases/TestData/lstm_2_s16/input.h | 10 + .../TestData/lstm_2_s16/input_gate_bias.h | 6 + .../lstm_2_s16/input_gate_hidden_weights.h | 9 + .../lstm_2_s16/input_gate_input_weights.h | 8 + .../TestCases/TestData/lstm_2_s16/output.h | 10 + .../TestData/lstm_2_s16/output_gate_bias.h | 6 + .../lstm_2_s16/output_gate_hidden_weights.h | 9 + .../lstm_2_s16/output_gate_input_weights.h | 8 + .../TestCases/TestData/lstm_2_s16/test_data.h | 15 + .../lstm_one_time_step_s16/cell_gate_bias.h | 6 + .../cell_gate_hidden_weights.h | 6 + .../cell_gate_input_weights.h | 9 + .../lstm_one_time_step_s16/config_data.h | 34 + .../lstm_one_time_step_s16/forget_gate_bias.h | 6 + .../forget_gate_hidden_weights.h | 6 + .../forget_gate_input_weights.h | 9 + .../TestData/lstm_one_time_step_s16/input.h | 11 + .../lstm_one_time_step_s16/input_gate_bias.h | 6 + .../input_gate_hidden_weights.h | 6 + .../input_gate_input_weights.h | 9 + .../TestData/lstm_one_time_step_s16/output.h | 6 + .../lstm_one_time_step_s16/output_gate_bias.h | 6 + .../output_gate_hidden_weights.h | 6 + .../output_gate_input_weights.h | 9 + .../lstm_one_time_step_s16/test_data.h | 15 + .../CMakeLists.txt | 23 + .../unity_test_arm_lstm_unidirectional_s16.c | 48 ++ .../test_arm_lstm_unidirectional_s16.c | 475 ++++++++++++++ 72 files changed, 2945 insertions(+), 65 deletions(-) create mode 100644 Source/BasicMathFunctions/arm_elementwise_mul_s16_batch_offset.c create mode 100644 Source/FullyConnectedFunctions/arm_vector_sum_s8_s64.c create mode 100644 Source/LSTMFunctions/arm_lstm_unidirectional_s16.c create mode 100644 
Source/NNSupportFunctions/arm_nn_lstm_calculate_gate_s16.c create mode 100644 Source/NNSupportFunctions/arm_nn_lstm_step_s16.c create mode 100644 Source/NNSupportFunctions/arm_nn_vec_mat_mul_result_acc_s16.c rename Tests/UnitTest/RefactoredTestGen/JsonTemplates/{test_arm_lstm_s16.json => lstm_s16.json} (99%) create mode 100644 Tests/UnitTest/RefactoredTestGen/JsonTemplates/lstm_s16_tm.json create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/cell_gate_bias.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/cell_gate_hidden_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/cell_gate_input_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/config_data.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/forget_gate_bias.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/forget_gate_hidden_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/forget_gate_input_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/input.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/input_gate_bias.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/input_gate_hidden_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/input_gate_input_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/output.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/output_gate_bias.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/output_gate_hidden_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/output_gate_input_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_1_s16/test_data.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/cell_gate_bias.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/cell_gate_hidden_weights.h create mode 100644 
Tests/UnitTest/TestCases/TestData/lstm_2_s16/cell_gate_input_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/config_data.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/forget_gate_bias.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/forget_gate_hidden_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/forget_gate_input_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/input.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/input_gate_bias.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/input_gate_hidden_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/input_gate_input_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/output.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/output_gate_bias.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/output_gate_hidden_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/output_gate_input_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_2_s16/test_data.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/cell_gate_bias.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/cell_gate_hidden_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/cell_gate_input_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/config_data.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/forget_gate_bias.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/forget_gate_hidden_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/forget_gate_input_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input.h create mode 100644 
Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input_gate_bias.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input_gate_hidden_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input_gate_input_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output_gate_bias.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output_gate_hidden_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output_gate_input_weights.h create mode 100644 Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/test_data.h create mode 100644 Tests/UnitTest/TestCases/test_arm_lstm_unidirectional_s16/CMakeLists.txt create mode 100644 Tests/UnitTest/TestCases/test_arm_lstm_unidirectional_s16/Unity/unity_test_arm_lstm_unidirectional_s16.c create mode 100644 Tests/UnitTest/TestCases/test_arm_lstm_unidirectional_s16/test_arm_lstm_unidirectional_s16.c diff --git a/ARM.CMSIS-NN.pdsc b/ARM.CMSIS-NN.pdsc index 5dbf5495..c8246ea4 100644 --- a/ARM.CMSIS-NN.pdsc +++ b/ARM.CMSIS-NN.pdsc @@ -84,6 +84,7 @@ + @@ -110,16 +111,21 @@ + + + + + diff --git a/Include/arm_nn_types.h b/Include/arm_nn_types.h index c567f0c1..affc1d5f 100644 --- a/Include/arm_nn_types.h +++ b/Include/arm_nn_types.h @@ -22,8 +22,8 @@ * Description: Public header file to contain the CMSIS-NN structs for the * TensorFlowLite micro compliant functions * - * $Date: 19 January 2024 - * $Revision: V.3.0.0 + * $Date: 26 March 2024 + * $Revision: V.3.1.0 * * Target : Arm(R) M-Profile Architecture * -------------------------------------------------------------------- */ @@ -191,15 +191,15 @@ typedef struct { int32_t input_multiplier; int32_t input_shift; - const int8_t *input_weights; - const int32_t *input_effective_bias; /**< Bias added with precomputed kernel_sum * lhs_offset*/ + const 
void *input_weights; + const void *input_effective_bias; /**< Bias added with precomputed kernel_sum * lhs_offset*/ int32_t hidden_multiplier; int32_t hidden_shift; - const int8_t *hidden_weights; - const int32_t *hidden_effective_bias; /**< Precomputed kernel_sum * lhs_offset*/ + const void *hidden_weights; + const void *hidden_effective_bias; /**< Precomputed kernel_sum * lhs_offset*/ - const int32_t *bias; + const void *bias; arm_nn_activation_type activation_type; } cmsis_nn_lstm_gate; diff --git a/Include/arm_nnfunctions.h b/Include/arm_nnfunctions.h index e4604c17..b381a7fe 100644 --- a/Include/arm_nnfunctions.h +++ b/Include/arm_nnfunctions.h @@ -21,8 +21,8 @@ * Title: arm_nnfunctions.h * Description: Public header file for CMSIS NN Library * - * $Date: 11 March 2024 - * $Revision: V.15.0.0 + * $Date: 20 February 2024 + * $Revision: V.15.1.0 * * Target : Arm(R) M-Profile Architecture @@ -1475,7 +1475,7 @@ arm_cmsis_nn_status arm_fully_connected_s8(const cmsis_nn_context *ctx, int8_t *output_data); /** - * @brief Calculate the sum of each row in vector_data, multiply by lhs_offset and optionally add bias_data. + * @brief Calculate the sum of each row in vector_data, multiply by lhs_offset and optionally add s32 bias_data. * @param[in, out] vector_sum_buf Buffer for vector sums * @param[in] vector_cols Number of vector columns * @param[in] vector_rows Number of vector rows @@ -1492,6 +1492,24 @@ arm_cmsis_nn_status arm_vector_sum_s8(int32_t *vector_sum_buf, const int32_t lhs_offset, const int32_t *bias_data); +/** + * @brief Calculate the sum of each row in vector_data, multiply by lhs_offset and optionally add s64 bias_data. + * @param[in, out] vector_sum_buf Buffer for vector sums + * @param[in] vector_cols Number of vector columns + * @param[in] vector_rows Number of vector rows + * @param[in] vector_data Vector of weigths data + * @param[in] lhs_offset Constant multiplied with each sum + * @param[in] bias_data Vector of bias data, added to each sum. 
+ * @return The function returns + * ARM_CMSIS_NN_SUCCESS - Successful operation + */ +arm_cmsis_nn_status arm_vector_sum_s8_s64(int64_t *vector_sum_buf, + const int32_t vector_cols, + const int32_t vector_rows, + const int8_t *vector_data, + const int32_t lhs_offset, + const int64_t *bias_data); + /** * @brief Get size of additional buffer required by arm_fully_connected_s8(). * See also arm_vector_sum_s8, which is required if buffer size is > 0. @@ -2401,13 +2419,41 @@ arm_cmsis_nn_status arm_svdf_state_s16_s8(const cmsis_nn_context *input_ctx, const cmsis_nn_dims *output_dims, int8_t *output_data); +/** + * @brief Get size of additional buffer required by arm_svdf_s8(). + * @param[in] filter_dims dimension of filter + * @return The function returns required buffer size in bytes + * + */ +int32_t arm_svdf_s8_get_buffer_size(const cmsis_nn_dims *filter_dims); + +/** + * @brief Get size of additional buffer required by arm_svdf_s8() for processors with DSP extension. + * Refer to arm_svdf_s8_get_buffer_size() for function argument details. + * + * @note Intended for compilation on Host. If compiling for an Arm target, use + * arm_svdf_s8_get_buffer_size(). + * + */ +int32_t arm_svdf_s8_get_buffer_size_dsp(const cmsis_nn_dims *filter_dims); + +/** + * @brief Get size of additional buffer required by arm_svdf_s8() for Arm(R) Helium Architecture case. + * Refer to arm_svdf_s8_get_buffer_size() for function argument details. + * + * @note Intended for compilation on Host. If compiling for an Arm target, use + * arm_svdf_s8_get_buffer_size(). + * + */ +int32_t arm_svdf_s8_get_buffer_size_mve(const cmsis_nn_dims *filter_dims); + /** * @defgroup LSTM LSTM Layer Functions * */ /** - * @brief LSTM unidirectional function with 8 bit input and output and 16 bit gate output. + * @brief LSTM unidirectional function with 8 bit input and output and 16 bit gate output, 32 bit bias. 
* * @param[in] input Pointer to input data * @param[out] output Pointer to output data @@ -2428,32 +2474,25 @@ arm_cmsis_nn_status arm_lstm_unidirectional_s8(const int8_t *input, cmsis_nn_lstm_context *buffers); /** - * @brief Get size of additional buffer required by arm_svdf_s8(). - * @param[in] filter_dims dimension of filter - * @return The function returns required buffer size in bytes + * @brief LSTM unidirectional function with 16 bit input and output and 16 bit gate output, 64 bit bias. * - */ -int32_t arm_svdf_s8_get_buffer_size(const cmsis_nn_dims *filter_dims); - -/** - * @brief Get size of additional buffer required by arm_svdf_s8() for processors with DSP extension. - * Refer to arm_svdf_s8_get_buffer_size() for function argument details. + * @param[in] input Pointer to input data + * @param[out] output Pointer to output data + * @param[in] params Struct containing all information about the lstm operator, see arm_nn_types. + * @param[in] buffers Struct containing pointers to all temporary scratch buffers needed for the + * lstm operator, see arm_nn_types. * - * @note Intended for compilation on Host. If compiling for an Arm target, use - * arm_svdf_s8_get_buffer_size(). * - */ -int32_t arm_svdf_s8_get_buffer_size_dsp(const cmsis_nn_dims *filter_dims); - -/** - * @brief Get size of additional buffer required by arm_svdf_s8() for Arm(R) Helium Architecture case. - * Refer to arm_svdf_s8_get_buffer_size() for function argument details. + * @return The function returns ARM_CMSIS_NN_SUCCESS * - * @note Intended for compilation on Host. If compiling for an Arm target, use - * arm_svdf_s8_get_buffer_size(). + * @details + * 1. 
Supported framework: TensorFlow Lite Micro * */ -int32_t arm_svdf_s8_get_buffer_size_mve(const cmsis_nn_dims *filter_dims); +arm_cmsis_nn_status arm_lstm_unidirectional_s16(const int16_t *input, + int16_t *output, + const cmsis_nn_lstm_params *params, + cmsis_nn_lstm_context *buffers); #ifdef __cplusplus } diff --git a/Include/arm_nnsupportfunctions.h b/Include/arm_nnsupportfunctions.h index 70c57022..26d096e9 100644 --- a/Include/arm_nnsupportfunctions.h +++ b/Include/arm_nnsupportfunctions.h @@ -21,8 +21,8 @@ * Title: arm_nnsupportfunctions.h * Description: Public header file of support functions for CMSIS NN Library * - * $Date: 22 March 2024 - * $Revision: V.20.0.0 + * $Date: 14 February 2024 + * $Revision: V.20.1.0 * * Target : Arm(R) M-Profile Architecture * -------------------------------------------------------------------- */ @@ -1538,9 +1538,9 @@ __STATIC_FORCEINLINE void arm_nn_write_s8x2_ia(int8_t **dst, int16_t src) // Support functions for LSTM /** - * @brief Update LSTM function for an iteration step + * @brief Update LSTM function for an iteration step using s8 input and output, and s16 internally. * - * @param[in] data_in Data input pointervoid + * @param[in] data_in Data input pointer * @param[in] hidden_in Hidden state/ recurrent input pointer * @param[out] hidden_out Hidden state/ recurrent output pointer * @param[in] params Struct containg all information about the lstm operator, see @@ -1561,6 +1561,30 @@ arm_cmsis_nn_status arm_nn_lstm_step_s8(const int8_t *data_in, cmsis_nn_lstm_context *buffers, const int32_t batch_offset); +/** + * @brief Update LSTM function for an iteration step using s16 input and output, and s16 internally. + * + * @param[in] data_in Data input pointer + * @param[in] hidden_in Hidden state/ recurrent input pointer + * @param[out] hidden_out Hidden state/ recurrent output pointer + * @param[in] params Struct containg all information about the lstm operator, see + * arm_nn_types. 
+ * @param[in] buffers Struct containg pointers to all temporary scratch buffers needed for the + * lstm operator, see arm_nn_types. + * @param[in] batch_offset Number of timesteps between consecutive batches. + * E.g for params->timing_major = true, all batches for t=0 are stored sequentially, so batch offset = 1. + * For params->time major = false, all time steps are stored continously before the next batch, so + * batch offset = params->time_steps. + * @return The function returns ARM_CMSIS_NN_SUCCESS + + */ +arm_cmsis_nn_status arm_nn_lstm_step_s16(const int16_t *data_in, + const int16_t *hidden_in, + int16_t *hidden_out, + const cmsis_nn_lstm_params *params, + cmsis_nn_lstm_context *buffers, + const int32_t batch_offset); + /** * @brief Updates a LSTM gate for an iteration step of LSTM function, int8x8_16 version. * @@ -1582,6 +1606,27 @@ arm_cmsis_nn_status arm_nn_lstm_calculate_gate_s8_s16(const int8_t *data_in, int16_t *output, const int32_t batch_offset); +/** + * @brief Updates a LSTM gate for an iteration step of LSTM function, int16x8_16 version. + * + * @param[in] data_in Data input pointer + * @param[in] hidden_in Hidden state/ recurrent input pointer + * @param[in] gate_data Struct containing all information about the gate caluclation, see + * arm_nn_types. + * @param[in] params Struct containing all information about the lstm_operation, see + * arm_nn_types + * @param[out] output Hidden state/ recurrent output pointer + * @param[in] batch_offset Number of timesteps between consecutive batches, see + * arm_nn_lstm_step_s16. + * @return The function returns ARM_CMSIS_NN_SUCCESS + */ +arm_cmsis_nn_status arm_nn_lstm_calculate_gate_s16(const int16_t *data_in, + const int16_t *hidden_in, + const cmsis_nn_lstm_gate *gate_data, + const cmsis_nn_lstm_params *params, + int16_t *output, + const int32_t batch_offset); + /** * @brief The result of the multiplication is accumulated to the passed result buffer. * Multiplies a matrix by a "batched" vector (i.e. 
a matrix with a batch dimension composed by input vectors independent @@ -1612,6 +1657,36 @@ arm_cmsis_nn_status arm_nn_vec_mat_mul_result_acc_s8_s16(const int8_t *lhs, const int32_t batches, const int32_t batch_offset); +/** + * @brief The result of the multiplication is accumulated to the passed result buffer. + * Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch dimension composed by input vectors independent + * from each other). + * + * @param[in] lhs Batched vector + * @param[in] rhs Weights - input matrix (H(Rows)xW(Columns)) + * @param[in] effective_bias Bias + lhs_offset * kernel_sum term precalculated into a constant vector. + * @param[out] dst Output + * @param[in] dst_multiplier Multiplier for quantization + * @param[in] dst_shift Shift for quantization + * @param[in] rhs_cols Vector/matarix column length + * @param[in] rhs_rows Row count of matrix + * @param[in] batches Batch size + * @param[in] batch_offset Number of timesteps between consecutive batches in input, see arm_nn_lstm_step_s16. + Note that the output is always stored with sequential batches. 
+ * @return The function returns ARM_CMSIS_NN_SUCCESS + + */ +arm_cmsis_nn_status arm_nn_vec_mat_mul_result_acc_s16(const int16_t *lhs, + const int8_t *rhs, + const int64_t *effective_bias, + int16_t *dst, + const int32_t dst_multiplier, + const int32_t dst_shift, + const int32_t rhs_cols, + const int32_t rhs_rows, + const int32_t batches, + const int32_t batch_offset); + /** * @brief s16 elementwise multiplication with s8 output * @param[in] input_1_vect pointer to input vector 1 @@ -1638,6 +1713,32 @@ arm_cmsis_nn_status arm_elementwise_mul_s16_s8(const int16_t *input_1_vect, const int32_t batch_size, const int32_t batch_offset); +/** + * @brief s16 elementwise multiplication with s16 output + * @param[in] input_1_vect pointer to input vector 1 + * @param[in] input_2_vect pointer to input vector 2 + * @param[in,out] output pointer to output vector + * @param[in] out_offset output offset + * @param[in] out_mult output multiplier + * @param[in] out_shift output shift + * @param[in] block_size number of samples per batch + * @param[in] batch_size number of samples per batch + * @param[in] batch_offset Number of timesteps between consecutive batches in output, see + * arm_nn_lstm_step_s16. Note that it is assumed that the input is stored with sequential batches. + * @return The function returns ARM_CMSIS_NN_SUCCESS + * + * @details Supported framework: TensorFlow Lite micro + */ +arm_cmsis_nn_status arm_elementwise_mul_s16_batch_offset(const int16_t *input_1_vect, + const int16_t *input_2_vect, + int16_t *output, + const int32_t out_offset, + const int32_t out_mult, + const int32_t out_shift, + const int32_t block_size, + const int32_t batch_size, + const int32_t batch_offset); + /** * @brief s16 elementwise multiplication. The result of the multiplication is accumulated to the passed result buffer. 
* @param[in] input_1_vect pointer to input vector 1 diff --git a/Source/BasicMathFunctions/arm_elementwise_mul_s16_batch_offset.c b/Source/BasicMathFunctions/arm_elementwise_mul_s16_batch_offset.c new file mode 100644 index 00000000..b3acc3eb --- /dev/null +++ b/Source/BasicMathFunctions/arm_elementwise_mul_s16_batch_offset.c @@ -0,0 +1,166 @@ +/* + * SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------- + * Project: CMSIS NN Library + * Title: arm_elementwise_mul_s16_batch_offset + * Description: Element wise multiplication + * + * $Date: 18 March 2024 + * $Revision: V.1.0.0 + * + * Target : Arm(R) M-Profile Architecture + * + * -------------------------------------------------------------------- */ + +#include "arm_nnfunctions.h" +#include "arm_nnsupportfunctions.h" + +/** + * @ingroup Public + */ + +/** + * @addtogroup groupElementwise + * @{ + */ + +/** + * @brief s16 element wise multiplication of batches of two vectors + * + * @note Refer header file for details. 
+ * + */ +arm_cmsis_nn_status arm_elementwise_mul_s16_batch_offset(const int16_t *input_1_vect, + const int16_t *input_2_vect, + int16_t *output, + const int32_t out_offset, + const int32_t out_mult, + const int32_t out_shift, + const int32_t block_size, + const int32_t batch_size, + const int32_t batch_offset) +{ + + int32_t loop_count; + + for (int i = 0; i < batch_size; i++) + { + +#if defined(ARM_MATH_MVEI) + + const int16_t *input_1_ptr = input_1_vect; + const int16_t *input_2_ptr = input_2_vect; + int16_t *output_ptr = output; + + loop_count = block_size; + + while (loop_count > 0) + { + mve_pred16_t pred = vctp32q(loop_count); + + int32x4_t input_1 = vldrhq_z_s32(input_1_ptr, pred); + int32x4_t input_2 = vldrhq_z_s32(input_2_ptr, pred); + + int32x4_t res_0 = vmulq_s32(input_1, input_2); + + res_0 = arm_requantize_mve_32x4(res_0, vdupq_n_s32(out_mult), vdupq_n_s32(out_shift)); + res_0 = vaddq_n_s32(res_0, out_offset); + + res_0 = vmaxq_s32(res_0, vdupq_n_s32(NN_Q15_MIN)); + res_0 = vminq_s32(res_0, vdupq_n_s32(NN_Q15_MAX)); + + vstrhq_p_s32(output_ptr, res_0, pred); + input_1_ptr += 4; + input_2_ptr += 4; + + output_ptr += 4; + loop_count -= 4; + } + + input_1_vect += block_size; + input_2_vect += block_size; + output += block_size; + +#else + int32_t input_1; + int32_t input_2; + int32_t mul_res; + int32_t two_halfword_1, two_halfword_2; + int16_t mul_1, mul_2; + loop_count = block_size / 2; + + while (loop_count > 0) + { + two_halfword_1 = arm_nn_read_q15x2_ia(&input_1_vect); + two_halfword_2 = arm_nn_read_q15x2_ia(&input_2_vect); + + #if defined(ARM_MATH_DSP) + mul_res = SMULBB(two_halfword_1, two_halfword_2); + #else + input_1 = (int16_t)(two_halfword_1 & 0xFFFF); + input_2 = (int16_t)(two_halfword_2 & 0xFFFF); + mul_res = input_1 * input_2; + #endif + mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; + mul_res = MAX(mul_res, NN_Q15_MIN); + mul_res = MIN(mul_res, NN_Q15_MAX); + mul_1 = (int16_t)mul_res; + + #if defined(ARM_MATH_DSP) 
+ mul_res = SMULTT(two_halfword_1, two_halfword_2); + #else + input_1 = (int16_t)(two_halfword_1 >> 16); + input_2 = (int16_t)(two_halfword_2 >> 16); + mul_res = input_1 * input_2; + #endif + mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; + mul_res = MAX(mul_res, NN_Q15_MIN); + mul_res = MIN(mul_res, NN_Q15_MAX); + mul_2 = (int16_t)mul_res; + + arm_nn_write_q15x2_ia(&output, PACK_Q15x2_32x1(mul_1, mul_2)); + + loop_count--; + } + + if (block_size & 0x1) + { + /* C = A * B */ + + input_1 = *input_1_vect++; + input_2 = *input_2_vect++; + + mul_res = input_1 * input_2; + mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; + + mul_res = MAX(mul_res, NN_Q15_MIN); + mul_res = MIN(mul_res, NN_Q15_MAX); + + *output++ = (int16_t)mul_res; + } +#endif // #if defined(ARM_MATH_MVEI) + + output += (batch_offset - 1) * block_size; + } + return ARM_CMSIS_NN_SUCCESS; +} + +/** + * @} end of Doxygen group + */ diff --git a/Source/FullyConnectedFunctions/CMakeLists.txt b/Source/FullyConnectedFunctions/CMakeLists.txt index e2cfbf04..1d254c87 100644 --- a/Source/FullyConnectedFunctions/CMakeLists.txt +++ b/Source/FullyConnectedFunctions/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright 2019-2021, 2023 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: Copyright 2019-2021, 2023-2024 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 # @@ -19,4 +19,6 @@ file(GLOB SRC_S4 "./*_s4.c") file(GLOB SRC_S8 "./*_s8.c") file(GLOB SRC_S16 "./*_s16*.c") -target_sources(cmsis-nn PRIVATE ${SRC_S4} ${SRC_S8} ${SRC_S16}) +file(GLOB SRC_S64 "./*_s64.c") + +target_sources(cmsis-nn PRIVATE ${SRC_S4} ${SRC_S8} ${SRC_S16} ${SRC_S64}) diff --git a/Source/FullyConnectedFunctions/arm_vector_sum_s8_s64.c b/Source/FullyConnectedFunctions/arm_vector_sum_s8_s64.c new file mode 100644 index 00000000..f5fc824e --- /dev/null +++ b/Source/FullyConnectedFunctions/arm_vector_sum_s8_s64.c @@ -0,0 +1,156 @@ +/* + * 
SPDX-FileCopyrightText: Copyright 2023-2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------- + * Project: CMSIS NN Library + * Title: arm_vector_sum_s8_s64 + * Description: Generic function for calculating vector sums + * + * $Date: 26 March 2024 + * $Revision: V.1.0.0 + * + * Target : Arm(R) M-Profile Architecture + * + * -------------------------------------------------------------------- */ + +#include "arm_nnfunctions.h" +#include "arm_nnsupportfunctions.h" +/** + * @ingroup Public + */ + +/** + * @addtogroup FC + * @{ + */ + +/* + * S8 vector sum fuction in preparation for e.g. kernel sums in fully connected and matrix multiplication layer function + * + * Refer header file for details. 
+ * + */ +arm_cmsis_nn_status arm_vector_sum_s8_s64(int64_t *vector_sum_buf, + const int32_t vector_cols, + const int32_t vector_rows, + const int8_t *vector_data, + const int32_t lhs_offset, + const int64_t *bias_data) +{ + + if (bias_data) + { + memcpy(vector_sum_buf, bias_data, vector_rows * sizeof(int64_t)); + } + else + { + memset(vector_sum_buf, 0, vector_rows * sizeof(int64_t)); + } + if (lhs_offset) + { +#if defined(ARM_MATH_MVEI) + + const int32_t row_loop_cnt = vector_rows / 5; + for (int i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++) + { + const int32_t col_loop_cnt = (vector_cols + 15) / 16; + const int8_t *vector_0 = vector_data; + const int8_t *vector_1 = vector_data + vector_cols; + const int8_t *vector_2 = vector_data + 2 * vector_cols; + const int8_t *vector_3 = vector_data + 3 * vector_cols; + const int8_t *vector_4 = vector_data + 4 * vector_cols; + int32_t vector_sum_0 = 0; + int32_t vector_sum_1 = 0; + int32_t vector_sum_2 = 0; + int32_t vector_sum_3 = 0; + int32_t vector_sum_4 = 0; + uint32_t col_cnt = (uint32_t)vector_cols; + for (int i = 0; i < col_loop_cnt; i++) + { + mve_pred16_t p = vctp8q(col_cnt); + col_cnt -= 16; + const int8x16_t ker_0 = vldrbq_z_s8(vector_0, p); + vector_sum_0 = vaddvaq_s8(vector_sum_0, ker_0); + const int8x16_t ker_1 = vldrbq_z_s8(vector_1, p); + vector_sum_1 = vaddvaq_s8(vector_sum_1, ker_1); + const int8x16_t ker_2 = vldrbq_z_s8(vector_2, p); + vector_sum_2 = vaddvaq_s8(vector_sum_2, ker_2); + const int8x16_t ker_3 = vldrbq_z_s8(vector_3, p); + vector_sum_3 = vaddvaq_s8(vector_sum_3, ker_3); + const int8x16_t ker_4 = vldrbq_z_s8(vector_4, p); + vector_sum_4 = vaddvaq_s8(vector_sum_4, ker_4); + vector_0 += 16; + vector_1 += 16; + vector_2 += 16; + vector_3 += 16; + vector_4 += 16; + } + vector_data += 5 * vector_cols; + + vector_sum_0 *= lhs_offset; + vector_sum_1 *= lhs_offset; + vector_sum_2 *= lhs_offset; + vector_sum_3 *= lhs_offset; + vector_sum_4 *= lhs_offset; + + vector_sum_buf[0] += 
vector_sum_0; + vector_sum_buf[1] += vector_sum_1; + vector_sum_buf[2] += vector_sum_2; + vector_sum_buf[3] += vector_sum_3; + vector_sum_buf[4] += vector_sum_4; + vector_sum_buf += 5; + } + const int32_t loop_cnt = vector_rows % 5; + for (int i_row_loop_cnt = 0; i_row_loop_cnt < loop_cnt; i_row_loop_cnt++) + { + const int32_t col_loop_cnt = (vector_cols + 15) / 16; + const int8_t *vector_0 = vector_data; + int32_t vector_sum_0 = 0; + uint32_t col_cnt = (uint32_t)vector_cols; + for (int i = 0; i < col_loop_cnt; i++) + { + mve_pred16_t p = vctp8q(col_cnt); + col_cnt -= 16; + const int8x16_t ker_0 = vldrbq_z_s8(vector_0, p); + vector_sum_0 = vaddvaq_s8(vector_sum_0, ker_0); + vector_0 += 16; + } + vector_data += vector_cols; + vector_sum_0 *= lhs_offset; + + vector_sum_buf[i_row_loop_cnt] += vector_sum_0; + } +#else + for (int i = 0; i < vector_rows; i++) + { + int64_t sum = 0; + for (int j = 0; j < vector_cols; j++) + { + sum += *vector_data++; + } + *vector_sum_buf++ += sum * (int64_t)lhs_offset; + } +#endif + } + + return (ARM_CMSIS_NN_SUCCESS); +} + +/** + * @} end of FC group + */ diff --git a/Source/LSTMFunctions/CMakeLists.txt b/Source/LSTMFunctions/CMakeLists.txt index eed27265..e201e3a0 100644 --- a/Source/LSTMFunctions/CMakeLists.txt +++ b/Source/LSTMFunctions/CMakeLists.txt @@ -17,4 +17,5 @@ # file(GLOB SRC_S8 "./*_s8.c") -target_sources(cmsis-nn PRIVATE ${SRC_S8}) +file(GLOB SRC_S16 "./*_s16.c") +target_sources(cmsis-nn PRIVATE ${SRC_S8} ${SRC_S16}) diff --git a/Source/LSTMFunctions/arm_lstm_unidirectional_s16.c b/Source/LSTMFunctions/arm_lstm_unidirectional_s16.c new file mode 100644 index 00000000..4d4ed021 --- /dev/null +++ b/Source/LSTMFunctions/arm_lstm_unidirectional_s16.c @@ -0,0 +1,95 @@ +/* + * SPDX-FileCopyrightText: Copyright 2024, Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------- + * Project: CMSIS NN Library + * Title: arm_lstm_unidirectional_s16.c + * Description: S16 LSTM function with S16 gate output + * + * $Date: 26 March 2024 + * $Revision: V.1.0.0 + * + * Target Processor: Cortex-M processors + * + * -------------------------------------------------------------------- */ + +#include "arm_nnfunctions.h" +#include "arm_nnsupportfunctions.h" +/** + * @ingroup Public + */ + +/** + * @addtogroup LSTM + * @{ + */ + +/* + * S16 LSTM function for TensorFlow Lite with S16 gate output + * + * Refer to header file for details. + * + */ + +arm_cmsis_nn_status arm_lstm_unidirectional_s16(const int16_t *input, + int16_t *output, + const cmsis_nn_lstm_params *params, + cmsis_nn_lstm_context *buffers) +{ + + int16_t *hidden_in = NULL; + memset(buffers->cell_state, 0, params->batch_size * params->hidden_size * sizeof(int16_t)); + if (params->time_major) + { + // First dimension is time, input/output for each time step is stored continously in memory + for (int t = 0; t < params->time_steps; t++) + { + const int16_t *data_in = input + (t * params->batch_size * params->input_size); + int16_t *hidden_out = output + (t * params->batch_size * params->hidden_size); + arm_cmsis_nn_status status = arm_nn_lstm_step_s16(data_in, hidden_in, hidden_out, params, buffers, 1); + if (status != ARM_CMSIS_NN_SUCCESS) + { + return status; + } + // Output is used as recurrent input/hidden state for the next timestep. 
+ hidden_in = &hidden_out[0]; + } + } + else + { + // First dimension is time, add batch_offset to jump in memory for each batch + for (int t = 0; t < params->time_steps; t++) + { + const int16_t *data_in = input + (t * params->input_size); + int16_t *hidden_out = output + (t * params->hidden_size); + arm_cmsis_nn_status status = + arm_nn_lstm_step_s16(data_in, hidden_in, hidden_out, params, buffers, params->time_steps); + if (status != ARM_CMSIS_NN_SUCCESS) + { + return status; + } + // Output is used as recurrent input/hidden state for the next timestep. + hidden_in = &hidden_out[0]; + } + } + return ARM_CMSIS_NN_SUCCESS; +} + +/** + * @} end of LSTM group + */ diff --git a/Source/NNSupportFunctions/arm_nn_lstm_calculate_gate_s16.c b/Source/NNSupportFunctions/arm_nn_lstm_calculate_gate_s16.c new file mode 100644 index 00000000..1c49fbfe --- /dev/null +++ b/Source/NNSupportFunctions/arm_nn_lstm_calculate_gate_s16.c @@ -0,0 +1,96 @@ +/* + * SPDX-FileCopyrightText: Copyright 2022, 2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------- + * Project: CMSIS NN Library + * Title: arm_nn_lstm_calculate_gate_s16.c + * Description: Update single gate for an incremental step of LSTM function. 
+ * + * $Date: 26 March 2024 + * $Revision: V.1.0.0 + * + * Target Processor: Cortex-M cores + * + * -------------------------------------------------------------------- */ + +#include "arm_nn_tables.h" +#include "arm_nnfunctions.h" +#include "arm_nnsupportfunctions.h" +/** + * @ingroup groupSupport + */ + +/** + * @defgroup supportLSTM + * + * Support functions for LSTM + * + */ + +/** + * @addtogroup supportLSTM + * @{ + */ + +/* + * Calculates a single LSTM gate, int16x8_16 version. + * Refer to header file for details + */ +arm_cmsis_nn_status arm_nn_lstm_calculate_gate_s16(const int16_t *data_in, + const int16_t *hidden_in, + const cmsis_nn_lstm_gate *gate, + const cmsis_nn_lstm_params *params, + int16_t *output, + const int32_t batch_offset) +{ + + memset(output, 0, params->hidden_size * params->batch_size * sizeof(int16_t)); + + arm_nn_vec_mat_mul_result_acc_s16(data_in, + gate->input_weights, + gate->input_effective_bias, + output, + gate->input_multiplier, + gate->input_shift, + params->input_size, + params->hidden_size, + params->batch_size, + batch_offset); + + if (hidden_in) + { + + arm_nn_vec_mat_mul_result_acc_s16(hidden_in, + gate->hidden_weights, + gate->hidden_effective_bias, + output, + gate->hidden_multiplier, + gate->hidden_shift, + params->hidden_size, + params->hidden_size, + params->batch_size, + batch_offset); + } + + arm_nn_activation_s16(output, output, params->hidden_size * params->batch_size, 0, gate->activation_type); + + return ARM_CMSIS_NN_SUCCESS; +} +/** + * @} end of supportLSTM group + */ \ No newline at end of file diff --git a/Source/NNSupportFunctions/arm_nn_lstm_step_s16.c b/Source/NNSupportFunctions/arm_nn_lstm_step_s16.c new file mode 100644 index 00000000..37cf363f --- /dev/null +++ b/Source/NNSupportFunctions/arm_nn_lstm_step_s16.c @@ -0,0 +1,112 @@ +/* + * SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 
2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* ---------------------------------------------------------------------- + * Project: CMSIS NN Library + * Title: arm_nn_lstm_step_s16.c + * Description: Update LSTM function for a single iteration step. + * + * $Date: 26 March 2024 + * $Revision: V.1.0.0 + * + * Target : Arm(R) M-Profile Architecture + * + * -------------------------------------------------------------------- */ +#include "arm_nnfunctions.h" +#include "arm_nnsupportfunctions.h" + +/** + * @ingroup groupSupport + */ + +/** + * @addtogroup supportLSTM + * @{ + */ + +/* + * Calculate the output state tensor of an LSTM step, s16 input/output/weights and s16 internal buffers version. + * Refer to header file for details. 
+ */ +arm_cmsis_nn_status arm_nn_lstm_step_s16(const int16_t *data_in, + const int16_t *hidden_in, + int16_t *hidden_out, + const cmsis_nn_lstm_params *params, + cmsis_nn_lstm_context *buffers, + const int32_t batch_offset) +{ + int16_t *forget_gate = buffers->temp1; + int16_t *input_gate = buffers->temp1; + int16_t *cell_gate = buffers->temp2; + int16_t *output_gate = buffers->temp1; + int16_t *hidden_temp = buffers->temp2; + + int16_t *cell_state = buffers->cell_state; + + arm_nn_lstm_calculate_gate_s16(data_in, hidden_in, ¶ms->forget_gate, params, forget_gate, batch_offset); + + // Calculate first term of cell state in place early to maximise reuse of scratch-buffers + arm_elementwise_mul_s16(forget_gate, + cell_state, + 0, + 0, + cell_state, + 0, + params->forget_to_cell_multiplier, + params->forget_to_cell_shift, + NN_Q15_MIN, + NN_Q15_MAX, + params->hidden_size * params->batch_size); + + arm_nn_lstm_calculate_gate_s16(data_in, hidden_in, ¶ms->input_gate, params, input_gate, batch_offset); + + arm_nn_lstm_calculate_gate_s16(data_in, hidden_in, ¶ms->cell_gate, params, cell_gate, batch_offset); + + // Reminder of cell state calculation, multiply and add to previous result. + arm_elementwise_mul_acc_s16(forget_gate, + cell_gate, + 0, + 0, + cell_state, + 0, + params->input_to_cell_multiplier, + params->input_to_cell_shift, + -params->cell_clip, + params->cell_clip, + params->hidden_size * params->batch_size); + + arm_nn_lstm_calculate_gate_s16(data_in, hidden_in, ¶ms->output_gate, params, output_gate, batch_offset); + + // Calculate hidden state directly to output. 
+ arm_nn_activation_s16( + cell_state, hidden_temp, params->hidden_size * params->batch_size, params->cell_scale_power + 12, ARM_TANH); + arm_elementwise_mul_s16_batch_offset(output_gate, + hidden_temp, + hidden_out, + params->output_offset, + params->output_multiplier, + params->output_shift, + params->hidden_size, + params->batch_size, + batch_offset); + + return ARM_CMSIS_NN_SUCCESS; +} +/** + * @} end of supportLSTM group + */ diff --git a/Source/NNSupportFunctions/arm_nn_vec_mat_mul_result_acc_s16.c b/Source/NNSupportFunctions/arm_nn_vec_mat_mul_result_acc_s16.c new file mode 100644 index 00000000..6281e64f --- /dev/null +++ b/Source/NNSupportFunctions/arm_nn_vec_mat_mul_result_acc_s16.c @@ -0,0 +1,362 @@ +/* + * SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* ---------------------------------------------------------------------- + * Project: CMSIS NN Library + * Title: arm_nn_vec_mat_mul_result_acc_s16 + * Description: s16 vector by matrix (transposed) multiplication + * + * $Date: 26 March 2023 + * $Revision: V.1.0.0 + * + * Target : Arm(R) M-Profile Architecture + * + * -------------------------------------------------------------------- */ + +#include "arm_nnsupportfunctions.h" + +/** + * @ingroup groupSupport + */ + +/** + * @addtogroup supportFC + * @{ + */ + +/* + * s16 vector(lhs) by matrix (transposed) multiplication with result accumulation + * + * Refer header file for details. + * + */ +arm_cmsis_nn_status arm_nn_vec_mat_mul_result_acc_s16(const int16_t *lhs, + const int8_t *rhs, + const int64_t *effective_bias, + int16_t *dst, + const int32_t dst_multiplier, + const int32_t dst_shift, + const int32_t rhs_cols, + const int32_t rhs_rows, + const int32_t batches, + const int32_t batch_offset) +{ + + int32_t reduced_multiplier = REDUCE_MULTIPLIER(dst_multiplier); + + for (int batch = 0; batch < batches; batch++) + { + + const int8_t *rhs_ptr = &rhs[0]; + const int64_t *effective_bias_ptr = &effective_bias[0]; + +#if defined(ARM_MATH_DSP) + + int32_t rhs_cols_fast = rhs_cols; + + if (rhs_cols > MAX_COL_COUNT) + { + rhs_cols_fast = MAX_COL_COUNT; + } + + #if defined(ARM_MATH_MVEI) + int32_t row_loop_cnt = rhs_rows / 4; + const int32_t col_loop_cnt = (rhs_cols_fast + 7) / 8; + + for (int32_t i_row_loop_count = 0; i_row_loop_count < row_loop_cnt; i_row_loop_count++) + { + int32_t col_cnt = rhs_cols_fast; + + const int16_t *lhs_ptr = lhs; + const int8_t *rhs_ptr_0 = rhs_ptr; + const int8_t *rhs_ptr_1 = rhs_ptr + rhs_cols; + const int8_t *rhs_ptr_2 = rhs_ptr + rhs_cols * 2; + const int8_t *rhs_ptr_3 = rhs_ptr + rhs_cols * 3; + + int32_t result_0 = *effective_bias_ptr++; + int32_t result_1 = *effective_bias_ptr++; + int32_t result_2 = *effective_bias_ptr++; + int32_t result_3 = *effective_bias_ptr++; + + for 
(int i_col_loop_cnt = 0; i_col_loop_cnt < col_loop_cnt; i_col_loop_cnt++) + { + mve_pred16_t pred = vctp16q(col_cnt); + col_cnt -= 8; + + int16x8_t lhs_input = vldrhq_z_s16(lhs_ptr, pred); + + int16x8_t rhs_input_0 = vldrbq_z_s16(rhs_ptr_0, pred); + int16x8_t rhs_input_1 = vldrbq_z_s16(rhs_ptr_1, pred); + int16x8_t rhs_input_2 = vldrbq_z_s16(rhs_ptr_2, pred); + int16x8_t rhs_input_3 = vldrbq_z_s16(rhs_ptr_3, pred); + + result_0 = vmladavaq_s16(result_0, lhs_input, rhs_input_0); + result_1 = vmladavaq_s16(result_1, lhs_input, rhs_input_1); + result_2 = vmladavaq_s16(result_2, lhs_input, rhs_input_2); + result_3 = vmladavaq_s16(result_3, lhs_input, rhs_input_3); + + lhs_ptr += 8; + + rhs_ptr_0 += 8; + rhs_ptr_1 += 8; + rhs_ptr_2 += 8; + rhs_ptr_3 += 8; + } + + int64_t result_64_0 = result_0; + int64_t result_64_1 = result_1; + int64_t result_64_2 = result_2; + int64_t result_64_3 = result_3; + + if (rhs_cols > MAX_COL_COUNT) + { + for (int i_rhs_cols = MAX_COL_COUNT; i_rhs_cols < rhs_cols; i_rhs_cols++) + { + const int16_t lhs_temp = *lhs_ptr++; + + result_64_0 += *rhs_ptr_0++ * lhs_temp; + result_64_1 += *rhs_ptr_1++ * lhs_temp; + result_64_2 += *rhs_ptr_2++ * lhs_temp; + result_64_3 += *rhs_ptr_3++ * lhs_temp; + } + } + + int32_t tmp; + tmp = arm_nn_requantize_s64(result_64_0, reduced_multiplier, dst_shift); + tmp += (int64_t)*dst; + tmp = MAX(tmp, NN_Q15_MIN); + tmp = MIN(tmp, NN_Q15_MAX); + *dst++ = (int16_t)tmp; + + tmp = 0; + tmp = arm_nn_requantize_s64(result_64_1, reduced_multiplier, dst_shift); + tmp += (int64_t)*dst; + tmp = MAX(tmp, NN_Q15_MIN); + tmp = MIN(tmp, NN_Q15_MAX); + *dst++ = (int16_t)tmp; + + tmp = 0; + tmp = arm_nn_requantize_s64(result_64_2, reduced_multiplier, dst_shift); + tmp += (int64_t)*dst; + tmp = MAX(tmp, NN_Q15_MIN); + tmp = MIN(tmp, NN_Q15_MAX); + *dst++ = (int16_t)tmp; + + tmp = 0; + tmp = arm_nn_requantize_s64(result_64_3, reduced_multiplier, dst_shift); + tmp += (int64_t)*dst; + tmp = MAX(tmp, NN_Q15_MIN); + tmp = MIN(tmp, 
NN_Q15_MAX); + *dst++ = (int16_t)tmp; + + rhs_ptr += 4 * rhs_cols; + } + + for (int8_t rows_left = rhs_rows & 0x3; rows_left > 0; rows_left--) + { + int32_t result = *effective_bias_ptr++; + + const int16_t *lhs_ptr = lhs; + const int8_t *rhs_ptr0 = rhs_ptr; + + int32_t col_cnt = (int32_t)rhs_cols_fast; + + for (int i_col_loop_cnt = 0; i_col_loop_cnt < col_loop_cnt; i_col_loop_cnt++) + { + mve_pred16_t pred = vctp16q(col_cnt); + col_cnt -= 8; + + int16x8_t lhs_input = vldrhq_z_s16(lhs_ptr, pred); + int16x8_t rhs_input = vldrbq_z_s16(rhs_ptr0, pred); + + result = vmladavaq_p_s16(result, lhs_input, rhs_input, pred); + + lhs_ptr += 8; + rhs_ptr0 += 8; + } + + int64_t result_64 = result; + + if (rhs_cols > MAX_COL_COUNT) + { + for (int i_rhs_cols = MAX_COL_COUNT; i_rhs_cols < rhs_cols; i_rhs_cols++) + { + const int16_t lhs_temp = *lhs_ptr++; + + result_64 += *rhs_ptr0++ * lhs_temp; + } + } + + int32_t tmp = 0; + tmp = arm_nn_requantize_s64(result_64, reduced_multiplier, dst_shift); + tmp += (int64_t)*dst; + tmp = MAX(tmp, NN_Q15_MIN); + tmp = MIN(tmp, NN_Q15_MAX); + *dst++ = (int16_t)tmp; + + rhs_ptr += rhs_cols; + } + + #else // ARM_MATH_MVEI + + const int32_t row_loop_cnt = rhs_rows / 2; + + for (int32_t i = 0; i < row_loop_cnt; i++) + { + + int64_t acc_64_0 = 0; + int64_t acc_64_1 = 0; + int32_t acc_0 = 0; + int32_t acc_1 = 0; + + const int32_t col_loop_cnt = rhs_cols_fast / 4; + + const int16_t *lhs_vec = lhs; + const int8_t *rhs_0 = rhs_ptr; + rhs_ptr += rhs_cols; + const int8_t *rhs_1 = rhs_ptr; + rhs_ptr += rhs_cols; + + for (int j = col_loop_cnt; j != 0; j--) + { + int32_t ker_0, ker_1, vec_part_0, vec_part_1; + + vec_part_0 = arm_nn_read_q15x2_ia(&lhs_vec); + vec_part_1 = arm_nn_read_q15x2_ia(&lhs_vec); + + rhs_0 = read_and_pad(rhs_0, &ker_0, &ker_1); + + acc_0 = SMLAD(ker_0, vec_part_0, acc_0); + acc_0 = SMLAD(ker_1, vec_part_1, acc_0); + + rhs_1 = read_and_pad(rhs_1, &ker_0, &ker_1); + + acc_1 = SMLAD(ker_0, vec_part_0, acc_1); + acc_1 = SMLAD(ker_1, 
vec_part_1, acc_1); + } + + acc_64_0 += acc_0; + acc_64_1 += acc_1; + + for (int k = col_loop_cnt * 4; k < rhs_cols; k++) + { + const int32_t lhs_temp = (*lhs_vec); + lhs_vec++; + acc_64_0 += lhs_temp * (*rhs_0); + rhs_0++; + acc_64_1 += lhs_temp * (*rhs_1); + rhs_1++; + } + + acc_64_0 += *effective_bias_ptr++; + acc_64_1 += *effective_bias_ptr++; + int32_t tmp; + + tmp = arm_nn_requantize_s64(acc_64_0, reduced_multiplier, dst_shift); + tmp += (int64_t)*dst; + tmp = MAX(tmp, NN_Q15_MIN); + tmp = MIN(tmp, NN_Q15_MAX); + *dst++ = (int16_t)tmp; + + tmp = arm_nn_requantize_s64(acc_64_1, reduced_multiplier, dst_shift); + tmp += (int64_t)*dst; + tmp = MAX(tmp, NN_Q15_MIN); + tmp = MIN(tmp, NN_Q15_MAX); + *dst++ = (int16_t)tmp; + } + + if (rhs_rows & 0x1) + { + int64_t acc_64_0 = 0; + int32_t acc_0 = 0; + const int32_t col_loop_cnt = rhs_cols_fast / 4; + + const int16_t *lhs_vec = lhs; + const int8_t *rhs_0 = rhs_ptr; + + for (int i = col_loop_cnt; i != 0; i--) + { + int32_t ker_0, ker_1, vec; + rhs_0 = read_and_pad(rhs_0, &ker_0, &ker_1); + + vec = arm_nn_read_q15x2_ia(&lhs_vec); + acc_0 = SMLAD(ker_0, vec, acc_0); + + vec = arm_nn_read_q15x2_ia(&lhs_vec); + acc_0 = SMLAD(ker_1, vec, acc_0); + } + + acc_64_0 += acc_0; + + for (int j = col_loop_cnt * 4; j < rhs_cols; j++) + { + const int32_t lhs_temp = (*lhs_vec); + lhs_vec++; + acc_64_0 += lhs_temp * (*rhs_0); + rhs_0++; + } + + acc_64_0 += *effective_bias_ptr++; + + int32_t tmp; + tmp = arm_nn_requantize_s64(acc_64_0, reduced_multiplier, dst_shift); + tmp += (int64_t)*dst; + tmp = MAX(tmp, NN_Q15_MIN); + tmp = MIN(tmp, NN_Q15_MAX); + *dst++ = (int16_t)tmp; + } + + #endif // ARM_MATH_MVEI +#else // ARM_MATH_DSP + for (int i_row_loop_cnt = 0; i_row_loop_cnt < rhs_rows; i_row_loop_cnt++) + { + const int16_t *lhs_ptr = lhs; + + int64_t result = *effective_bias_ptr++; + + for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) + { + const int64_t rhs_value0 = (int8_t)*rhs_ptr; + const int64_t lhs_value = 
*lhs_ptr; + + result += lhs_value * rhs_value0; + ++rhs_ptr; + ++lhs_ptr; + } + + // Quantize down + result = arm_nn_requantize_s64(result, reduced_multiplier, dst_shift); + result += (int64_t)*dst; + + // Clamp the result + result = ((result) > (NN_Q15_MIN) ? (result) : (NN_Q15_MIN)); + result = ((result) < (NN_Q15_MAX) ? (result) : (NN_Q15_MAX)); + + *dst++ = (int16_t)result; + } +#endif // ARM_MATH_DSP + + lhs += rhs_cols * batch_offset; + } + + return ARM_CMSIS_NN_SUCCESS; +} + +/** + * @} end of Doxygen group + */ diff --git a/Tests/UnitTest/CMakeLists.txt b/Tests/UnitTest/CMakeLists.txt index cb273db5..dcc06577 100644 --- a/Tests/UnitTest/CMakeLists.txt +++ b/Tests/UnitTest/CMakeLists.txt @@ -105,6 +105,7 @@ add_subdirectory(TestCases/test_arm_softmax_s8_s16) add_subdirectory(TestCases/test_arm_svdf_s8) add_subdirectory(TestCases/test_arm_svdf_state_s16_s8) add_subdirectory(TestCases/test_arm_transpose_conv_s8) +add_subdirectory(TestCases/test_arm_lstm_unidirectional_s16) set(MAKE_CMD "python3") set(MAKE_CMD_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/unittest_targets.py") diff --git a/Tests/UnitTest/RefactoredTestGen/JsonTemplates/test_arm_lstm_s16.json b/Tests/UnitTest/RefactoredTestGen/JsonTemplates/lstm_s16.json similarity index 99% rename from Tests/UnitTest/RefactoredTestGen/JsonTemplates/test_arm_lstm_s16.json rename to Tests/UnitTest/RefactoredTestGen/JsonTemplates/lstm_s16.json index d45163b9..0b947e20 100644 --- a/Tests/UnitTest/RefactoredTestGen/JsonTemplates/test_arm_lstm_s16.json +++ b/Tests/UnitTest/RefactoredTestGen/JsonTemplates/lstm_s16.json @@ -448,7 +448,7 @@ "fused_activation_function": "TANH", "cell_clip": cell_clip, "proj_clip": 0.0, - "time_major": false, + "time_major": time_major, "asymmetric_quantize_inputs": false, "diagonal_recurrent_tensors": false }, diff --git a/Tests/UnitTest/RefactoredTestGen/JsonTemplates/lstm_s16_tm.json b/Tests/UnitTest/RefactoredTestGen/JsonTemplates/lstm_s16_tm.json new file mode 100644 index 00000000..6e0e61d5 
--- /dev/null +++ b/Tests/UnitTest/RefactoredTestGen/JsonTemplates/lstm_s16_tm.json @@ -0,0 +1,620 @@ +{ + "version": 3, + "operator_codes": [ + { + "deprecated_builtin_code": 44, + "version": 1, + "builtin_code": "UNIDIRECTIONAL_SEQUENCE_LSTM" + } + ], + "subgraphs": [ + { + "tensors": [ + { + "shape": [ + time_steps, + batch_size, + input_size + ], + "type": "INT16", + "buffer": 0, + "name": "serving_default_input:0", + "quantization": { + "scale": [ + input_scale + ], + "zero_point": [ + input_zero_point + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + hidden_size + ], + "type": "INT64", + "buffer": 2, + "name": "arith.constant4", + "quantization": { + "scale": [ + 0.000029 + ], + "zero_point": [ + 0 + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + hidden_size + ], + "type": "INT64", + "buffer": 3, + "name": "arith.constant5", + "quantization": { + "scale": [ + 0.000024 + ], + "zero_point": [ + 0 + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + hidden_size + ], + "type": "INT64", + "buffer": 4, + "name": "arith.constant6", + "quantization": { + "scale": [ + 0.000028 + ], + "zero_point": [ + 0 + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + hidden_size + ], + "type": "INT64", + "buffer": 5, + "name": "arith.constant7", + "quantization": { + "scale": [ + 0.00002 + ], + "zero_point": [ + 0 + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + hidden_size, + hidden_size + ], + "type": "INT8", + "buffer": 6, + "name": "arith.constant", + "quantization": { + "scale": [ + output_gate_hidden_scale + ], + "zero_point": [ + 0 + ], + "details_type": "NONE", + 
"quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + hidden_size, + hidden_size + ], + "type": "INT8", + "buffer": 7, + "name": "arith.constant1", + "quantization": { + "scale": [ + cell_gate_hidden_scale + ], + "zero_point": [ + 0 + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + hidden_size, + hidden_size + ], + "type": "INT8", + "buffer": 8, + "name": "arith.constant2", + "quantization": { + "scale": [ + forget_gate_hidden_scale + ], + "zero_point": [ + 0 + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + hidden_size, + hidden_size + ], + "type": "INT8", + "buffer": 9, + "name": "arith.constant3", + "quantization": { + "scale": [ + input_gate_hidden_scale + ], + "zero_point": [ + 0 + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + hidden_size, + input_size + ], + "type": "INT8", + "buffer": 10, + "name": "arith.constant8", + "quantization": { + "scale": [ + output_gate_input_scale + ], + "zero_point": [ + 0 + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + hidden_size, + input_size + ], + "type": "INT8", + "buffer": 11, + "name": "arith.constant9", + "quantization": { + "scale": [ + cell_gate_input_scale + ], + "zero_point": [ + 0 + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + hidden_size, + input_size + ], + "type": "INT8", + "buffer": 12, + "name": "arith.constant10", + "quantization": { + "scale": [ + forget_gate_input_scale + ], + "zero_point": [ + 0 + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + hidden_size, + input_size + ], + "type": 
"INT8", + "buffer": 13, + "name": "arith.constant11", + "quantization": { + "scale": [ + input_gate_input_scale + ], + "zero_point": [ + 0 + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + batch_size, + hidden_size + ], + "type": "INT16", + "buffer": 0, + "name": "tfl.pseudo_qconst", + "quantization": { + "scale": [ + output_scale + ], + "zero_point": [ + output_zero_point + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": true, + "has_rank": true + }, + { + "shape": [ + batch_size, + hidden_size + ], + "type": "INT16", + "buffer": 0, + "name": "tfl.pseudo_qconst1", + "quantization": { + "scale": [ + cell_scale + ], + "zero_point": [ + 0 + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": true, + "has_rank": true + }, + { + "shape": [ + 0 + ], + "type": "FLOAT32", + "buffer": 0, + "name": "input_to_input_intermediate", + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + 0 + ], + "type": "FLOAT32", + "buffer": 0, + "name": "input_to_forget_intermediate", + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + 0 + ], + "type": "FLOAT32", + "buffer": 0, + "name": "input_to_cell_intermediate", + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + 0 + ], + "type": "FLOAT32", + "buffer": 0, + "name": "input_to_output_intermediate", + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + 0 + ], + "type": "INT16", + "buffer": 0, + "name": "effective_hidden_scale_intermediate", + "quantization": { + "scale": [ + 0 + ], + "zero_point": [ + 0 + ], + "details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + }, + { + "shape": [ + time_steps, + batch_size, + hidden_size + ], + "type": "INT16", + "buffer": 16, + "name": "StatefulPartitionedCall:0", + "quantization": { + "scale": [ + output_scale + ], + "zero_point": [ + output_zero_point + ], + 
"details_type": "NONE", + "quantized_dimension": 0 + }, + "is_variable": false, + "has_rank": true + } + ], + "inputs": [ + 0 + ], + "outputs": [ + 20 + ], + "operators": [ + { + "opcode_index": 0, + "inputs": [ + 0, + 12, + 11, + 10, + 9, + 8, + 7, + 6, + 5, + -1, + -1, + -1, + 4, + 3, + 2, + 1, + -1, + -1, + 13, + 14, + -1, + -1, + -1, + -1 + ], + "outputs": [ + 20 + ], + "builtin_options_type": "UnidirectionalSequenceLSTMOptions", + "builtin_options": { + "fused_activation_function": "TANH", + "cell_clip": cell_clip, + "proj_clip": 0.0, + "time_major": time_major, + "asymmetric_quantize_inputs": false, + "diagonal_recurrent_tensors": false + }, + "custom_options_format": "FLEXBUFFERS", + "intermediates": [ + 15, + 16, + 17, + 18, + 19 + ], + "large_custom_options_offset": 0, + "large_custom_options_size": 0, + "builtin_options_2_type": "NONE" + } + ], + "name": "main" + } + ], + "description": "MLIR Converted.", + "buffers": [ + { + "offset": 0, + "size": 10 + }, + { + "offset": 0, + "size": 0 + }, + { + "data": [ + output_gate_bias + ], + "offset": 0, + "size": 0 + }, + { +"data": [ + cell_gate_bias + ], + "offset": 0, + "size": 0 + }, + { +"data": [ + forget_gate_bias + ], + "offset": 0, + "size": 0 + }, + { +"data": [ + input_gate_bias + ], + "offset": 0, + "size": 0 + }, + { + "data": [ + output_gate_hidden_weights + ], + "offset": 0, + "size": 0 + }, + { + "data": [ + cell_gate_hidden_weights + ], + "offset": 0, + "size": 0 + }, + { + "data": [ + forget_gate_hidden_weights + ], + "offset": 0, + "size": 0 + }, + { + "data": [ + input_gate_hidden_weights + ], + "offset": 0, + "size": 0 + }, + { + "data": [ + output_gate_input_weights + ], + "offset": 0, + "size": 0 + }, + { + "data": [ + cell_gate_input_weights + ], + "offset": 0, + "size": 0 + }, + { + "data": [ + forget_gate_input_weights + ], + "offset": 0, + "size": 0 + }, + { + "data": [ + input_gate_input_weights + ], + "offset": 0, + "size": 0 + }, + { + "data": [ + 0, + 0, + 0, + 0 + ], + "offset": 0, 
+ "size": 0 + }, + { + "offset": 0, + "size": 0 + }, + { + "offset": 0, + "size": 0 + }, + { + "offset": 0, + "size": 0 + }, + { + "offset": 0, + "size": 0 + } + ], + "metadata": [ + { + "name": "min_runtime_version", + "buffer": 17 + }, + { + "name": "CONVERSION_METADATA", + "buffer": 18 + } + ], + "signature_defs": [ + { + "inputs": [ + { + "name": "input", + "tensor_index": 0 + } + ], + "outputs": [ + { + "name": "lstm", + "tensor_index": 20 + } + ], + "signature_key": "serving_default", + "subgraph_index": 0 + } + ] +} diff --git a/Tests/UnitTest/RefactoredTestGen/Lib/op_lstm.py b/Tests/UnitTest/RefactoredTestGen/Lib/op_lstm.py index d1d8aef9..02f2ec88 100644 --- a/Tests/UnitTest/RefactoredTestGen/Lib/op_lstm.py +++ b/Tests/UnitTest/RefactoredTestGen/Lib/op_lstm.py @@ -27,7 +27,11 @@ class Op_lstm(Lib.op_utils.Op_type): def get_shapes(params): shapes = {} - shapes["input"] = (params["batch_size"], params["time_steps"], params["input_size"]) + if params["time_major"] and params["tflite_generator"] == "json": + shapes["input"] = (params["time_steps"], params["batch_size"], params["input_size"]) + else: + shapes["input"] = (params["batch_size"], params["time_steps"], params["input_size"]) + shapes["input_weights"] = (params["input_size"], params["hidden_size"]) shapes["all_input_weights"] = (params["input_size"], params["hidden_size"] * 4) @@ -135,8 +139,8 @@ def generate_data_json(shapes, params): effective_scales = {} generated_params = {} - maxval = 0.009 - minval = 0.002 + maxval = 0.001 + minval = 0.0001 scales["input_scale"] = np.round(np.random.rand(1) * (maxval - minval) + minval, 6)[0] scales["cell_scale"] = np.round(np.random.rand(1) * (maxval - minval) + maxval, 6)[0] @@ -175,8 +179,8 @@ def create_scales(name, input_scale1): tensors["cell_gate_input_weights"] = np.random.randint(minval, maxval, size=shapes["input_weights"]) tensors["output_gate_input_weights"] = np.random.randint(minval, maxval, size=shapes["input_weights"]) - maxval = 1 - minval = 0 + 
maxval = Lib.op_utils.get_dtype_max(params["input_data_type"]) + minval = 0 # Negative weights are not supported in test generation tensors["input_gate_bias"] = np.random.randint(minval, maxval, size=shapes["bias"]) tensors["forget_gate_bias"] = np.random.randint(minval, maxval, size=shapes["bias"]) tensors["cell_gate_bias"] = np.random.randint(minval, maxval, size=shapes["bias"]) diff --git a/Tests/UnitTest/RefactoredTestGen/Lib/op_utils.py b/Tests/UnitTest/RefactoredTestGen/Lib/op_utils.py index dc45e87c..ad74b0e8 100644 --- a/Tests/UnitTest/RefactoredTestGen/Lib/op_utils.py +++ b/Tests/UnitTest/RefactoredTestGen/Lib/op_utils.py @@ -88,7 +88,7 @@ def get_np_dtype(dtype): if dtype == "int32_t": return np.uint32 if dtype == "int64_t": - return np.uint32 + return np.uint64 else: raise Exception(f"Unrecognized dtype '{dtype}'") diff --git a/Tests/UnitTest/RefactoredTestGen/Lib/test.py b/Tests/UnitTest/RefactoredTestGen/Lib/test.py index 466f8c36..ea8d1826 100644 --- a/Tests/UnitTest/RefactoredTestGen/Lib/test.py +++ b/Tests/UnitTest/RefactoredTestGen/Lib/test.py @@ -117,7 +117,7 @@ def generate(params, args, fpaths): include_in_config = lambda key: key not in [ "suite_name", "name", "input_data_type", "op_type", "input_data_type", "weights_data_type", "bias_data_type", - "interpreter", "tflite_generator" + "interpreter", "tflite_generator", "json_template" ] config_params = {key: val for key, val in params.items() if include_in_config(key)} write_config(fpaths["config_data"], config_params, params["name"], fpaths["test_data"], header) @@ -223,20 +223,32 @@ def write_config(config_fpath, params, prefix, test_data_fpath, header): def write_c_array(data, fname, dtype, prefix, tensor_name, test_data_fpath, header): + + # Check that the data looks reasonable + values, counts = np.unique(data, return_counts=True) + if len(values) < data.size / 2 or max(counts) > data.size / 2: + print(f"WARNING: {fname} has repeating values, is this intended?") + if len(data) > 500: + 
print(f"WARNING: {fname} has more than 500 values, is this intended?") + with fname.open("w+") as f: f.write(header) f.write("#pragma once\n") f.write("#include \n\n") - if not data is None: + data_shape = data.shape + format_width = len(str(data.max())) + 1 data = data.flatten() f.write(f"const {dtype} {prefix}_{tensor_name}[{len(data)}] = \n" + "{") for i in range(len(data) - 1): - f.write(f"{data[i]: 5n}, ") - if i % 16 == 0: + if i % data_shape[-1] == 0: f.write("\n") - f.write(str(data[len(data) - 1]) + "\n};") + f.write(f"{data[i]: {format_width}n}, ") + + if len(data)-1 % data_shape[-1] == 0: + f.write("\n") + f.write(f"{data[len(data) - 1]: {format_width}n}" + "\n};") else: f.write(f"const {dtype} *{prefix}_{tensor_name} = NULL;\n") @@ -268,11 +280,16 @@ def convert_json_to_tflite(json_template_fpath, json_output_fpath, tensors, para for line in template: line_list = line.replace(",", "").split() replaced = False - for key in params: + for key, val in params.items(): if key in line_list: + if isinstance(val, bool): + if val: + val = "true" + else: + val = "false" # To be able to handle cases like "variable_name" : variable_name # make sure to only replace the last occurence per line - new_line = str(params[key]).join(line.rsplit(key, 1)) + new_line = str(val).join(line.rsplit(key, 1)) output.write(new_line) replaced = True break @@ -318,7 +335,7 @@ def quantize_scale(scale): def get_header(generator, interpreter): if generator == "keras": - header = f"// Generated by test.py using tensorflow version {tf.__version__} (Keras version {keras.__version__}).\n" + header = f"// Generated by {os.path.basename(__file__)} using tensorflow version {tf.__version__} (Keras version {keras.__version__}).\n" elif generator == "json": command = f"flatc --version" command_list = command.split() @@ -333,7 +350,7 @@ def get_header(generator, interpreter): sys.exit(1) except Exception as e: raise RuntimeError(f"{e} from: {command = }. 
Did you install flatc?") - header = f"// Generated by test.py using {flatc_version}\n" + header = f"// Generated by {os.path.basename(__file__)} using {str(flatc_version)[2:-3]}\n" else: raise Exception diff --git a/Tests/UnitTest/RefactoredTestGen/Lib/test_plan.py b/Tests/UnitTest/RefactoredTestGen/Lib/test_plan.py index c9c60dae..fa0540be 100644 --- a/Tests/UnitTest/RefactoredTestGen/Lib/test_plan.py +++ b/Tests/UnitTest/RefactoredTestGen/Lib/test_plan.py @@ -16,18 +16,27 @@ # import json import Lib.test_suite - +import sys def generate(args): """Generate a number of test suites defined by a json-file test plan""" - print(f"\nGenerating tests from {args.test_plan}") test_plan = args.test_plan.read_text() test_suite_params_list = json.loads(test_plan) - test_suites = [] + # List available tests for convenience + if args.list: + for suite in test_suite_params_list: + print(f"{suite['suite_name']}") + for test in suite["tests"]: + print(f"- {test['name']}") + + sys.exit() + + print(f"\nGenerating tests from {args.test_plan}") for test_suite_params in test_suite_params_list: if (test_suite_params["suite_name"] in args.test_suites) or (args.test_suites == []): - print(f"{test_suite_params['suite_name']}") - test_suite = Lib.test_suite.generate(test_suite_params, args) - test_suites.append(test_suite) + test_names = [test["name"] for test in test_suite_params["tests"] if test["name"] in args.tests] + if (len(test_names) > 0) or (args.tests == []): + print(f"{test_suite_params['suite_name']}") + test_suite = Lib.test_suite.generate(test_suite_params, args) diff --git a/Tests/UnitTest/RefactoredTestGen/generate_test_data.py b/Tests/UnitTest/RefactoredTestGen/generate_test_data.py index a0591c51..6af83ee4 100755 --- a/Tests/UnitTest/RefactoredTestGen/generate_test_data.py +++ b/Tests/UnitTest/RefactoredTestGen/generate_test_data.py @@ -17,8 +17,9 @@ # limitations under the License. 
# import os -os.environ["TF_USE_LEGACY_KERAS"]="1" # See https://github.com/tensorflow/tensorflow/releases/tag/v2.16.1 -os.environ['TF_CPP_MIN_LOG_LEVEL'] ="2" # See https://github.com/tensorflow/tensorflow/issues/59779 + +os.environ["TF_USE_LEGACY_KERAS"] = "1" # See https://github.com/tensorflow/tensorflow/releases/tag/v2.16.1 +os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2" # See https://github.com/tensorflow/tensorflow/issues/59779 import json import argparse import pathlib @@ -55,6 +56,8 @@ def main(): default="../../../tflite_micro/tensorflow/lite/schema/schema.fbs", help="Path to the schema-file needed for generating tflite-files with flatc") parser.add_argument("--verbose", action="store_true", help="Enable additional logging") + parser.add_argument("--list", action="store_true", help="Only list tests in test plan") + args = parser.parse_args() Lib.test_plan.generate(args) diff --git a/Tests/UnitTest/RefactoredTestGen/test_plan.json b/Tests/UnitTest/RefactoredTestGen/test_plan.json index 3fe2f5fc..b577d23c 100644 --- a/Tests/UnitTest/RefactoredTestGen/test_plan.json +++ b/Tests/UnitTest/RefactoredTestGen/test_plan.json @@ -14,7 +14,7 @@ "time_steps" : 10, "input_size" : 22, "hidden_size" : 11, - "json_template": "test_arm_lstm_s16.json" + "json_template": "lstm_s16_tm.json" }, {"name" : "lstm_2_s16", "time_major" : false, @@ -22,7 +22,7 @@ "time_steps" : 9, "input_size" : 6, "hidden_size" : 7, - "json_template": "test_arm_lstm_s16.json" + "json_template": "lstm_s16.json" }, {"name" : "lstm_one_time_step_s16", "time_major" : false, @@ -30,7 +30,7 @@ "time_steps" : 1, "input_size" : 22, "hidden_size" : 3, - "json_template": "test_arm_lstm_s16.json" + "json_template": "lstm_s16.json" } ] }, diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/cell_gate_bias.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/cell_gate_bias.h new file mode 100644 index 00000000..8306e32b --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/cell_gate_bias.h @@ -0,0 +1,7 
@@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int64_t lstm_1_s16_cell_gate_bias[11] = + {20142, 23563, 15741, 25098, 29041, 25327, 12730, 19511, 21749, 13563, 31032}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/cell_gate_hidden_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/cell_gate_hidden_weights.h new file mode 100644 index 00000000..ba8777b4 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/cell_gate_hidden_weights.h @@ -0,0 +1,12 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int8_t lstm_1_s16_cell_gate_hidden_weights[121] = { + 99, -84, 84, -14, -61, 94, -60, 67, -85, 56, -108, 69, 53, -34, 103, 75, 97, -101, -84, -100, 89, + -105, 124, -83, 4, 42, -9, -18, 17, -32, -37, -117, 94, -105, 125, -92, 73, 122, -50, 31, -66, 123, + 2, 36, 118, -19, -98, 108, -67, -56, 77, 15, -61, -5, -25, -19, 106, -121, -102, -81, -54, 35, 80, + 39, -7, -115, 107, 38, 30, -28, 85, -12, 111, -124, 54, 0, -9, 39, -35, 102, -45, 22, 83, -39, + -94, -96, -60, 30, -101, 48, 108, -56, -78, -110, 13, 70, 49, -44, -75, -52, 78, 102, -57, -26, -97, + -95, 18, -77, -128, 50, -76, 37, -68, -63, 76, 107, 56, 115, -108, -74, 34}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/cell_gate_input_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/cell_gate_input_weights.h new file mode 100644 index 00000000..83c21cc1 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/cell_gate_input_weights.h @@ -0,0 +1,19 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int8_t lstm_1_s16_cell_gate_input_weights[242] = { + -66, 25, 6, 53, -50, 29, -127, -102, -15, 112, -13, 60, -87, -92, -108, 28, 81, -17, -16, + 54, -40, -109, -105, -60, -26, -71, -116, -76, -104, 48, 108, 118, 74, -124, 81, -111, -16, -117, + -91, 25, 38, 121, -19, -1, 109, 39, 77, -76, 99, 34, 2, 45, 26, 9, -111, -62, 73, + -127, 2, -8, -115, 110, -47, 114, -51, -23, 111, -84, -112, 27, -101, 13, 67, 95, 118, -46, + 37, -51, 116, -98, 87, 0, 109, 119, -102, -122, 79, 110, -47, -114, -121, -77, -45, -94, -1, + 5, -74, -40, -74, 41, -67, 16, -115, -20, 0, 27, 78, 29, -116, -114, -119, -43, -50, -31, + -121, -52, -42, -91, -24, -82, -24, -23, -106, 40, 62, -36, -102, 7, -79, -12, 56, 16, 79, + -29, -39, -74, 118, 6, -46, -115, 100, -38, 33, 70, 0, -41, 111, -28, 42, -9, -113, -22, + -15, -96, 81, -37, 88, -77, 47, 111, -39, 85, 79, 97, 62, -74, 91, -112, -62, 78, -5, + 52, -64, -97, 38, -98, 84, -88, -59, -2, -107, -91, 12, -56, 51, 32, 0, 78, -14, -14, + -7, 124, -85, -122, 52, 42, -9, 10, 19, 20, -8, 79, 104, 14, -56, 1, 124, 81, 69, + -20, -62, -105, -67, 69, -25, 88, -15, -108, 47, -111, -26, 87, 25, -98, 19, -4, 68, 4, + 47, -115, -18, -40, 87, 50, -79, -104, -122, -77, -64, 125, -93, -75}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/config_data.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/config_data.h new file mode 100644 index 00000000..a0b0925c --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/config_data.h @@ -0,0 +1,34 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#define lstm_1_s16_time_major true +#define lstm_1_s16_batch_size 1 +#define lstm_1_s16_time_steps 10 +#define lstm_1_s16_input_size 22 +#define lstm_1_s16_hidden_size 11 +#define lstm_1_s16_cell_scale_power -9 +#define lstm_1_s16_output_zero_point 0 +#define lstm_1_s16_input_zero_point 0 +#define lstm_1_s16_cell_clip 32767 +#define lstm_1_s16_forget_to_cell_multiplier 1073741824 +#define lstm_1_s16_forget_to_cell_shift -14 +#define lstm_1_s16_input_to_cell_multiplier 1107260824 +#define lstm_1_s16_input_to_cell_shift -20 +#define lstm_1_s16_output_multiplier 1213629630 +#define lstm_1_s16_output_shift -16 +#define lstm_1_s16_output_gate_hidden_multiplier 1898861718 +#define lstm_1_s16_output_gate_hidden_shift -11 +#define lstm_1_s16_cell_gate_hidden_multiplier 1891079498 +#define lstm_1_s16_cell_gate_hidden_shift -11 +#define lstm_1_s16_forget_gate_hidden_multiplier 1103129707 +#define lstm_1_s16_forget_gate_hidden_shift -11 +#define lstm_1_s16_input_gate_hidden_multiplier 1470839610 +#define lstm_1_s16_input_gate_hidden_shift -12 +#define lstm_1_s16_output_gate_input_multiplier 1106912731 +#define lstm_1_s16_output_gate_input_shift -8 +#define lstm_1_s16_cell_gate_input_multiplier 1407915316 +#define lstm_1_s16_cell_gate_input_shift -11 +#define lstm_1_s16_forget_gate_input_multiplier 1606965412 +#define lstm_1_s16_forget_gate_input_shift -9 +#define lstm_1_s16_input_gate_input_multiplier 1177308497 +#define lstm_1_s16_input_gate_input_shift -8 diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/forget_gate_bias.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/forget_gate_bias.h new file mode 100644 index 00000000..bfa39ece --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/forget_gate_bias.h @@ -0,0 +1,6 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int64_t lstm_1_s16_forget_gate_bias[11] = {8691, 24230, 5069, 6375, 13615, 17184, 3326, 27937, 5850, 1018, 15410}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/forget_gate_hidden_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/forget_gate_hidden_weights.h new file mode 100644 index 00000000..cc2f7476 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/forget_gate_hidden_weights.h @@ -0,0 +1,12 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int8_t lstm_1_s16_forget_gate_hidden_weights[121] = { + 59, 122, -110, 124, -13, 123, 42, 108, 71, -65, 116, -23, 62, -80, 96, 111, -27, 42, -34, -13, -39, + -119, 107, -44, -71, 91, 21, -28, 20, 96, -76, 23, 74, 23, -118, 95, -72, -47, 87, 111, -41, 68, + 7, 113, 66, 9, -36, 24, 126, -4, 2, -36, 52, 50, 108, 35, -22, 18, -114, -65, -16, -112, -52, + 35, 113, -77, 82, -120, 92, 61, -91, 104, -28, 74, -37, 55, -64, 106, -38, 48, -105, -127, -8, -110, + -109, -42, -71, 121, -24, -3, 30, -40, -29, -111, -63, 66, 119, 25, -75, -52, 93, -50, 15, -107, -11, + -108, -43, 23, 50, 45, 7, -90, 13, -125, 98, -118, -9, 125, -80, -66, -14}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/forget_gate_input_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/forget_gate_input_weights.h new file mode 100644 index 00000000..2537dcfa --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/forget_gate_input_weights.h @@ -0,0 +1,19 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int8_t lstm_1_s16_forget_gate_input_weights[242] = { + -56, -78, 96, -62, -38, -48, -92, -47, 4, 121, -38, -82, -39, 55, -53, -15, 53, -19, 89, + 77, -28, 73, 22, 112, 18, 71, -27, -110, -16, 16, -119, -33, 1, -52, -16, 92, -21, -7, + 108, -75, -16, 44, -55, -70, -106, -37, 40, 60, 86, -120, 80, -103, 25, -121, 27, -23, 57, + -39, -101, 111, 2, 25, 77, 4, -69, 13, 104, -64, -4, -3, 79, -114, -68, -57, -74, 103, + -120, -87, 112, -59, 44, 27, 49, -87, -37, 121, -83, -74, -109, 75, 28, 102, 110, -11, -113, + -30, -65, -86, 59, -125, -95, -76, 121, 31, 90, 123, -76, -15, -100, 39, 21, 117, 40, -106, + -72, 5, 83, -11, -121, 123, 52, 123, -77, 12, 23, 22, 44, 98, -50, 58, -48, 90, 78, + 125, -32, 7, -81, 18, 116, 38, 81, -14, 23, 77, -90, 109, -3, 52, -21, -40, -105, -121, + 10, -11, 38, 38, 21, -71, -97, -41, 99, -7, 31, 9, -23, -78, 58, -31, 19, -98, -63, + -19, -95, -10, 23, -35, -116, -74, -72, -91, 119, 62, -111, -120, -114, -117, 100, 90, -87, -20, + -6, -49, 66, 34, 54, -73, -76, 95, -73, 40, 95, 39, 64, 44, -109, 40, 15, -40, -94, + 6, -45, 54, 40, 45, 69, -103, 88, 112, 85, -63, -127, 3, -30, -78, -80, -118, 100, 83, + -94, 23, 24, -33, 23, -78, 37, 47, -3, 26, -108, 81, -86, 107}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/input.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/input.h new file mode 100644 index 00000000..69c055f9 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/input.h @@ -0,0 +1,22 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int16_t lstm_1_s16_input[220] = { + 13736, 23944, -5299, 7737, -26699, -14813, -17886, -1647, -23298, 24851, 5100, -25001, 20574, 1552, + -14239, -24575, -21416, 15542, -17167, -14164, 9242, 15063, 2831, -5102, 18611, -24404, -1195, -5944, + 26930, -29354, -20621, 26317, -2661, 21990, 13978, -6300, -10229, -22228, -17884, 16041, 4267, -4187, + 3334, 26553, -32377, -9858, -20242, -19193, -32259, -13251, -16675, 23520, -24663, -6962, 916, 29657, + -31552, -32068, -27539, -29974, -30519, 3367, 10799, 10676, 29031, -21278, -1862, -30681, 18441, -9753, + 16575, 5763, 7587, -11638, -4090, 22728, 5364, 5460, -20007, 6908, -15077, 19795, 18050, -5621, + -9286, -29639, 28802, 30444, -26491, 5531, 10353, 8677, -21402, -11352, 20251, -11648, -20007, 11954, + -11831, 25472, -628, 14150, -8121, -5260, 29895, 14203, -30641, 15588, -4199, -32034, -30587, -12619, + 5842, 17187, 22779, 16282, 1806, 18624, 25125, 22730, -8889, -7833, 9798, 25136, -1294, -10280, + -30367, 20369, 30464, -14168, -29635, 18393, 3661, -5063, 28190, -17325, -12483, 24939, -29775, 14131, + 9790, -7480, 21842, 32219, 18428, -1242, 16135, 6860, 17424, -27147, 18622, -19581, -8972, 10224, + -1452, -31682, 3691, 31065, 21477, 13232, 3679, -3275, 26922, 13443, 26116, 20579, -14769, 23870, + 23100, -11558, -25290, -29482, -5273, -15625, 9577, 8195, 3928, 8730, -21723, 25012, 15320, -31171, + 8455, 21432, -13983, 16298, 1905, -19173, 19859, 3255, -26975, -12538, 20319, -31892, 14187, 9356, + 7952, -5000, 6683, 8591, 12667, 26386, 22751, -10073, -5509, 4861, -19329, 17171, 15986, -2173, + -2405, 31618, -4030, 12743, -32321, 26494, 15520, -20860, 17165, -1792}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/input_gate_bias.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/input_gate_bias.h new file mode 100644 index 00000000..5573eb00 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/input_gate_bias.h @@ -0,0 +1,6 @@ +// Generated 
by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int64_t lstm_1_s16_input_gate_bias[11] = {13543, 1638, 25244, 3970, 6981, 29954, 4375, 16836, 30316, 6762, 4189}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/input_gate_hidden_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/input_gate_hidden_weights.h new file mode 100644 index 00000000..17652417 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/input_gate_hidden_weights.h @@ -0,0 +1,12 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int8_t lstm_1_s16_input_gate_hidden_weights[121] = { + 110, -92, 49, 101, -32, -85, -5, -7, -92, 90, -52, -116, 2, -16, -70, -43, -93, 9, 62, 4, 24, + 104, -36, 35, -21, -50, -92, 90, -14, -49, -60, 63, -36, -22, -121, -47, 65, 93, -121, 107, -87, 36, + -122, -118, 21, 13, -75, -3, -39, -39, 24, -82, 4, 38, 122, -59, 2, 89, 20, 68, -65, 27, 95, + -123, -61, 84, 22, -111, 106, -66, -24, 51, -75, -83, 53, 19, -79, 6, 12, -19, 0, -4, 19, 122, + -101, -56, -1, 46, -12, 34, -128, 75, 90, -67, -108, -121, -13, 89, -24, 42, -108, 12, -20, -75, 19, + -43, -44, -83, 97, -41, -58, -9, -19, -107, 54, 21, 27, 59, -49, -13, 87}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/input_gate_input_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/input_gate_input_weights.h new file mode 100644 index 00000000..74548f4e --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/input_gate_input_weights.h @@ -0,0 +1,19 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int8_t lstm_1_s16_input_gate_input_weights[242] = { + 58, -27, -78, 32, -77, -64, -79, 118, 59, -60, -38, 27, 48, -27, -79, -4, 110, -126, -20, + -119, 29, 76, 63, -63, 26, -108, -87, 29, 80, -52, 87, -3, 24, 36, 32, 53, 75, -113, + -83, 107, 41, 108, -54, 30, 115, 7, -60, -38, -106, -50, -114, -15, -112, 75, -21, -55, -3, + -28, -73, 26, 126, 34, 116, -126, 72, -63, 106, -25, -73, 65, 63, -19, -112, -45, 107, 65, + -55, -48, 74, 66, -117, 108, 110, 95, -86, 29, 34, -43, 121, 82, 43, -63, 66, 77, 122, + 10, -24, -115, -112, -60, 118, 19, -61, 85, -97, -85, 5, -41, 24, 93, 58, 58, 2, 99, + 38, 6, -88, 30, 114, -20, -104, 44, 98, -68, 35, -23, -68, 11, -104, -33, 54, -10, -106, + 6, 118, 15, 64, 29, -111, 109, -5, 43, 37, 99, -96, 86, 119, 54, 36, -112, 126, -120, + 92, 79, 120, -25, 38, -44, -70, -77, 110, 65, -110, 23, -104, -81, -104, 94, -45, -112, 40, + 69, -46, 22, -32, 45, -56, 93, -92, -30, -61, 55, 47, 15, 117, -113, 11, 22, 28, -118, + 0, -42, -119, 29, 123, -17, -51, -99, -91, 120, -1, -29, -99, -117, 93, 91, 91, -68, -65, + -88, 87, 77, -35, 27, 58, 71, -33, -58, 69, 119, 94, 112, 72, 57, 78, -42, 100, 84, + -112, 45, 25, 8, 30, -19, -83, 8, 79, 124, -88, -119, 92, 81}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/output.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/output.h new file mode 100644 index 00000000..0c867fb9 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/output.h @@ -0,0 +1,13 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int16_t lstm_1_s16_output[110] = { + -318, 2734, 1144, 2729, -1234, 1629, -44, 1369, -8, -10, 15, -19, 384, 2444, 1079, 0, + 713, -162, 252, -19, -542, 164, 109, 100, -2191, 24, 0, 189, -2293, -456, 9, -460, + 1051, 470, 120, -4504, 259, -2564, 146, -2036, 971, 1754, 332, -2445, 923, -257, -1112, -1547, + -69, 797, -954, -2815, 35, -2393, -4818, 672, 4690, -181, -146, -536, -84, -917, -190, -3255, + 0, -1013, 1482, 328, -3916, -83, -1429, -2188, -215, -7, -2003, 825, -32, -1072, -341, -260, + -8, -758, 1097, 1027, -1, -1078, 11, -46, -134, -1857, -194, -561, 1380, -1835, -30, -65, + 0, -10, 236, -171, -1609, -2539, -1146, -348, -81, -275, 52, -7, 2, -1087}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/output_gate_bias.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/output_gate_bias.h new file mode 100644 index 00000000..a4a36292 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/output_gate_bias.h @@ -0,0 +1,7 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int64_t lstm_1_s16_output_gate_bias[11] = + {8300, 1579, 11812, 11224, 18423, 11748, 9932, 26717, 23831, 11737, 10113}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/output_gate_hidden_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/output_gate_hidden_weights.h new file mode 100644 index 00000000..41efc9b4 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/output_gate_hidden_weights.h @@ -0,0 +1,12 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int8_t lstm_1_s16_output_gate_hidden_weights[121] = { + -45, -67, -20, 112, -54, 13, 107, 74, -96, -111, 120, -102, -12, -21, 123, 62, 98, -19, -12, 32, -55, + 72, 59, -34, 40, 13, -89, 27, 72, -119, 96, 119, 26, -37, 64, 110, 45, -99, 86, -101, -7, -12, + 80, 80, 1, 13, 114, -24, -96, 67, -17, 92, 8, 43, 28, 97, 85, -69, -112, -82, 125, -13, -84, + -16, 67, 79, 55, 29, -64, -127, 14, -26, -67, -11, 21, -44, -16, 27, 5, 95, -120, -126, -33, 67, + 25, -112, 44, 16, -80, -8, -119, 63, -63, 124, 80, 24, 8, 96, -112, 82, -3, -43, 94, -83, 120, + 10, 8, -128, 108, -126, -63, 82, 52, -68, -51, -98, 66, -86, 114, -47, 46}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/output_gate_input_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/output_gate_input_weights.h new file mode 100644 index 00000000..0b7e3439 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/output_gate_input_weights.h @@ -0,0 +1,18 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int8_t lstm_1_s16_output_gate_input_weights[242] = { + -33, 89, 114, -9, -30, -112, -117, -4, 78, -28, 94, -22, 47, 73, 54, 28, -59, -96, -107, -22, -51, + -1, -21, 61, 38, 118, 112, 18, -63, 111, -95, 100, 78, -78, -31, 21, -23, 12, 126, 122, 86, 12, + 12, 107, 57, -75, 20, -74, -105, 53, 25, -110, -35, -62, -5, 87, 123, -31, -92, 72, -112, -26, 90, + -83, 101, -80, 44, -1, 80, 22, -30, -122, -97, 46, 29, 52, 19, 117, 89, -105, -16, 66, -39, 32, + -108, 81, -83, 29, 107, -119, -30, 77, 11, 53, 101, -8, 99, 90, 85, -26, 30, -26, -10, -40, -45, + -118, -98, -71, 1, 11, 85, -62, -97, 117, 56, 29, -62, -90, -81, -62, -27, 23, 124, -84, -18, -88, + 72, -84, 1, -34, -76, -27, -57, -46, 123, -118, -58, 42, 108, -46, -91, 13, 87, -127, -18, -57, 68, + -73, -49, -20, -24, 111, 19, 84, -96, -38, 66, -45, -14, -47, 78, 13, -19, 17, -65, 32, 90, -45, + 86, -79, 28, 124, -96, -56, -102, 35, -67, -101, 106, 100, 79, 112, 19, 66, 53, -58, -28, -15, 50, + -62, 54, 5, -52, -91, -122, -34, -111, 69, 83, 83, 39, -69, -98, 99, -88, -122, 18, 67, -48, 110, + -63, -93, 5, 71, 117, -83, -9, 75, -43, -101, -108, -54, 43, 2, -101, -68, 26, -24, 81, -12, -111, + -40, -54, 37, 63, 51, -11, 4, 61, 41, 70, -63}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_1_s16/test_data.h b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/test_data.h new file mode 100644 index 00000000..8355af79 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_1_s16/test_data.h @@ -0,0 +1,15 @@ +#include "cell_gate_bias.h" +#include "cell_gate_hidden_weights.h" +#include "cell_gate_input_weights.h" +#include "config_data.h" +#include "forget_gate_bias.h" +#include "forget_gate_hidden_weights.h" +#include "forget_gate_input_weights.h" +#include "input.h" +#include "input_gate_bias.h" +#include "input_gate_hidden_weights.h" +#include "input_gate_input_weights.h" +#include "output.h" +#include "output_gate_bias.h" +#include "output_gate_hidden_weights.h" 
+#include "output_gate_input_weights.h" diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/cell_gate_bias.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/cell_gate_bias.h new file mode 100644 index 00000000..5a89575f --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/cell_gate_bias.h @@ -0,0 +1,6 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int64_t lstm_2_s16_cell_gate_bias[7] = {19513, 13891, 21356, 10099, 17079, 31695, 8446}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/cell_gate_hidden_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/cell_gate_hidden_weights.h new file mode 100644 index 00000000..7320ed78 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/cell_gate_hidden_weights.h @@ -0,0 +1,9 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int8_t lstm_2_s16_cell_gate_hidden_weights[49] = { + 103, -81, -66, 119, 103, 58, -7, -56, -41, -99, 2, 12, -23, 77, -79, -4, 56, + 113, -2, -52, -77, 8, -6, 68, 77, 125, 106, -66, 88, 27, 117, 12, -44, 41, + -6, -119, 52, -82, -7, -5, -57, 71, 7, -62, -115, -108, -1, 119, 94}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/cell_gate_input_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/cell_gate_input_weights.h new file mode 100644 index 00000000..abffd05c --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/cell_gate_input_weights.h @@ -0,0 +1,8 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int8_t lstm_2_s16_cell_gate_input_weights[42] = { + -62, 75, 47, 104, 59, 76, 48, 52, 13, 68, -69, 93, 8, 8, -95, 26, -76, -49, 108, 81, -92, + -104, 80, -118, -87, 25, 112, -56, -14, 61, -10, -26, 43, -118, -86, -108, -27, 26, -40, 77, -63, -125}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/config_data.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/config_data.h new file mode 100644 index 00000000..3c54682d --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/config_data.h @@ -0,0 +1,34 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#define lstm_2_s16_time_major false +#define lstm_2_s16_batch_size 1 +#define lstm_2_s16_time_steps 9 +#define lstm_2_s16_input_size 6 +#define lstm_2_s16_hidden_size 7 +#define lstm_2_s16_cell_scale_power -9 +#define lstm_2_s16_output_zero_point 0 +#define lstm_2_s16_input_zero_point 0 +#define lstm_2_s16_cell_clip 32767 +#define lstm_2_s16_forget_to_cell_multiplier 1073741824 +#define lstm_2_s16_forget_to_cell_shift -14 +#define lstm_2_s16_input_to_cell_multiplier 1316479598 +#define lstm_2_s16_input_to_cell_shift -20 +#define lstm_2_s16_output_multiplier 1956298507 +#define lstm_2_s16_output_shift -17 +#define lstm_2_s16_output_gate_hidden_multiplier 1549742675 +#define lstm_2_s16_output_gate_hidden_shift -11 +#define lstm_2_s16_cell_gate_hidden_multiplier 1564226251 +#define lstm_2_s16_cell_gate_hidden_shift -12 +#define lstm_2_s16_forget_gate_hidden_multiplier 1670439145 +#define lstm_2_s16_forget_gate_hidden_shift -12 +#define lstm_2_s16_input_gate_hidden_multiplier 1164720936 +#define lstm_2_s16_input_gate_hidden_shift -10 +#define lstm_2_s16_output_gate_input_multiplier 1924775430 +#define lstm_2_s16_output_gate_input_shift -10 +#define lstm_2_s16_cell_gate_input_multiplier 1582330722 +#define 
lstm_2_s16_cell_gate_input_shift -11 +#define lstm_2_s16_forget_gate_input_multiplier 1281215547 +#define lstm_2_s16_forget_gate_input_shift -9 +#define lstm_2_s16_input_gate_input_multiplier 1578394576 +#define lstm_2_s16_input_gate_input_shift -9 diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/forget_gate_bias.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/forget_gate_bias.h new file mode 100644 index 00000000..a673376e --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/forget_gate_bias.h @@ -0,0 +1,6 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int64_t lstm_2_s16_forget_gate_bias[7] = {20964, 16982, 16930, 22508, 31250, 30498, 16337}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/forget_gate_hidden_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/forget_gate_hidden_weights.h new file mode 100644 index 00000000..7990b6cd --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/forget_gate_hidden_weights.h @@ -0,0 +1,9 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int8_t lstm_2_s16_forget_gate_hidden_weights[49] = { + 118, 14, 8, -95, -3, -65, 2, -67, 41, 104, 18, 51, 21, -4, -86, 31, 56, + -60, -75, 74, 73, -12, 115, 1, -109, -119, 96, 15, -31, -37, 25, 0, -22, -24, + 95, 30, -6, 23, -25, -59, 105, -84, 102, -16, 19, -118, 42, -124, -38}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/forget_gate_input_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/forget_gate_input_weights.h new file mode 100644 index 00000000..d639622f --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/forget_gate_input_weights.h @@ -0,0 +1,8 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int8_t lstm_2_s16_forget_gate_input_weights[42] = { + -37, -21, 118, -39, 90, -97, -99, 73, 59, -12, 1, -74, -39, -125, -60, 101, 68, -22, -65, 47, 55, + 114, 86, 48, 57, -72, -103, 103, -3, 78, 123, -51, 78, -100, -100, -54, -25, -40, -21, 33, 60, -107}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/input.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/input.h new file mode 100644 index 00000000..a3b0c229 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/input.h @@ -0,0 +1,10 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int16_t lstm_2_s16_input[54] = { + 23536, 27304, 24693, 30688, 19633, -25440, 14520, -25776, -23552, -13305, -30589, -2019, 18827, 25138, + 24159, 18605, 10528, 3507, 13149, 27508, -23963, -28654, 12585, -1653, 14551, -23416, 6110, -24065, + -19733, -24258, 13594, 28016, 13389, 201, 3040, -9602, 7736, -26340, -32711, 13470, 27619, -4043, + 30212, -20972, -1625, 30113, 937, 17488, -13724, -26733, 25147, 11850, -17325, -19942}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/input_gate_bias.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/input_gate_bias.h new file mode 100644 index 00000000..c2b86290 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/input_gate_bias.h @@ -0,0 +1,6 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int64_t lstm_2_s16_input_gate_bias[7] = {28793, 9909, 4344, 1117, 3627, 30848, 32766}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/input_gate_hidden_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/input_gate_hidden_weights.h new file mode 100644 index 00000000..4fc2d2ba --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/input_gate_hidden_weights.h @@ -0,0 +1,9 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int8_t lstm_2_s16_input_gate_hidden_weights[49] = { + 3, 53, -35, -6, -43, -23, -98, 16, -114, -96, 15, 40, 80, -82, 109, -22, 13, + 117, 90, 87, -52, 58, -46, 106, 34, -90, -63, -121, 106, 122, -101, 55, 7, 51, + -55, 7, 124, 53, 97, -96, -9, -61, -25, 123, -69, -1, -55, 69, -104}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/input_gate_input_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/input_gate_input_weights.h new file mode 100644 index 00000000..37d25b83 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/input_gate_input_weights.h @@ -0,0 +1,8 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int8_t lstm_2_s16_input_gate_input_weights[42] = { + 83, 92, 125, 94, -48, 9, 118, 97, -55, -91, 102, -33, 107, 93, -3, 93, -122, 89, 117, 65, 89, + 80, -120, -87, 101, 34, 70, -7, 103, -55, -96, 82, 121, 58, 17, -28, 78, -96, 107, -127, -40, 26}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/output.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/output.h new file mode 100644 index 00000000..70f397c1 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/output.h @@ -0,0 +1,10 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int16_t lstm_2_s16_output[63] = {1163, 635, -271, 765, -382, -1332, 230, -197, 83, 1107, 452, -1470, -597, + 433, 915, 1438, -536, 235, -44, -1806, 51, 367, 15, 92, 1041, -668, + -646, -282, -912, -703, 283, 1122, -585, 71, 239, 19, 34, -123, 1358, + 10, 266, 221, -49, -169, 0, 1190, -1863, -84, 304, 151, 313, 73, + -379, -2443, -498, -115, -164, 305, 28, -1771, -415, 981, 674}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/output_gate_bias.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/output_gate_bias.h new file mode 100644 index 00000000..0ec709fc --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/output_gate_bias.h @@ -0,0 +1,6 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int64_t lstm_2_s16_output_gate_bias[7] = {5138, 5363, 14184, 3723, 23687, 28588, 26805}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/output_gate_hidden_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/output_gate_hidden_weights.h new file mode 100644 index 00000000..69b797d5 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/output_gate_hidden_weights.h @@ -0,0 +1,9 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int8_t lstm_2_s16_output_gate_hidden_weights[49] = { + 29, -8, -81, -77, -33, 23, 74, -78, 68, -77, 22, -74, 95, 112, 13, 97, -124, + -106, -3, -122, 110, -124, -74, 47, 97, 11, 44, 8, -100, 91, -103, 85, 68, 13, + 28, 119, -94, -74, -39, -15, -45, 60, 23, 37, 52, 70, 83, 111, -9}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/output_gate_input_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/output_gate_input_weights.h new file mode 100644 index 00000000..6263cf16 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/output_gate_input_weights.h @@ -0,0 +1,8 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int8_t lstm_2_s16_output_gate_input_weights[42] = { + -83, -37, -5, -58, 37, -85, 37, 121, 112, -12, -104, -55, 116, -43, 33, -108, -15, 79, 8, -34, -64, + -12, -63, 20, 90, -89, -126, -49, -43, -16, 81, 65, 78, -7, -63, -92, -91, 8, -108, -8, 37, -67}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_2_s16/test_data.h b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/test_data.h new file mode 100644 index 00000000..8355af79 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_2_s16/test_data.h @@ -0,0 +1,15 @@ +#include "cell_gate_bias.h" +#include "cell_gate_hidden_weights.h" +#include "cell_gate_input_weights.h" +#include "config_data.h" +#include "forget_gate_bias.h" +#include "forget_gate_hidden_weights.h" +#include "forget_gate_input_weights.h" +#include "input.h" +#include "input_gate_bias.h" +#include "input_gate_hidden_weights.h" +#include "input_gate_input_weights.h" +#include "output.h" +#include "output_gate_bias.h" +#include "output_gate_hidden_weights.h" +#include "output_gate_input_weights.h" diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/cell_gate_bias.h 
b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/cell_gate_bias.h new file mode 100644 index 00000000..9c2539a5 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/cell_gate_bias.h @@ -0,0 +1,6 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int64_t lstm_one_time_step_s16_cell_gate_bias[3] = {20195, 27538, 32653}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/cell_gate_hidden_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/cell_gate_hidden_weights.h new file mode 100644 index 00000000..39f4ea18 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/cell_gate_hidden_weights.h @@ -0,0 +1,6 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int8_t lstm_one_time_step_s16_cell_gate_hidden_weights[9] = {96, 122, 83, -16, 68, -124, -120, 18, -57}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/cell_gate_input_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/cell_gate_input_weights.h new file mode 100644 index 00000000..931d2608 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/cell_gate_input_weights.h @@ -0,0 +1,9 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int8_t lstm_one_time_step_s16_cell_gate_input_weights[66] = { + 87, 101, 93, 23, -94, -70, -51, -59, -76, -21, -16, -37, 122, -91, -112, -111, -46, -68, -38, 15, -38, 74, + -97, -9, -61, -7, -4, 94, 105, -112, -123, -18, -29, 49, 42, -13, 34, 83, 106, 60, -24, 96, 48, -86, + 66, 97, -90, -21, 101, -90, -6, 78, -1, 41, 2, 8, 11, -123, -79, -12, -36, -25, -37, -127, 73, 100}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/config_data.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/config_data.h new file mode 100644 index 00000000..1ee11ca3 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/config_data.h @@ -0,0 +1,34 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#define lstm_one_time_step_s16_time_major false +#define lstm_one_time_step_s16_batch_size 3 +#define lstm_one_time_step_s16_time_steps 1 +#define lstm_one_time_step_s16_input_size 22 +#define lstm_one_time_step_s16_hidden_size 3 +#define lstm_one_time_step_s16_cell_scale_power -10 +#define lstm_one_time_step_s16_output_zero_point 0 +#define lstm_one_time_step_s16_input_zero_point 0 +#define lstm_one_time_step_s16_cell_clip 32767 +#define lstm_one_time_step_s16_forget_to_cell_multiplier 1073741824 +#define lstm_one_time_step_s16_forget_to_cell_shift -14 +#define lstm_one_time_step_s16_input_to_cell_multiplier 1899594203 +#define lstm_one_time_step_s16_input_to_cell_shift -20 +#define lstm_one_time_step_s16_output_multiplier 1713359477 +#define lstm_one_time_step_s16_output_shift -17 +#define lstm_one_time_step_s16_output_gate_hidden_multiplier 1328489832 +#define lstm_one_time_step_s16_output_gate_hidden_shift -11 +#define lstm_one_time_step_s16_cell_gate_hidden_multiplier 1181032973 +#define lstm_one_time_step_s16_cell_gate_hidden_shift -10 +#define 
lstm_one_time_step_s16_forget_gate_hidden_multiplier 2122276288 +#define lstm_one_time_step_s16_forget_gate_hidden_shift -12 +#define lstm_one_time_step_s16_input_gate_hidden_multiplier 1120396508 +#define lstm_one_time_step_s16_input_gate_hidden_shift -10 +#define lstm_one_time_step_s16_output_gate_input_multiplier 1673987981 +#define lstm_one_time_step_s16_output_gate_input_shift -10 +#define lstm_one_time_step_s16_cell_gate_input_multiplier 1551228863 +#define lstm_one_time_step_s16_cell_gate_input_shift -8 +#define lstm_one_time_step_s16_forget_gate_input_multiplier 1681427928 +#define lstm_one_time_step_s16_forget_gate_input_shift -11 +#define lstm_one_time_step_s16_input_gate_input_multiplier 1677707955 +#define lstm_one_time_step_s16_input_gate_input_shift -8 diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/forget_gate_bias.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/forget_gate_bias.h new file mode 100644 index 00000000..ee744c92 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/forget_gate_bias.h @@ -0,0 +1,6 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int64_t lstm_one_time_step_s16_forget_gate_bias[3] = {2956, 21127, 23202}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/forget_gate_hidden_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/forget_gate_hidden_weights.h new file mode 100644 index 00000000..1c2e881b --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/forget_gate_hidden_weights.h @@ -0,0 +1,6 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int8_t lstm_one_time_step_s16_forget_gate_hidden_weights[9] = {43, 123, -49, -80, 18, -113, 97, 41, 74}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/forget_gate_input_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/forget_gate_input_weights.h new file mode 100644 index 00000000..883e0c9b --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/forget_gate_input_weights.h @@ -0,0 +1,9 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int8_t lstm_one_time_step_s16_forget_gate_input_weights[66] = { + 25, -38, 56, -13, 108, -21, 92, -97, 118, 105, 101, -2, -44, 123, -17, -23, 3, 92, 118, -115, -18, 78, + 27, 103, -108, -41, -8, 37, -115, 55, 59, 56, 15, 76, -119, 18, -72, 124, 20, -8, -29, 75, 66, 28, + 51, -44, 42, -13, 101, 41, -102, 30, -118, -55, -19, 13, -106, 19, 19, 118, 106, -79, -15, -125, 29, -109}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input.h new file mode 100644 index 00000000..bcedeea9 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input.h @@ -0,0 +1,11 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int16_t lstm_one_time_step_s16_input[66] = { + 26737, -3312, 27665, 19141, 4716, -7008, 4549, -21899, -25689, 4240, -17667, 22726, 10924, 30458, + 7515, -17421, -25080, -26557, -16850, 6022, -22044, -32139, -10312, 28342, -30325, 25737, 10769, 3648, + -11893, -26036, -15103, -9396, -16565, -12245, -25711, -27976, 8469, -3610, 7150, -23798, -1771, -586, + -7059, -25873, 24134, -8545, -10029, -8123, 17637, -9169, 19492, 24515, 26091, -15449, 16251, 3491, + -32627, 28448, -6072, 26552, 32338, 258, 29453, -29835, -17778, -15912}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input_gate_bias.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input_gate_bias.h new file mode 100644 index 00000000..10828957 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input_gate_bias.h @@ -0,0 +1,6 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int64_t lstm_one_time_step_s16_input_gate_bias[3] = {20943, 5502, 20939}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input_gate_hidden_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input_gate_hidden_weights.h new file mode 100644 index 00000000..a7953bf8 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input_gate_hidden_weights.h @@ -0,0 +1,6 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int8_t lstm_one_time_step_s16_input_gate_hidden_weights[9] = {76, 36, 15, 85, 17, 55, 33, 25, -31}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input_gate_input_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input_gate_input_weights.h new file mode 100644 index 00000000..74270d4e --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/input_gate_input_weights.h @@ -0,0 +1,9 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int8_t lstm_one_time_step_s16_input_gate_input_weights[66] = { + 18, -27, 78, 115, 101, -86, 106, 102, 23, 30, 13, -90, -89, -67, 51, 11, -97, -34, -113, 87, 88, 15, + 55, -55, -116, -11, -72, 101, -107, 59, -29, -119, 109, -4, 120, -22, 4, 57, 26, -87, 79, 19, 81, 28, + -126, 96, 115, -100, 27, 34, 14, 78, 84, 28, -56, -67, 75, 33, -93, -70, 19, 76, 103, -47, 35, 69}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output.h new file mode 100644 index 00000000..d2858128 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output.h @@ -0,0 +1,6 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int16_t lstm_one_time_step_s16_output[9] = {2054, 0, 0, 1138, 129, 0, -61, -2801, 225}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output_gate_bias.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output_gate_bias.h new file mode 100644 index 00000000..0fc61ce6 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output_gate_bias.h @@ -0,0 +1,6 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int64_t lstm_one_time_step_s16_output_gate_bias[3] = {32739, 16303, 29216}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output_gate_hidden_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output_gate_hidden_weights.h new file mode 100644 index 00000000..3f105f76 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output_gate_hidden_weights.h @@ -0,0 +1,6 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. +#pragma once +#include + +const int8_t lstm_one_time_step_s16_output_gate_hidden_weights[9] = {63, 24, 19, 25, 71, -107, 75, -20, 80}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output_gate_input_weights.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output_gate_input_weights.h new file mode 100644 index 00000000..246948eb --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/output_gate_input_weights.h @@ -0,0 +1,9 @@ +// Generated by RefactoredTestGen/test.py using flatc version 23.5.26 +// Interpreter from tflite_micro runtime version 0.dev20240224054047-gcfa4c91. 
+#pragma once +#include + +const int8_t lstm_one_time_step_s16_output_gate_input_weights[66] = { + 107, 18, 61, 4, 27, 96, 76, -43, -48, -63, -25, 18, -58, -69, 101, -75, -36, 68, 61, 126, 122, 78, + 90, -88, -36, 115, -6, -28, 99, 96, 55, -108, -34, 70, 102, 116, 98, -103, -55, 76, 98, -78, -53, 81, + -47, 66, 98, -128, 103, -98, -124, -25, -37, -19, -60, 4, 46, 28, 90, 88, -90, -71, 124, 52, 58, 83}; \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/test_data.h b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/test_data.h new file mode 100644 index 00000000..8355af79 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/lstm_one_time_step_s16/test_data.h @@ -0,0 +1,15 @@ +#include "cell_gate_bias.h" +#include "cell_gate_hidden_weights.h" +#include "cell_gate_input_weights.h" +#include "config_data.h" +#include "forget_gate_bias.h" +#include "forget_gate_hidden_weights.h" +#include "forget_gate_input_weights.h" +#include "input.h" +#include "input_gate_bias.h" +#include "input_gate_hidden_weights.h" +#include "input_gate_input_weights.h" +#include "output.h" +#include "output_gate_bias.h" +#include "output_gate_hidden_weights.h" +#include "output_gate_input_weights.h" diff --git a/Tests/UnitTest/TestCases/test_arm_lstm_unidirectional_s16/CMakeLists.txt b/Tests/UnitTest/TestCases/test_arm_lstm_unidirectional_s16/CMakeLists.txt new file mode 100644 index 00000000..16e9eddd --- /dev/null +++ b/Tests/UnitTest/TestCases/test_arm_lstm_unidirectional_s16/CMakeLists.txt @@ -0,0 +1,23 @@ +# +# SPDX-FileCopyrightText: Copyright 2010-2022, 2024 Arm Limited and/or its affiliates +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +add_cmsis_nn_unit_test_executable(lstm_unidirectional_s16) + +target_sources(lstm_unidirectional_s16 PRIVATE + Unity/unity_test_arm_lstm_unidirectional_s16.c + Unity/TestRunner/unity_test_arm_lstm_unidirectional_s16_runner.c) \ No newline at end of file diff --git a/Tests/UnitTest/TestCases/test_arm_lstm_unidirectional_s16/Unity/unity_test_arm_lstm_unidirectional_s16.c b/Tests/UnitTest/TestCases/test_arm_lstm_unidirectional_s16/Unity/unity_test_arm_lstm_unidirectional_s16.c new file mode 100644 index 00000000..b9ef170e --- /dev/null +++ b/Tests/UnitTest/TestCases/test_arm_lstm_unidirectional_s16/Unity/unity_test_arm_lstm_unidirectional_s16.c @@ -0,0 +1,48 @@ +/* + * SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +#include "../test_arm_lstm_unidirectional_s16.c" +#include "unity.h" + +#ifdef USING_FVP_CORSTONE_300 +extern void uart_init(void); +#endif + +/* This function is called from the autogenerated file. + * The name must be exactly like this + */ +void setUp(void) +{ /* This is run before EACH TEST */ +#ifdef USING_FVP_CORSTONE_300 + uart_init(); +#endif +} + +/* This function is called from the autogenerated file. + * The name must be exactly like this + */ +void tearDown(void) {} +void test_lstm_1_s16(void) { lstm_1_s16(); } +void test_lstm_2_s16(void) { lstm_2_s16(); } +void test_lstm_one_time_step_s16(void) { lstm_one_time_step_s16(); } diff --git a/Tests/UnitTest/TestCases/test_arm_lstm_unidirectional_s16/test_arm_lstm_unidirectional_s16.c b/Tests/UnitTest/TestCases/test_arm_lstm_unidirectional_s16/test_arm_lstm_unidirectional_s16.c new file mode 100644 index 00000000..83abb468 --- /dev/null +++ b/Tests/UnitTest/TestCases/test_arm_lstm_unidirectional_s16/test_arm_lstm_unidirectional_s16.c @@ -0,0 +1,475 @@ +/* + * SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../TestData/lstm_1_s16/test_data.h" +#include "../TestData/lstm_2_s16/test_data.h" +#include "../TestData/lstm_one_time_step_s16/test_data.h" +#include "../Utils/validate.h" +#include +#include +#include +#include +#include + +// update the buffer size if adding a unit test with larger buffer. +#define LARGEST_BUFFER_SIZE lstm_1_s16_hidden_size *lstm_1_s16_batch_size *lstm_1_s16_time_steps + +int16_t buffer1[LARGEST_BUFFER_SIZE]; +int16_t buffer2[LARGEST_BUFFER_SIZE]; +int16_t buffer3[LARGEST_BUFFER_SIZE]; + +void lstm_1_s16(void) +{ + int16_t output[lstm_1_s16_batch_size * lstm_1_s16_time_steps * lstm_1_s16_hidden_size] = {0}; + const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS; + const int16_t *output_ref = &lstm_1_s16_output[0]; + const int32_t output_ref_size = lstm_1_s16_batch_size * lstm_1_s16_time_steps * lstm_1_s16_hidden_size; + + int64_t input_data_kernel_sum[lstm_1_s16_hidden_size]; + int64_t forget_data_kernel_sum[lstm_1_s16_hidden_size]; + int64_t cell_data_kernel_sum[lstm_1_s16_hidden_size]; + int64_t output_data_kernel_sum[lstm_1_s16_hidden_size]; + + int64_t input_hidden_kernel_sum[lstm_1_s16_hidden_size]; + int64_t forget_hidden_kernel_sum[lstm_1_s16_hidden_size]; + int64_t cell_hidden_kernel_sum[lstm_1_s16_hidden_size]; + int64_t output_hidden_kernel_sum[lstm_1_s16_hidden_size]; + + arm_vector_sum_s8_s64(&input_data_kernel_sum[0], + lstm_1_s16_input_size, + lstm_1_s16_hidden_size, + &lstm_1_s16_input_gate_input_weights[0], + lstm_1_s16_input_zero_point, + &lstm_1_s16_input_gate_bias[0]); + arm_vector_sum_s8_s64(&forget_data_kernel_sum[0], + lstm_1_s16_input_size, + lstm_1_s16_hidden_size, + &lstm_1_s16_forget_gate_input_weights[0], + lstm_1_s16_input_zero_point, + &lstm_1_s16_forget_gate_bias[0]); + arm_vector_sum_s8_s64(&cell_data_kernel_sum[0], + lstm_1_s16_input_size, + lstm_1_s16_hidden_size, + &lstm_1_s16_cell_gate_input_weights[0], + lstm_1_s16_input_zero_point, + &lstm_1_s16_cell_gate_bias[0]); + 
arm_vector_sum_s8_s64(&output_data_kernel_sum[0], + lstm_1_s16_input_size, + lstm_1_s16_hidden_size, + &lstm_1_s16_output_gate_input_weights[0], + lstm_1_s16_input_zero_point, + &lstm_1_s16_output_gate_bias[0]); + + arm_vector_sum_s8_s64(&input_hidden_kernel_sum[0], + lstm_1_s16_hidden_size, + lstm_1_s16_hidden_size, + &lstm_1_s16_input_gate_hidden_weights[0], + -lstm_1_s16_output_zero_point, + NULL); + arm_vector_sum_s8_s64(&forget_hidden_kernel_sum[0], + lstm_1_s16_hidden_size, + lstm_1_s16_hidden_size, + &lstm_1_s16_forget_gate_hidden_weights[0], + -lstm_1_s16_output_zero_point, + NULL); + arm_vector_sum_s8_s64(&cell_hidden_kernel_sum[0], + lstm_1_s16_hidden_size, + lstm_1_s16_hidden_size, + &lstm_1_s16_cell_gate_hidden_weights[0], + -lstm_1_s16_output_zero_point, + NULL); + arm_vector_sum_s8_s64(&output_hidden_kernel_sum[0], + lstm_1_s16_hidden_size, + lstm_1_s16_hidden_size, + &lstm_1_s16_output_gate_hidden_weights[0], + -lstm_1_s16_output_zero_point, + NULL); + + // INPUT GATE + const cmsis_nn_lstm_gate gate_input = {lstm_1_s16_input_gate_input_multiplier, + lstm_1_s16_input_gate_input_shift, + &lstm_1_s16_input_gate_input_weights[0], + &input_data_kernel_sum[0], + lstm_1_s16_input_gate_hidden_multiplier, + lstm_1_s16_input_gate_hidden_shift, + &lstm_1_s16_input_gate_hidden_weights[0], + &input_hidden_kernel_sum[0], + &lstm_1_s16_input_gate_bias[0], + ARM_SIGMOID}; + + // FORGET GATE + const cmsis_nn_lstm_gate gate_forget = {lstm_1_s16_forget_gate_input_multiplier, + lstm_1_s16_forget_gate_input_shift, + &lstm_1_s16_forget_gate_input_weights[0], + &forget_data_kernel_sum[0], + lstm_1_s16_forget_gate_hidden_multiplier, + lstm_1_s16_forget_gate_hidden_shift, + &lstm_1_s16_forget_gate_hidden_weights[0], + &forget_hidden_kernel_sum[0], + &lstm_1_s16_forget_gate_bias[0], + ARM_SIGMOID}; + + // CELL GATE + const cmsis_nn_lstm_gate gate_cell = {lstm_1_s16_cell_gate_input_multiplier, + lstm_1_s16_cell_gate_input_shift, + &lstm_1_s16_cell_gate_input_weights[0], + 
&cell_data_kernel_sum[0], + lstm_1_s16_cell_gate_hidden_multiplier, + lstm_1_s16_cell_gate_hidden_shift, + &lstm_1_s16_cell_gate_hidden_weights[0], + &cell_hidden_kernel_sum[0], + &lstm_1_s16_cell_gate_bias[0], + ARM_TANH}; + + // OUTPUT GATE + const cmsis_nn_lstm_gate gate_output = {lstm_1_s16_output_gate_input_multiplier, + lstm_1_s16_output_gate_input_shift, + &lstm_1_s16_output_gate_input_weights[0], + &output_data_kernel_sum[0], + lstm_1_s16_output_gate_hidden_multiplier, + lstm_1_s16_output_gate_hidden_shift, + &lstm_1_s16_output_gate_hidden_weights[0], + &output_hidden_kernel_sum[0], + &lstm_1_s16_output_gate_bias[0], + ARM_SIGMOID}; + + // LSTM DATA + const cmsis_nn_lstm_params params = {lstm_1_s16_time_major, + lstm_1_s16_batch_size, + lstm_1_s16_time_steps, + lstm_1_s16_input_size, + lstm_1_s16_hidden_size, + lstm_1_s16_input_zero_point, + lstm_1_s16_forget_to_cell_multiplier, + lstm_1_s16_forget_to_cell_shift, + lstm_1_s16_input_to_cell_multiplier, + lstm_1_s16_input_to_cell_shift, + lstm_1_s16_cell_clip, + lstm_1_s16_cell_scale_power, + lstm_1_s16_output_multiplier, + lstm_1_s16_output_shift, + lstm_1_s16_output_zero_point, + gate_forget, + gate_input, + gate_cell, + gate_output}; + + cmsis_nn_lstm_context buffers; + buffers.temp1 = buffer1; + buffers.temp2 = buffer2; + buffers.cell_state = buffer3; + + arm_cmsis_nn_status result = arm_lstm_unidirectional_s16(lstm_1_s16_input, output, ¶ms, &buffers); + + TEST_ASSERT_EQUAL(expected, result); + TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size)); +} +void lstm_2_s16(void) +{ + int16_t output[lstm_2_s16_batch_size * lstm_2_s16_time_steps * lstm_2_s16_hidden_size] = {0}; + const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS; + const int16_t *output_ref = &lstm_2_s16_output[0]; + const int32_t output_ref_size = lstm_2_s16_batch_size * lstm_2_s16_time_steps * lstm_2_s16_hidden_size; + + int64_t input_data_kernel_sum[lstm_2_s16_hidden_size]; + int64_t 
forget_data_kernel_sum[lstm_2_s16_hidden_size]; + int64_t cell_data_kernel_sum[lstm_2_s16_hidden_size]; + int64_t output_data_kernel_sum[lstm_2_s16_hidden_size]; + + int64_t input_hidden_kernel_sum[lstm_2_s16_hidden_size]; + int64_t forget_hidden_kernel_sum[lstm_2_s16_hidden_size]; + int64_t cell_hidden_kernel_sum[lstm_2_s16_hidden_size]; + int64_t output_hidden_kernel_sum[lstm_2_s16_hidden_size]; + + arm_vector_sum_s8_s64(&input_data_kernel_sum[0], + lstm_2_s16_input_size, + lstm_2_s16_hidden_size, + &lstm_2_s16_input_gate_input_weights[0], + lstm_2_s16_input_zero_point, + &lstm_2_s16_input_gate_bias[0]); + arm_vector_sum_s8_s64(&forget_data_kernel_sum[0], + lstm_2_s16_input_size, + lstm_2_s16_hidden_size, + &lstm_2_s16_forget_gate_input_weights[0], + lstm_2_s16_input_zero_point, + &lstm_2_s16_forget_gate_bias[0]); + arm_vector_sum_s8_s64(&cell_data_kernel_sum[0], + lstm_2_s16_input_size, + lstm_2_s16_hidden_size, + &lstm_2_s16_cell_gate_input_weights[0], + lstm_2_s16_input_zero_point, + &lstm_2_s16_cell_gate_bias[0]); + arm_vector_sum_s8_s64(&output_data_kernel_sum[0], + lstm_2_s16_input_size, + lstm_2_s16_hidden_size, + &lstm_2_s16_output_gate_input_weights[0], + lstm_2_s16_input_zero_point, + &lstm_2_s16_output_gate_bias[0]); + + arm_vector_sum_s8_s64(&input_hidden_kernel_sum[0], + lstm_2_s16_hidden_size, + lstm_2_s16_hidden_size, + &lstm_2_s16_input_gate_hidden_weights[0], + -lstm_2_s16_output_zero_point, + NULL); + arm_vector_sum_s8_s64(&forget_hidden_kernel_sum[0], + lstm_2_s16_hidden_size, + lstm_2_s16_hidden_size, + &lstm_2_s16_forget_gate_hidden_weights[0], + -lstm_2_s16_output_zero_point, + NULL); + arm_vector_sum_s8_s64(&cell_hidden_kernel_sum[0], + lstm_2_s16_hidden_size, + lstm_2_s16_hidden_size, + &lstm_2_s16_cell_gate_hidden_weights[0], + -lstm_2_s16_output_zero_point, + NULL); + arm_vector_sum_s8_s64(&output_hidden_kernel_sum[0], + lstm_2_s16_hidden_size, + lstm_2_s16_hidden_size, + &lstm_2_s16_output_gate_hidden_weights[0], + 
-lstm_2_s16_output_zero_point, + NULL); + + // INPUT GATE + const cmsis_nn_lstm_gate gate_input = {lstm_2_s16_input_gate_input_multiplier, + lstm_2_s16_input_gate_input_shift, + &lstm_2_s16_input_gate_input_weights[0], + &input_data_kernel_sum[0], + lstm_2_s16_input_gate_hidden_multiplier, + lstm_2_s16_input_gate_hidden_shift, + &lstm_2_s16_input_gate_hidden_weights[0], + &input_hidden_kernel_sum[0], + &lstm_2_s16_input_gate_bias[0], + ARM_SIGMOID}; + + // FORGET GATE + const cmsis_nn_lstm_gate gate_forget = {lstm_2_s16_forget_gate_input_multiplier, + lstm_2_s16_forget_gate_input_shift, + &lstm_2_s16_forget_gate_input_weights[0], + &forget_data_kernel_sum[0], + lstm_2_s16_forget_gate_hidden_multiplier, + lstm_2_s16_forget_gate_hidden_shift, + &lstm_2_s16_forget_gate_hidden_weights[0], + &forget_hidden_kernel_sum[0], + &lstm_2_s16_forget_gate_bias[0], + ARM_SIGMOID}; + + // CELL GATE + const cmsis_nn_lstm_gate gate_cell = {lstm_2_s16_cell_gate_input_multiplier, + lstm_2_s16_cell_gate_input_shift, + &lstm_2_s16_cell_gate_input_weights[0], + &cell_data_kernel_sum[0], + lstm_2_s16_cell_gate_hidden_multiplier, + lstm_2_s16_cell_gate_hidden_shift, + &lstm_2_s16_cell_gate_hidden_weights[0], + &cell_hidden_kernel_sum[0], + &lstm_2_s16_cell_gate_bias[0], + ARM_TANH}; + + // OUTPUT GATE + const cmsis_nn_lstm_gate gate_output = {lstm_2_s16_output_gate_input_multiplier, + lstm_2_s16_output_gate_input_shift, + &lstm_2_s16_output_gate_input_weights[0], + &output_data_kernel_sum[0], + lstm_2_s16_output_gate_hidden_multiplier, + lstm_2_s16_output_gate_hidden_shift, + &lstm_2_s16_output_gate_hidden_weights[0], + &output_hidden_kernel_sum[0], + &lstm_2_s16_output_gate_bias[0], + ARM_SIGMOID}; + + // LSTM DATA + const cmsis_nn_lstm_params params = {lstm_2_s16_time_major, + lstm_2_s16_batch_size, + lstm_2_s16_time_steps, + lstm_2_s16_input_size, + lstm_2_s16_hidden_size, + lstm_2_s16_input_zero_point, + lstm_2_s16_forget_to_cell_multiplier, + lstm_2_s16_forget_to_cell_shift, + 
lstm_2_s16_input_to_cell_multiplier, + lstm_2_s16_input_to_cell_shift, + lstm_2_s16_cell_clip, + lstm_2_s16_cell_scale_power, + lstm_2_s16_output_multiplier, + lstm_2_s16_output_shift, + lstm_2_s16_output_zero_point, + gate_forget, + gate_input, + gate_cell, + gate_output}; + + cmsis_nn_lstm_context buffers; + buffers.temp1 = buffer1; + buffers.temp2 = buffer2; + buffers.cell_state = buffer3; + + arm_cmsis_nn_status result = arm_lstm_unidirectional_s16(lstm_2_s16_input, output, ¶ms, &buffers); + + TEST_ASSERT_EQUAL(expected, result); + TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size)); +} +void lstm_one_time_step_s16(void) +{ + int16_t output[lstm_one_time_step_s16_batch_size * lstm_one_time_step_s16_time_steps * + lstm_one_time_step_s16_hidden_size] = {0}; + const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS; + const int16_t *output_ref = &lstm_one_time_step_s16_output[0]; + const int32_t output_ref_size = + lstm_one_time_step_s16_batch_size * lstm_one_time_step_s16_time_steps * lstm_one_time_step_s16_hidden_size; + + int64_t input_data_kernel_sum[lstm_one_time_step_s16_hidden_size]; + int64_t forget_data_kernel_sum[lstm_one_time_step_s16_hidden_size]; + int64_t cell_data_kernel_sum[lstm_one_time_step_s16_hidden_size]; + int64_t output_data_kernel_sum[lstm_one_time_step_s16_hidden_size]; + + int64_t input_hidden_kernel_sum[lstm_one_time_step_s16_hidden_size]; + int64_t forget_hidden_kernel_sum[lstm_one_time_step_s16_hidden_size]; + int64_t cell_hidden_kernel_sum[lstm_one_time_step_s16_hidden_size]; + int64_t output_hidden_kernel_sum[lstm_one_time_step_s16_hidden_size]; + + arm_vector_sum_s8_s64(&input_data_kernel_sum[0], + lstm_one_time_step_s16_input_size, + lstm_one_time_step_s16_hidden_size, + &lstm_one_time_step_s16_input_gate_input_weights[0], + lstm_one_time_step_s16_input_zero_point, + &lstm_one_time_step_s16_input_gate_bias[0]); + arm_vector_sum_s8_s64(&forget_data_kernel_sum[0], + lstm_one_time_step_s16_input_size, + 
lstm_one_time_step_s16_hidden_size, + &lstm_one_time_step_s16_forget_gate_input_weights[0], + lstm_one_time_step_s16_input_zero_point, + &lstm_one_time_step_s16_forget_gate_bias[0]); + arm_vector_sum_s8_s64(&cell_data_kernel_sum[0], + lstm_one_time_step_s16_input_size, + lstm_one_time_step_s16_hidden_size, + &lstm_one_time_step_s16_cell_gate_input_weights[0], + lstm_one_time_step_s16_input_zero_point, + &lstm_one_time_step_s16_cell_gate_bias[0]); + arm_vector_sum_s8_s64(&output_data_kernel_sum[0], + lstm_one_time_step_s16_input_size, + lstm_one_time_step_s16_hidden_size, + &lstm_one_time_step_s16_output_gate_input_weights[0], + lstm_one_time_step_s16_input_zero_point, + &lstm_one_time_step_s16_output_gate_bias[0]); + + arm_vector_sum_s8_s64(&input_hidden_kernel_sum[0], + lstm_one_time_step_s16_hidden_size, + lstm_one_time_step_s16_hidden_size, + &lstm_one_time_step_s16_input_gate_hidden_weights[0], + -lstm_one_time_step_s16_output_zero_point, + NULL); + arm_vector_sum_s8_s64(&forget_hidden_kernel_sum[0], + lstm_one_time_step_s16_hidden_size, + lstm_one_time_step_s16_hidden_size, + &lstm_one_time_step_s16_forget_gate_hidden_weights[0], + -lstm_one_time_step_s16_output_zero_point, + NULL); + arm_vector_sum_s8_s64(&cell_hidden_kernel_sum[0], + lstm_one_time_step_s16_hidden_size, + lstm_one_time_step_s16_hidden_size, + &lstm_one_time_step_s16_cell_gate_hidden_weights[0], + -lstm_one_time_step_s16_output_zero_point, + NULL); + arm_vector_sum_s8_s64(&output_hidden_kernel_sum[0], + lstm_one_time_step_s16_hidden_size, + lstm_one_time_step_s16_hidden_size, + &lstm_one_time_step_s16_output_gate_hidden_weights[0], + -lstm_one_time_step_s16_output_zero_point, + NULL); + + // INPUT GATE + const cmsis_nn_lstm_gate gate_input = {lstm_one_time_step_s16_input_gate_input_multiplier, + lstm_one_time_step_s16_input_gate_input_shift, + &lstm_one_time_step_s16_input_gate_input_weights[0], + &input_data_kernel_sum[0], + lstm_one_time_step_s16_input_gate_hidden_multiplier, + 
lstm_one_time_step_s16_input_gate_hidden_shift, + &lstm_one_time_step_s16_input_gate_hidden_weights[0], + &input_hidden_kernel_sum[0], + &lstm_one_time_step_s16_input_gate_bias[0], + ARM_SIGMOID}; + + // FORGET GATE + const cmsis_nn_lstm_gate gate_forget = {lstm_one_time_step_s16_forget_gate_input_multiplier, + lstm_one_time_step_s16_forget_gate_input_shift, + &lstm_one_time_step_s16_forget_gate_input_weights[0], + &forget_data_kernel_sum[0], + lstm_one_time_step_s16_forget_gate_hidden_multiplier, + lstm_one_time_step_s16_forget_gate_hidden_shift, + &lstm_one_time_step_s16_forget_gate_hidden_weights[0], + &forget_hidden_kernel_sum[0], + &lstm_one_time_step_s16_forget_gate_bias[0], + ARM_SIGMOID}; + + // CELL GATE + const cmsis_nn_lstm_gate gate_cell = {lstm_one_time_step_s16_cell_gate_input_multiplier, + lstm_one_time_step_s16_cell_gate_input_shift, + &lstm_one_time_step_s16_cell_gate_input_weights[0], + &cell_data_kernel_sum[0], + lstm_one_time_step_s16_cell_gate_hidden_multiplier, + lstm_one_time_step_s16_cell_gate_hidden_shift, + &lstm_one_time_step_s16_cell_gate_hidden_weights[0], + &cell_hidden_kernel_sum[0], + &lstm_one_time_step_s16_cell_gate_bias[0], + ARM_TANH}; + + // OUTPUT GATE + const cmsis_nn_lstm_gate gate_output = {lstm_one_time_step_s16_output_gate_input_multiplier, + lstm_one_time_step_s16_output_gate_input_shift, + &lstm_one_time_step_s16_output_gate_input_weights[0], + &output_data_kernel_sum[0], + lstm_one_time_step_s16_output_gate_hidden_multiplier, + lstm_one_time_step_s16_output_gate_hidden_shift, + &lstm_one_time_step_s16_output_gate_hidden_weights[0], + &output_hidden_kernel_sum[0], + &lstm_one_time_step_s16_output_gate_bias[0], + ARM_SIGMOID}; + + // LSTM DATA + const cmsis_nn_lstm_params params = {lstm_one_time_step_s16_time_major, + lstm_one_time_step_s16_batch_size, + lstm_one_time_step_s16_time_steps, + lstm_one_time_step_s16_input_size, + lstm_one_time_step_s16_hidden_size, + lstm_one_time_step_s16_input_zero_point, + 
lstm_one_time_step_s16_forget_to_cell_multiplier, + lstm_one_time_step_s16_forget_to_cell_shift, + lstm_one_time_step_s16_input_to_cell_multiplier, + lstm_one_time_step_s16_input_to_cell_shift, + lstm_one_time_step_s16_cell_clip, + lstm_one_time_step_s16_cell_scale_power, + lstm_one_time_step_s16_output_multiplier, + lstm_one_time_step_s16_output_shift, + lstm_one_time_step_s16_output_zero_point, + gate_forget, + gate_input, + gate_cell, + gate_output}; + + cmsis_nn_lstm_context buffers; + buffers.temp1 = buffer1; + buffers.temp2 = buffer2; + buffers.cell_state = buffer3; + + arm_cmsis_nn_status result = arm_lstm_unidirectional_s16(lstm_one_time_step_s16_input, output, &params, &buffers); + + TEST_ASSERT_EQUAL(expected, result); + TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size)); +} \ No newline at end of file