Skip to content

Commit

Permalink
Change SVDF MVE memmove to faster arm_memcpy_s8 (ARM-software#147)
Browse files Browse the repository at this point in the history
  • Loading branch information
AdrianLundell authored Oct 1, 2024
1 parent 21a5bba commit 411ff0d
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 10 deletions.
16 changes: 12 additions & 4 deletions Source/SVDFunctions/arm_svdf_s8.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* Title: arm_svdf_s8.c
* Description: S8 basic SVDF layer function
*
* $Date: 14 Feb 2024
* $Revision: V.6.1.0
* $Date: 24 Sep 2024
* $Revision: V.6.1.1
*
* Target : Arm(R) M-Profile Architecture
*
Expand Down Expand Up @@ -110,9 +110,17 @@ arm_cmsis_nn_status arm_svdf_s8(const cmsis_nn_context *ctx,
int32_t *kernel_sum_data = (int32_t *)ctx->buf;

// Left shift state
memmove((int8_t *)state_data,
(int8_t *)state_data + 1,
// Using memcpy on overlapping data is, in general, undefined behaviour. It is safe here because the behaviour of
// arm_memcpy_s8 is known: the destination starts before the source, so each element is read before it is overwritten.
#ifdef ARM_MATH_MVEI
arm_memcpy_s8(state_data,
state_data + 1,
(size_t)((input_batches * feature_batches * time_batches - 1) * (int32_t)sizeof(int8_t)));
#else
memmove(state_data,
state_data + 1,
(size_t)((input_batches * feature_batches * time_batches - 1) * (int32_t)sizeof(int8_t)));
#endif

// Matrix multiplication input * feature weight
for (int i_batch = 0; i_batch < input_batches; i_batch++)
Expand Down
20 changes: 14 additions & 6 deletions Source/SVDFunctions/arm_svdf_state_s16_s8.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates <[email protected]>
* SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates <[email protected]>
*
* SPDX-License-Identifier: Apache-2.0
*
Expand All @@ -18,11 +18,11 @@

/* ----------------------------------------------------------------------
* Project: CMSIS NN Library
* Title: arm_svdf_s8.c
* Title: arm_svdf_state_s16_s8.c
* Description: S8 basic SVDF layer function with s16 state tensor
*
* $Date: 5 January 2023
* $Revision: V.3.1.0
* $Date: 24 Sep 2024
* $Revision: V.3.1.1
*
* Target : Arm(R) M-Profile Architecture
*
Expand Down Expand Up @@ -100,9 +100,17 @@ arm_cmsis_nn_status arm_svdf_state_s16_s8(const cmsis_nn_context *input_ctx,
int32_t *buffer_b = (int32_t *)output_ctx->buf;

// Left shift state
memmove((int16_t *)state_data,
(int16_t *)state_data + 1,
// Using memcpy on overlapping data is, in general, undefined behaviour. It is safe here because the behaviour of
// arm_memcpy_s8 is known: the destination starts before the source, so each element is read before it is overwritten.
#ifdef ARM_MATH_MVEI
arm_memcpy_s8((int8_t *)state_data,
(int8_t *)(state_data + 1),
(size_t)((input_batches * feature_batches * time_batches - 1) * (int32_t)sizeof(int16_t)));
#else
memmove(state_data,
state_data + 1,
(size_t)((input_batches * feature_batches * time_batches - 1) * (int32_t)sizeof(int16_t)));
#endif

// Matrix multiplication input * feature weight
for (int i_batch = 0; i_batch < input_batches; i_batch++)
Expand Down

0 comments on commit 411ff0d

Please sign in to comment.