Change SVDF MVE memmove to faster arm_memcpy_s8 (ARM-software#147)

AdrianLundell · Oct 1, 2024 · 411ff0d
1 parent 21a5bba
commit 411ff0d
Show file tree

Hide file tree

Showing 2 changed files with 26 additions and 10 deletions.
diff --git a/Source/SVDFunctions/arm_svdf_s8.c b/Source/SVDFunctions/arm_svdf_s8.c
@@ -21,8 +21,8 @@
  * Title:        arm_svdf_s8.c
  * Description:  S8 basic SVDF layer function
  *
- * $Date:        14 Feb 2024
- * $Revision:    V.6.1.0
+ * $Date:        24 Sep 2024
+ * $Revision:    V.6.1.1
  *
  * Target :  Arm(R) M-Profile Architecture
  *
@@ -110,9 +110,17 @@ arm_cmsis_nn_status arm_svdf_s8(const cmsis_nn_context *ctx,
     int32_t *kernel_sum_data = (int32_t *)ctx->buf;
 
     // Left shift state
-    memmove((int8_t *)state_data,
-            (int8_t *)state_data + 1,
+    // Using memcpy on overlapping data is, in general, undefined behaviour. However, the behaviour of arm_memcpy_s8
+    // is known, so in this case it is certain that the data has been copied before it is overwritten.
+#ifdef ARM_MATH_MVEI
+    arm_memcpy_s8(state_data,
+                  state_data + 1,
+                  (size_t)((input_batches * feature_batches * time_batches - 1) * (int32_t)sizeof(int8_t)));
+#else
+    memmove(state_data,
+            state_data + 1,
             (size_t)((input_batches * feature_batches * time_batches - 1) * (int32_t)sizeof(int8_t)));
+#endif
 
     // Matrix multiplication input * feature weight
     for (int i_batch = 0; i_batch < input_batches; i_batch++)

diff --git a/Source/SVDFunctions/arm_svdf_state_s16_s8.c b/Source/SVDFunctions/arm_svdf_state_s16_s8.c
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -18,11 +18,11 @@
 
 /* ----------------------------------------------------------------------
  * Project:      CMSIS NN Library
- * Title:        arm_svdf_s8.c
+ * Title:        arm_svdf_state_s16_s8.c
  * Description:  S8 basic SVDF layer function with s16 state tensor
  *
- * $Date:        5 January 2023
- * $Revision:    V.3.1.0
+ * $Date:        24 Sep 2024
+ * $Revision:    V.3.1.1
  *
  * Target :  Arm(R) M-Profile Architecture
  *
@@ -100,9 +100,17 @@ arm_cmsis_nn_status arm_svdf_state_s16_s8(const cmsis_nn_context *input_ctx,
     int32_t *buffer_b = (int32_t *)output_ctx->buf;
 
     // Left shift state
-    memmove((int16_t *)state_data,
-            (int16_t *)state_data + 1,
+    // Using memcpy on overlapping data is, in general, undefined behaviour. However, the behaviour of arm_memcpy_s8
+    // is known, so in this case it is certain that the data has been copied before it is overwritten.
+#ifdef ARM_MATH_MVEI
+    arm_memcpy_s8((int8_t *)state_data,
+                  (int8_t *)(state_data + 1),
+                  (size_t)((input_batches * feature_batches * time_batches - 1) * (int32_t)sizeof(int16_t)));
+#else
+    memmove(state_data,
+            state_data + 1,
             (size_t)((input_batches * feature_batches * time_batches - 1) * (int32_t)sizeof(int16_t)));
+#endif
 
     // Matrix multiplication input * feature weight
     for (int i_batch = 0; i_batch < input_batches; i_batch++)