Skip to content

Commit

Permalink
Add int8 padding operator and unit tests
Browse files Browse the repository at this point in the history
Change-Id: I8d33d05a0389d9fac64545d3dbd68b9b683a0c96
  • Loading branch information
AdrianLundell committed Oct 15, 2024
1 parent 411ff0d commit 6e767ef
Show file tree
Hide file tree
Showing 22 changed files with 531 additions and 51 deletions.
1 change: 1 addition & 0 deletions ARM.CMSIS-NN.pdsc
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@
<file category="source" name="Source/SoftmaxFunctions/arm_softmax_s8_s16.c"/>
<file category="source" name="Source/SoftmaxFunctions/arm_softmax_s16.c"/>
<file category="source" name="Source/SoftmaxFunctions/arm_softmax_u8.c"/>
<file category="source" name="Source/PadFunctions/arm_pad_s8.c"/>
</files>
</component>
</components>
Expand Down
28 changes: 26 additions & 2 deletions Include/arm_nnfunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* Title: arm_nnfunctions.h
* Description: Public header file for CMSIS NN Library
*
* $Date: 5 Sep 2024
* $Revision: V.17.0.0
* $Date: 19 Sep 2024
* $Revision: V.17.1.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
Expand Down Expand Up @@ -2780,6 +2780,30 @@ arm_cmsis_nn_status arm_batch_matmul_s16(const cmsis_nn_context *ctx,
const cmsis_nn_dims *output_dims,
int16_t *output);

/**
* @defgroup Pad Pad Layer Functions:
*
*/

/**
 * @brief Expands the size of the input by adding constant values before and after the data, in all dimensions.
 *
 * The tensors are traversed in n, h, w, c order with c as the innermost (contiguous) dimension.
 * Every output element that does not receive an input element is set to pad_value.
 *
 * @param[in]  input      Pointer to input data. n * h * w * c elements, as given by input_size, are read.
 * @param[out] output     Pointer to output data. The caller must provide room for
 *                        (pre_pad->n + input_size->n + post_pad->n) *
 *                        (pre_pad->h + input_size->h + post_pad->h) *
 *                        (pre_pad->w + input_size->w + post_pad->w) *
 *                        (pre_pad->c + input_size->c + post_pad->c) elements.
 * @param[in]  pad_value  Value to pad with
 * @param[in]  input_size Input tensor dimensions
 * @param[in]  pre_pad    Padding to apply before data in each dimension
 * @param[in]  post_pad   Padding to apply after data in each dimension
 *
 * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
 *
 * @note       No argument validation is performed; the pointers must be valid.
 *             NOTE(review): padding amounts are presumably expected to be non-negative - confirm with callers.
 *
 */
arm_cmsis_nn_status arm_pad_s8(const int8_t *input,
int8_t *output,
const int8_t pad_value,
const cmsis_nn_dims *input_size,
const cmsis_nn_dims *pre_pad,
const cmsis_nn_dims *post_pad);
#ifdef __cplusplus
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Examples are Cortex-M55 or Cortex-M85 configured with MVE.
| Softmax | Yes | Yes | N/A | Yes | Yes | N/A | Yes | No | N/A |
| LSTM | Yes | Yes | No | Yes | Yes | No | Yes | Yes | No |
| SVDF | Yes | No | No | Yes | No | No | Yes | No | No |
| Pad | Yes | No | No | Yes | No | No | Yes | No | No |

* int4 weights + int8 activations

Expand Down Expand Up @@ -89,7 +90,7 @@ cmake .. -DCMAKE_TOOLCHAIN_FILE=</path/to/ethos-u-core-platform>/cmake/toolchain
```

### Compiler Options
Default optimization level is set at Ofast. This can be overwritten with CMake on command line by using <nobr>*"-DCMSIS_OPTIMIZATION_LEVEL"*</nobr>. Please change according to project needs.
Default optimization level is set at Ofast. This can be overwritten with CMake on command line by using <nobr>*"-DCMSIS_OPTIMIZATION_LEVEL"*</nobr>. Please change according to project needs.
Just bear in mind this can impact performance. With only optimization level -O0, *ARM_MATH_AUTOVECTORIZE* needs to be defined for processors with Helium
Technology.

Expand Down
5 changes: 5 additions & 0 deletions Source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ option(BASICMATHSNN "Basic Maths for NN" ON)
option(RESHAPE "Reshape" ON)
option(SVDF "SVDF" ON)
option(LSTM "LSTM" ON)
option(PAD "Pad" ON)

# Always needed if any other module above is on.
option(NNSUPPORT "NN Support" ON)
Expand Down Expand Up @@ -81,6 +82,10 @@ if (RESHAPE)
add_subdirectory(ReshapeFunctions)
endif()

# Pad operator sources (PadFunctions subdirectory); only added when the PAD option is ON.
if (PAD)
add_subdirectory(PadFunctions)
endif()

# Keep NNSUPPORT at the end
if (NNSUPPORT)
add_subdirectory(NNSupportFunctions)
Expand Down
20 changes: 20 additions & 0 deletions Source/PadFunctions/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#
# SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Sources of the Pad operator, added to the cmsis-nn target.
# The files are listed explicitly instead of being globbed: a glob is only
# evaluated when CMake configures, so a newly added source would be silently
# left out of the build until the next manual re-configure.
# Absolute paths are used because cmsis-nn is defined in another directory.
target_sources(cmsis-nn PRIVATE
  "${CMAKE_CURRENT_SOURCE_DIR}/arm_pad_s8.c"
)
117 changes: 117 additions & 0 deletions Source/PadFunctions/arm_pad_s8.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@

/*
* SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates <[email protected]>
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/* ----------------------------------------------------------------------
* Project: CMSIS NN Library
* Title: arm_pad_s8.c
* Description: Pad an s8 tensor
*
* $Date: 19 Sep 2024
* $Revision: V.1.0.0
*
* Target : Arm(R) M-Profile Architecture
*
* -------------------------------------------------------------------- */

#include "arm_nn_types.h"
#include "arm_nnfunctions.h"
#include "arm_nnsupportfunctions.h"
/**
* @ingroup Public
*/

/**
* @addtogroup Pad
* @{
*/

/*
* Basic s8 pad function.
*
* Refer to the header file for details.
*
*/

arm_cmsis_nn_status arm_pad_s8(const int8_t *input,
int8_t *output,
const int8_t pad_value,
const cmsis_nn_dims *input_size,
const cmsis_nn_dims *pre_pad,
const cmsis_nn_dims *post_pad)
{

const cmsis_nn_dims output_size = {pre_pad->n + input_size->n + post_pad->n,
pre_pad->h + input_size->h + post_pad->h,
pre_pad->w + input_size->w + post_pad->w,
pre_pad->c + input_size->c + post_pad->c};

const int32_t batch_block_size = output_size.h * output_size.w * output_size.c;
const int32_t row_block_size = output_size.w * output_size.c;
const int32_t col_block_size = output_size.c;

arm_memset_s8(output, pad_value, batch_block_size * pre_pad->n);
output += batch_block_size * pre_pad->n;
for (int32_t b = 0; b < input_size->n; b++)
{

arm_memset_s8(output, pad_value, row_block_size * pre_pad->h);
output += row_block_size * pre_pad->h;
for (int32_t y = 0; y < input_size->h; y++)
{

arm_memset_s8(output, pad_value, col_block_size * pre_pad->w);
output += col_block_size * pre_pad->w;
if (input_size->c == output_size.c)
{
arm_memcpy_s8(output, input, input_size->w * input_size->c);
output += input_size->w * input_size->c;
input += input_size->w * input_size->c;
}
else
{
for (int32_t x = 0; x < input_size->w; x++)
{

arm_memset_s8(output, pad_value, pre_pad->c);
output += pre_pad->c;

arm_memcpy_s8(output, input, input_size->c);
output += input_size->c;
input += input_size->c;

arm_memset_s8(output, pad_value, post_pad->c);
output += post_pad->c;
}
}

arm_memset_s8(output, pad_value, col_block_size * post_pad->w);
output += col_block_size * post_pad->w;
}

arm_memset_s8(output, pad_value, row_block_size * post_pad->h);
output += row_block_size * post_pad->h;
}
arm_memset_s8(output, pad_value, batch_block_size * post_pad->n);

return ARM_CMSIS_NN_SUCCESS;
}

/**
* @} end of Pad group
*/
1 change: 1 addition & 0 deletions Tests/UnitTest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ add_subdirectory(TestCases/test_arm_transpose_conv_s8)
add_subdirectory(TestCases/test_arm_lstm_unidirectional_s16)
add_subdirectory(TestCases/test_arm_batch_matmul_s8)
add_subdirectory(TestCases/test_arm_batch_matmul_s16)
add_subdirectory(TestCases/test_arm_pad_s8)

set(MAKE_CMD "python3")
set(MAKE_CMD_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/unittest_targets.py")
Expand Down
8 changes: 5 additions & 3 deletions Tests/UnitTest/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ If in a virtual environment just start by upgrading pip.
pip install --upgrade pip
```

After upgrading pip, the requirements file found in Tests/UnitTests can be installed. This contains all
After upgrading pip, the requirements file found in Tests/UnitTests can be installed. This contains all
python modules required to run all of the scripts. This will install tensorflow and keras to allow the use of
the generate_test_data.py script. If you have version specific requirements, it is recommended to install this
requirements.txt in a virtual environment.
Expand Down Expand Up @@ -74,11 +74,11 @@ The easiest way to run the unit tests on Corstone-300 is to use the build_and_ru

Sample usage:
```
./build_and_run_tests.sh -c cortex-m3,cortex-m7,cortex-m55 -o '-Ofast'
./build_and_run_tests.sh -c cortex-m3,cortex-m7,cortex-m55 -o '-Ofast'
```
By default the script will download and target gcc. To use arm compiler ensure that arm compilers folder is located in path, export CC and use the -a option on the script.

Downloaded dependencies including python venv can be found in Tests/UnitTests/downloads. Test elfs can be found in Tests/UnitTests/build-($cpu) directories.
Downloaded dependencies including python venv can be found in Tests/UnitTests/downloads. Test elfs can be found in Tests/UnitTests/build-($cpu) directories.

Otherwise, you can build it manually:

Expand Down Expand Up @@ -150,6 +150,7 @@ Operator bit-exactness compability:
| add | x | x |
| mul | x | x |
| batch matmul | x | x |
| pad | x | x |

### Refactoring of generate_test_data.py
Test data generation is in progress of incrementally moving over to the cleaned up scripts placed in `RefactoredTestGen`.
Expand All @@ -176,6 +177,7 @@ Current progress:
| add | x | |
| mul | x | |
| batch matmul | | x |
| pad | | x |


## Overview of the Folders
Expand Down
69 changes: 69 additions & 0 deletions Tests/UnitTest/RefactoredTestGen/Lib/op_pad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import Lib.op_utils
import tensorflow as tf
import math
import numpy as np

from tensorflow.lite.python.interpreter import Interpreter
from tensorflow.lite.python.interpreter import OpResolverType
import tf_keras as keras

class Op_pad(Lib.op_utils.Op_type):
    """Test-data generator for the pad operator.

    The methods take no `self`; they are written to be called on the class itself.
    """

    def get_shapes(params):
        """Return the tensor shapes derived from the test parameters.

        The input tensor shape is (input_n, input_h, input_w, input_c) and the
        representational dataset uses the same shape.
        """
        shapes = {}
        shapes["input_tensor"] = (params["input_n"], params["input_h"], params["input_w"], params["input_c"])
        shapes["representational_dataset"] = shapes["input_tensor"]

        return shapes

    def generate_keras_model(shapes, params):
        """Build a Keras model consisting of a single ZeroPadding2D layer.

        ZeroPadding2D pads two axes only, so either the (w, c) pair or the
        (h, w) pair can be padded; batch padding must always be zero.

        Raises:
            ValueError: if the requested padding is not limited to (h, w) or (w, c).
        """
        model = keras.models.Sequential()
        model.add(keras.layers.InputLayer(input_shape=shapes["input_tensor"][1:]))

        pad_h = (params["pre_pad_h"], params["post_pad_h"])
        pad_w = (params["pre_pad_w"], params["post_pad_w"])
        pad_c = (params["pre_pad_c"], params["post_pad_c"])

        if params["pre_pad_n"] == params["post_pad_n"] == params["pre_pad_h"] == params["post_pad_h"] == 0:
            # "channels_first" makes Keras treat the last two axes as the padded
            # ones, which for this input are w and c.
            model.add(keras.layers.ZeroPadding2D(padding=(pad_w, pad_c), data_format="channels_first"))
        elif params["pre_pad_n"] == params["post_pad_n"] == params["pre_pad_c"] == params["post_pad_c"] == 0:
            model.add(keras.layers.ZeroPadding2D(padding=(pad_h, pad_w), data_format="channels_last"))
        else:
            raise ValueError("Keras can only generate padding for (h,w) or (w,c), the others must be zero.")

        return model

    def generate_data_tflite(tflite_fname, params):
        """Derive the generated parameters from the converted TFLite model.

        Only `pad_value` and `output_size` (total number of output elements)
        are produced; no tensors or scales are generated for this operator.
        """
        tensors = {}
        effective_scales = {}
        scales = {}
        generated_params = {}

        # NOTE(review): presumably the quantized representation of the zero padding - confirm.
        generated_params["pad_value"] = -128

        interpreter = Interpreter(str(tflite_fname), experimental_op_resolver_type=OpResolverType.BUILTIN_REF)
        interpreter.allocate_tensors()

        # Label the axes in the same (n, h, w, c) order that get_shapes() uses.
        output_shape = interpreter.get_output_details()[0]['shape']
        output_n = output_shape[0]
        output_h = output_shape[1]
        output_w = output_shape[2]
        output_c = output_shape[3]

        generated_params["output_size"] = output_n * output_h * output_w * output_c

        return Lib.op_utils.Generated_data(generated_params, tensors, scales, effective_scales)

3 changes: 3 additions & 0 deletions Tests/UnitTest/RefactoredTestGen/Lib/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import Lib.op_batch_matmul
import Lib.op_fully_connected
import Lib.op_pooling
import Lib.op_pad
import tensorflow as tf
import numpy as np
from tensorflow.lite.python.interpreter import Interpreter
Expand Down Expand Up @@ -185,6 +186,8 @@ def get_op_type(op_type_string):
return Lib.op_fully_connected.Op_fully_connected
if op_type_string == "avgpool" or op_type_string == "maxpool":
return Lib.op_pooling.Op_pooling
if op_type_string == "pad":
return Lib.op_pad.Op_pad
else:
raise ValueError(f"Unknown op type '{op_type_string}'")

Expand Down
Loading

0 comments on commit 6e767ef

Please sign in to comment.