diff --git a/README.md b/README.md
index 1c3ba33..71f654e 100644
--- a/README.md
+++ b/README.md
@@ -101,114 +101,214 @@
Score (Accuracy) |
- CNN Large INT8 * |
+ CNN Large INT8 * |
INT8 |
TensorFlow Lite |
:heavy_check_mark: |
:heavy_check_mark: |
:heavy_check_mark: |
:heavy_check_mark: |
- 0.931 |
+ 0.923 |
- CNN Medium INT8 * |
+ CNN Medium INT8 * |
INT8 |
TensorFlow Lite |
:heavy_check_mark: |
:heavy_check_mark: |
:heavy_check_mark: |
:heavy_check_mark: |
- 0.911 |
+ 0.905 |
- CNN Small INT8 * |
+ CNN Small INT8 * |
INT8 |
TensorFlow Lite |
:heavy_check_mark: |
:heavy_check_mark: |
:heavy_check_mark: |
:heavy_check_mark: |
- 0.912 |
+ 0.902 |
- DNN Large INT8 * |
+ DNN Large INT8 * |
INT8 |
TensorFlow Lite |
:heavy_check_mark: |
:heavy_check_mark: |
:heavy_check_mark: |
:heavy_check_mark: |
- 0.863 |
+ 0.860 |
- DNN Medium INT8 * |
+ DNN Medium INT8 * |
INT8 |
TensorFlow Lite |
:heavy_check_mark: |
:heavy_check_mark: |
:heavy_check_mark: |
:heavy_check_mark: |
- 0.844 |
+ 0.839 |
- DNN Small INT8 * |
+ DNN Small INT8 * |
INT8 |
TensorFlow Lite |
:heavy_check_mark: |
:heavy_check_mark: |
:heavy_check_mark: |
:heavy_check_mark: |
- 0.825 |
+ 0.821 |
- DS-CNN Clustered FP32 * |
+ DS-CNN Large Clustered FP32 * |
FP32 |
TensorFlow Lite |
:heavy_check_mark: |
- :heavy_multiplication_x: |
+ :heavy_check_mark: |
:heavy_check_mark: |
:heavy_multiplication_x: |
- 0.950 |
+ 0.948 |
- DS-CNN Clustered INT8 * |
+ DS-CNN Large Clustered INT8 * |
INT8 |
TensorFlow Lite |
- :heavy_multiplication_x: |
:heavy_check_mark: |
:heavy_check_mark: |
:heavy_check_mark: |
- 0.940 |
+ :heavy_check_mark: |
+ 0.939 |
- DS-CNN Large INT8 * |
+ DS-CNN Large INT8 * |
INT8 |
TensorFlow Lite |
:heavy_check_mark: |
:heavy_check_mark: HERO |
:heavy_check_mark: |
:heavy_check_mark: |
- 0.946 |
+ 0.945 |
- DS-CNN Medium INT8 * |
+ DS-CNN Medium INT8 * |
INT8 |
TensorFlow Lite |
:heavy_check_mark: |
:heavy_check_mark: HERO |
:heavy_check_mark: |
:heavy_check_mark: |
- 0.941 |
+ 0.939 |
- DS-CNN Small INT8 * |
+ DS-CNN Small INT8 * |
INT8 |
TensorFlow Lite |
:heavy_check_mark: |
:heavy_check_mark: HERO |
:heavy_check_mark: |
:heavy_check_mark: |
- 0.935 |
+ 0.931 |
+
+
+ DS-CNN Small INT16 * |
+ INT16 |
+ TensorFlow Lite |
+ :heavy_check_mark: |
+ :heavy_check_mark: HERO |
+ :heavy_check_mark: |
+ :heavy_check_mark: |
+ 0.934 |
+
+
+ CNN Large FP32 * |
+ FP32 |
+ TensorFlow Lite |
+ :heavy_check_mark: |
+ :heavy_check_mark: |
+ :heavy_check_mark: |
+ :heavy_multiplication_x: |
+ 0.934 |
+
+
+ CNN Medium FP32 * |
+ FP32 |
+ TensorFlow Lite |
+ :heavy_check_mark: |
+ :heavy_check_mark: |
+ :heavy_check_mark: |
+ :heavy_multiplication_x: |
+ 0.918 |
+
+
+ CNN Small FP32 * |
+ FP32 |
+ TensorFlow Lite |
+ :heavy_check_mark: |
+ :heavy_check_mark: |
+ :heavy_check_mark: |
+ :heavy_multiplication_x: |
+ 0.922 |
+
+
+ DNN Large FP32 * |
+ FP32 |
+ TensorFlow Lite |
+ :heavy_check_mark: |
+ :heavy_check_mark: |
+ :heavy_check_mark: |
+ :heavy_multiplication_x: |
+ 0.867 |
+
+
+ DNN Medium FP32 * |
+ FP32 |
+ TensorFlow Lite |
+ :heavy_check_mark: |
+ :heavy_check_mark: |
+ :heavy_check_mark: |
+ :heavy_multiplication_x: |
+ 0.850 |
+
+
+ DNN Small FP32 * |
+ FP32 |
+ TensorFlow Lite |
+ :heavy_check_mark: |
+ :heavy_check_mark: |
+ :heavy_check_mark: |
+ :heavy_multiplication_x: |
+ 0.836 |
+
+
+ DS-CNN Large FP32 * |
+ FP32 |
+ TensorFlow Lite |
+ :heavy_check_mark: |
+ :heavy_check_mark: HERO |
+ :heavy_check_mark: |
+ :heavy_multiplication_x: |
+ 0.950 |
+
+
+ DS-CNN Medium FP32 * |
+ FP32 |
+ TensorFlow Lite |
+ :heavy_check_mark: |
+ :heavy_check_mark: HERO |
+ :heavy_check_mark: |
+ :heavy_multiplication_x: |
+ 0.943 |
+
+
+ DS-CNN Small FP32 * |
+ FP32 |
+ TensorFlow Lite |
+ :heavy_check_mark: |
+ :heavy_check_mark: HERO |
+ :heavy_check_mark: |
+ :heavy_multiplication_x: |
+ 0.939 |
MicroNet Large INT8 |
diff --git a/models/experimental/efficientnet_lite0_224/efficientnet_lite0_224.tflite b/models/experimental/efficientnet_lite0_224/efficientnet_lite0_224.tflite
index 9c9da85..a85250a 100644
Binary files a/models/experimental/efficientnet_lite0_224/efficientnet_lite0_224.tflite and b/models/experimental/efficientnet_lite0_224/efficientnet_lite0_224.tflite differ
diff --git a/models/experimental/har_cnn/har_int8.tflite b/models/experimental/har_cnn/har_int8.tflite
index 9d65d7e..a85b125 100644
Binary files a/models/experimental/har_cnn/har_int8.tflite and b/models/experimental/har_cnn/har_int8.tflite differ
diff --git a/models/experimental/ssd_mobilenet_v3_int8/ssd_mobilenet_v3_int8.tflite b/models/experimental/ssd_mobilenet_v3_int8/ssd_mobilenet_v3_int8.tflite
index f188cd2..65e2043 100644
Binary files a/models/experimental/ssd_mobilenet_v3_int8/ssd_mobilenet_v3_int8.tflite and b/models/experimental/ssd_mobilenet_v3_int8/ssd_mobilenet_v3_int8.tflite differ
diff --git a/models/experimental/yolov3_416_416_backbone_mltools_int8/yolov3_416_416_backbone_mltools_int8.tflite b/models/experimental/yolov3_416_416_backbone_mltools_int8/yolov3_416_416_backbone_mltools_int8.tflite
index 3270fe7..5a77ec3 100644
Binary files a/models/experimental/yolov3_416_416_backbone_mltools_int8/yolov3_416_416_backbone_mltools_int8.tflite and b/models/experimental/yolov3_416_416_backbone_mltools_int8/yolov3_416_416_backbone_mltools_int8.tflite differ
diff --git a/models/experimental/yolov3_tiny_int8_pruned_backbone_only/yolov3_tiny_int8_pruned_backbone_only.tflite b/models/experimental/yolov3_tiny_int8_pruned_backbone_only/yolov3_tiny_int8_pruned_backbone_only.tflite
index 5a45bf0..b879213 100644
Binary files a/models/experimental/yolov3_tiny_int8_pruned_backbone_only/yolov3_tiny_int8_pruned_backbone_only.tflite and b/models/experimental/yolov3_tiny_int8_pruned_backbone_only/yolov3_tiny_int8_pruned_backbone_only.tflite differ
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/README.md b/models/keyword_spotting/cnn_large/model_package_tf/README.md
new file mode 100644
index 0000000..b0cbfe4
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/README.md
@@ -0,0 +1,115 @@
+# CNN Large model package
+
+This folder contains code that will allow you to recreate the CNN Large keyword spotting model from
+the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf).
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Model Package Overview
+| Model | CNN_Large |
+|:---------------: |:------------------------------------------:|
+| **Format**: | Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |
+| **Feature**: | Keyword spotting for Arm Cortex-M CPUs |
+| **Architectural Delta w.r.t. Vanilla**: | None |
+| **Domain**: | Keyword spotting |
+| **Package Quality**: | Optimised |
+
+## Model Recreation
+
+To recreate the model you will need to use ```Python3.7``` and install the requirements in ```requirements.txt```.
+
+Once these requirements are satisfied, you can execute the recreation script contained in this folder by running:
+
+```bash
+bash ./recreate_model.sh
+```
+
+Running this script uses the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder
+to generate the TFLite files and evaluate them on the test sets. Both an fp32 version and a quantized version are produced;
+the quantized version is fully quantized using post-training quantization.
+
+If you want to run training from scratch, you can do so by supplying ```--train``` when running the script. For example:
+
+```bash
+bash ./recreate_model.sh --train
+```
+
+Training is then performed and should produce a model that reaches the accuracy stated in this repository.
+Note that the TFLite export will still use the pre-trained checkpoint files, so you will need to re-run the script
+and this time supply the path to the new checkpoint files you want to use, for example:
+
+```bash
+bash ./recreate_model.sh --ckpt <path_to_checkpoint>
+```
+
+
+## Training
+
+To train a DNN with 3 fully-connected layers of 128 neurons each, run:
+
+```
+python train.py --model_architecture dnn --model_size_info 128 128 128
+```
+The command line argument *--model_size_info* is used to pass the neural network layer
+dimensions (such as number of layers and convolution filter size/stride) as a list to models.py,
+which builds the TensorFlow graph based on the provided model architecture
+and layer dimensions. For more information on *model_size_info* for each network architecture, see
+[models.py](models.py).
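+
+For illustration, the sketch below shows one way *model_size_info* could map onto a network for the DNN example
+above, where each list entry is treated as the width of one fully-connected layer. This is only a simplified sketch;
+the actual construction of every architecture (including the CNN variants) lives in [models.py](models.py).
+
+```python
+import tensorflow as tf
+
+def build_dnn(model_size_info, fingerprint_size, label_count):
+    """Builds a plain DNN: one Dense layer per entry in model_size_info.
+
+    fingerprint_size: length of the flattened MFCC input feature vector.
+    label_count: number of output classes (wanted words + silence + unknown).
+    """
+    model = tf.keras.Sequential()
+    model.add(tf.keras.layers.InputLayer(input_shape=(fingerprint_size,)))
+    for units in model_size_info:  # e.g. [128, 128, 128]
+        model.add(tf.keras.layers.Dense(units, activation='relu'))
+    model.add(tf.keras.layers.Dense(label_count))  # logits over the labels
+    return model
+```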
+
+The training commands with all the hyperparameters to reproduce the models shown in the
+[paper](https://arxiv.org/pdf/1711.07128.pdf) are given [here](recreate_model.sh).
+
+## Testing
+To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:
+```
+python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint_path>
+```
+The parameters used here should match those used in the Training step.
+
+## Optimization
+
+We introduce a new *optional* step to optimize the trained keyword spotting model for deployment.
+
+Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. We use 32 weight clusters and the kmeans++ cluster initialization method as the clustering hyperparameters.
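+
+For reference, the core of this optimization with the clustering API looks roughly like the sketch below; the full
+clustering and fine-tuning flow lives in optimisations.py, and `trained_model` here stands for the Keras model
+restored from your checkpoint.
+
+```python
+import tensorflow_model_optimization as tfmot
+
+clustering_params = {
+    'number_of_clusters': 32,
+    'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS,
+}
+
+# Wrap the trained model so its weights are constrained to 32 clusters,
+# fine-tune for a reduced number of steps, then strip the clustering
+# wrappers before exporting or quantizing the model.
+clustered_model = tfmot.clustering.keras.cluster_weights(trained_model, **clustering_params)
+# ... compile and fine-tune clustered_model here ...
+final_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+```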
+
+To optimize your trained model (e.g. a DNN), you need a trained model checkpoint on which to run clustering and fine-tuning.
+You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.
+
+To apply the optimization and fine-tuning, run the following command:
+```
+python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint_path>
+```
+The parameters used here should match those used in the Training step, except for the number of training steps.
+The number of training steps is reduced since the optimization step only requires fine-tuning.
+
+This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model.
+
+## Quantization and TFLite Conversion
+
+As part of this update we now use TensorFlow's
+[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to
+make quantizing the trained models straightforward.
+
+To quantize your trained model (e.g. a DNN) run:
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint_path> [--inference_type int8|int16]
+```
+The parameters used here should match those used in the Training step.
+
+The inference_type parameter is *optional* and should be used when a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32.
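+
+If you want to confirm what the converter produced, you can inspect the input and output types of the generated
+TFLite file with the TFLite interpreter, for example:
+
+```python
+import tensorflow as tf
+
+interpreter = tf.lite.Interpreter(model_path='dnn_quantized.tflite')
+interpreter.allocate_tensors()
+
+# Reports int8 for --inference_type int8, int16 for int16, and float32 otherwise.
+print('input dtype: ', interpreter.get_input_details()[0]['dtype'])
+print('output dtype:', interpreter.get_output_details()[0]['dtype'])
+```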
+
+This step will produce a quantized TFLite file *dnn_quantized.tflite*.
+You can test the accuracy of this quantized model on the test set by running:
+```
+python evaluation.py --tflite_path dnn_quantized.tflite
+```
+The parameters used here should match those used in the Training step.
+
+`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:
+
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint_path> --no-quantize
+```
+
+This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/cnn_l_inference_keras.py b/models/keyword_spotting/cnn_large/model_package_tf/cnn_l_inference_keras.py
new file mode 100644
index 0000000..db7694a
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/cnn_l_inference_keras.py
@@ -0,0 +1,76 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import argparse
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+ with open(filename, "r") as f:
+ return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+
+ model = tf.keras.models.load_model(FLAGS.keras_file_path)
+ predictions = model.predict(x)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--keras_file_path',
+ type=str,
+ default='',
+ help='Path to the .h5 Keras model file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/cnn_l_inference_tflite.py b/models/keyword_spotting/cnn_large/model_package_tf/cnn_l_inference_tflite.py
new file mode 100644
index 0000000..9f79d99
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/cnn_l_inference_tflite.py
@@ -0,0 +1,120 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import numpy as np
+import argparse
+
+
+def tflite_inference(input_data, tflite_path):
+ """Call forwards pass of TFLite file and returns the result.
+
+ Args:
+ input_data: Input data to use on forward pass.
+ tflite_path: Path to TFLite file to run.
+
+ Returns:
+ Output from inference.
+ """
+ supported_quant_dtypes = (np.int8, np.int16)
+ interpreter = tf.lite.Interpreter(model_path=tflite_path)
+ interpreter.allocate_tensors()
+
+ input_details = interpreter.get_input_details()
+ output_details = interpreter.get_output_details()
+
+ input_dtype = input_details[0]["dtype"]
+ output_dtype = output_details[0]["dtype"]
+
+ # Check if the input/output type is quantized,
+ # set scale and zero-point accordingly
+ if input_dtype in supported_quant_dtypes:
+ input_scale, input_zero_point = input_details[0]["quantization"]
+ else:
+ input_scale, input_zero_point = 1, 0
+
+ input_data = input_data / input_scale + input_zero_point
+ input_data = np.round(input_data) if input_dtype in supported_quant_dtypes else input_data
+
+ if output_dtype in supported_quant_dtypes:
+ output_scale, output_zero_point = output_details[0]["quantization"]
+ else:
+ output_scale, output_zero_point = 1, 0
+
+ interpreter.set_tensor(input_details[0]['index'], tf.cast(input_data, input_dtype))
+ interpreter.invoke()
+
+ output_data = interpreter.get_tensor(output_details[0]['index'])
+
+ output_data = output_scale * (output_data.astype(np.float32) - output_zero_point)
+
+ return output_data
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+ with open(filename, "r") as f:
+ return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+ predictions = tflite_inference(x, FLAGS.tflite_path)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ default='',
+ help='Path to TFLite file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/convert_to_tflite.py b/models/keyword_spotting/cnn_large/model_package_tf/convert_to_tflite.py
new file mode 100644
index 0000000..64ab8df
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/convert_to_tflite.py
@@ -0,0 +1,234 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for converting and quantizing a trained keyword spotting
+ model and saving to TFLite."""
+
+import argparse
+
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from evaluation import tflite_test
+
+NUM_REP_DATA_SAMPLES = 100 # How many samples to use for post training quantization.
+
+
+def convert(model_settings, audio_processor, checkpoint, quantize, inference_type, tflite_path):
+ """Load our trained floating point model and convert it.
+
+ TFLite conversion or post training quantization is performed and the
+ resulting model is saved as a TFLite file.
+ We use samples from the validation set to do post training quantization.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ checkpoint: Path to training checkpoint to load.
+ quantize: Whether to quantize the model or convert to fp32 TFLite model.
+ inference_type: Input/output type of the quantized model.
+ tflite_path: Output TFLite file save path.
+ """
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(checkpoint).expect_partial()
+
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+
+ def _rep_dataset():
+ """Generator function to produce representative dataset."""
+ i = 0
+ for mfcc, label in val_data:
+ if i >= NUM_REP_DATA_SAMPLES:
+ break
+ i += 1
+ yield [mfcc]
+
+ if quantize:
+ # Quantize model and save to disk.
+ tflite_model = post_training_quantize(model, inference_type, _rep_dataset)
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Quantized model saved to {tflite_path}.')
+ else:
+ converter = tf.lite.TFLiteConverter.from_keras_model(model)
+ tflite_model = converter.convert()
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Converted model saved to {tflite_path}.')
+
+
+def post_training_quantize(keras_model, inference_type, rep_dataset):
+ """Perform post training quantization and returns the TFLite model ready for saving.
+
+ See https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization for
+ more details.
+
+ Args:
+ keras_model: The trained tf Keras model used for post training quantization.
+ inference_type: Input/output type of the quantized model.
+ rep_dataset: Function to use as a representative dataset, must be callable.
+
+ Returns:
+ Quantized TFLite model ready for saving to disk.
+ """
+ converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+ converter.optimizations = [tf.lite.Optimize.DEFAULT]
+
+ # Default to the standard builtin ops so conversion also works when no
+ # fully quantized input/output type is requested (inference_type == 'fp32').
+ supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS
+ if inference_type == 'int8':
+ converter.inference_input_type = tf.int8
+ converter.inference_output_type = tf.int8
+ supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS_INT8
+ elif inference_type == 'int16':
+ converter.inference_input_type = tf.int16
+ converter.inference_output_type = tf.int16
+ supported_ops = tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
+
+ # Int8 post training quantization needs representative dataset.
+ converter.representative_dataset = rep_dataset
+ converter.target_spec.supported_ops = [supported_ops]
+
+ tflite_model = converter.convert()
+
+ return tflite_model
+
+
+def main():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.quantize:
+ tflite_path = f'{FLAGS.model_architecture}_quantized.tflite'
+ else:
+ tflite_path = f'{FLAGS.model_architecture}.tflite'
+
+ # Load floating point model from checkpoint and convert it.
+ convert(model_settings, audio_processor, FLAGS.checkpoint,
+ FLAGS.quantize, FLAGS.inference_type, tflite_path)
+
+ # Test the newly converted model on the test set.
+ tflite_test(model_settings, audio_processor, tflite_path)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from.')
+ parser.add_argument(
+ '--quantize',
+ dest='quantize',
+ action="store_true",
+ default=True,
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--no-quantize',
+ dest='quantize',
+ action="store_false",
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--inference_type',
+ type=str,
+ default='fp32',
+ help='If quantize is true, the input and output type of the model: fp32, int8 or int16')
+
+ FLAGS, _ = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/data_processing/__init__.py b/models/keyword_spotting/cnn_large/model_package_tf/data_processing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/data_processing/data_preprocessing.py b/models/keyword_spotting/cnn_large/model_package_tf/data_processing/data_preprocessing.py
new file mode 100644
index 0000000..05cf5ba
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/data_processing/data_preprocessing.py
@@ -0,0 +1,462 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Modifications Copyright 2023 Arm Inc. All Rights Reserved.
+# Modified to use TensorFlow 2.0 and data pipelines.
+#
+"""Functions for loading and preparing data for keyword spotting."""
+
+import os
+import re
+import sys
+import urllib
+from pathlib import Path
+import tarfile
+import hashlib
+import random
+import math
+from enum import Enum
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.ops import gen_audio_ops as audio_ops
+
+MAX_NUM_WAVS_PER_CLASS = 2**27 - 1 # ~134M
+RANDOM_SEED = 59185
+BACKGROUND_NOISE_DIR_NAME = '_background_noise_'
+SILENCE_LABEL = '_silence_'
+SILENCE_INDEX = 0
+UNKNOWN_WORD_INDEX = 1
+UNKNOWN_WORD_LABEL = '_unknown_'
+
+
+def load_wav_file(wav_filename, desired_samples):
+ """Loads and then decodes a given 16bit PCM wav file.
+
+ Decoded audio is scaled to the range [-1, 1] and padded or cropped to the desired number of samples.
+
+ Args:
+ wav_filename: 16bit PCM wav file to load.
+ desired_samples: Number of samples wanted from the audio file.
+
+ Returns:
+ Tuple consisting of the decoded audio and sample rate.
+ """
+ wav_file = tf.io.read_file(wav_filename)
+ decoded_wav = audio_ops.decode_wav(wav_file, desired_channels=1, desired_samples=desired_samples)
+
+ return decoded_wav.audio, decoded_wav.sample_rate
+
+
+def calculate_mfcc(audio_signal, audio_sample_rate, window_size, window_stride, num_mfcc):
+ """Returns Mel Frequency Cepstral Coefficients (MFCC) for a given audio signal.
+
+ Args:
+ audio_signal: Raw audio signal in range [-1, 1]
+ audio_sample_rate: Audio signal sample rate
+ window_size: Window size in samples for calculating spectrogram
+ window_stride: Window stride in samples for calculating spectrogram
+ num_mfcc: The number of MFCC features wanted.
+
+ Returns:
+ Calculated mfcc features.
+ """
+ spectrogram = audio_ops.audio_spectrogram(input=audio_signal, window_size=window_size, stride=window_stride,
+ magnitude_squared=True)
+
+ mfcc_features = audio_ops.mfcc(spectrogram, audio_sample_rate, dct_coefficient_count=num_mfcc)
+
+ return mfcc_features
+
+
+def which_set(filename, validation_percentage, testing_percentage):
+ """Determines which data partition the file should belong to.
+
+ We want to keep files in the same training, validation, or testing sets even
+ if new ones are added over time. This makes it less likely that testing
+ samples will accidentally be reused in training when long runs are restarted
+ for example. To keep this stability, a hash of the filename is taken and used
+ to determine which set it should belong to. This determination only depends on
+ the name and the set proportions, so it won't change as other files are added.
+ It's also useful to associate particular files as related (for example words
+ spoken by the same person), so anything after '_nohash_' in a filename is
+ ignored for set determination. This ensures that 'bobby_nohash_0.wav' and
+ 'bobby_nohash_1.wav' are always in the same set, for example.
+
+ Args:
+ filename: File path of the data sample.
+ validation_percentage: How much of the data set to use for validation.
+ testing_percentage: How much of the data set to use for testing.
+
+ Returns:
+ String, one of 'training', 'validation', or 'testing'.
+ """
+ base_name = os.path.basename(filename)
+ # We want to ignore anything after '_nohash_' in the file name when
+ # deciding which set to put a wav in, so the data set creator has a way of
+ # grouping wavs that are close variations of each other.
+ hash_name = re.sub(r'_nohash_.*$', '', base_name)
+ # This looks a bit magical, but we need to decide whether this file should
+ # go into the training, testing, or validation sets, and we want to keep
+ # existing files in the same set even if more files are subsequently
+ # added.
+ # To do that, we need a stable way of deciding based on just the file name
+ # itself, so we do a hash of that and then use that to generate a
+ # probability value that we use to assign it.
+ hash_name_hashed = hashlib.sha1(tf.compat.as_bytes(hash_name)).hexdigest()
+ percentage_hash = ((int(hash_name_hashed, 16) %
+ (MAX_NUM_WAVS_PER_CLASS + 1)) *
+ (100.0 / MAX_NUM_WAVS_PER_CLASS))
+ if percentage_hash < validation_percentage:
+ result = 'validation'
+ elif percentage_hash < (testing_percentage + validation_percentage):
+ result = 'testing'
+ else:
+ result = 'training'
+ return result
+
+
+def prepare_words_list(wanted_words):
+ """Prepends common tokens to the custom word list.
+
+ Args:
+ wanted_words: List of strings containing custom words to spot.
+
+ Returns:
+ List of words with silence and unknown tokens added.
+ """
+ return [SILENCE_LABEL, UNKNOWN_WORD_LABEL] + wanted_words
+
+
+class AudioProcessor:
+ """Handles loading, partitioning, and preparing audio training data."""
+
+ class Modes(Enum):
+ TRAINING = 1
+ VALIDATION = 2
+ TESTING = 3
+
+ def __init__(self, data_url, data_dir, silence_percentage, unknown_percentage,
+ wanted_words, validation_percentage, testing_percentage, model_settings):
+ self.data_dir = Path(data_dir)
+ self.model_settings = model_settings
+ self.words_list = prepare_words_list(wanted_words)
+
+ self._tf_datasets = {}
+ self.background_data = None
+ self._set_size = {'training': 0, 'validation': 0, 'testing': 0}
+
+ self._download_and_extract_data(data_url, data_dir)
+ self._prepare_datasets(silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage)
+ self._prepare_background_data()
+
+ def get_data(self, mode, background_frequency=0, background_volume_range=0, time_shift=0):
+ """Returns the train, validation or test set for KWS as a TF Dataset.
+
+ Args:
+ mode: The set to return, see AudioProcessor.Modes enumeration.
+ background_frequency: How many of the samples have background noise mixed in.
+ background_volume_range: How loud the background noise should be, between 0 and 1.
+ time_shift: Range to randomly shift the training audio by in time.
+
+ Returns:
+ TF dataset that will generate tuples containing an mfcc and corresponding label.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ dataset = self._tf_datasets['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ dataset = self._tf_datasets['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ dataset = self._tf_datasets['testing']
+ else:
+ raise ValueError("Incorrect dataset type given")
+
+ use_background = (self.background_data is not None) and (mode == AudioProcessor.Modes.TRAINING)
+ dataset = dataset.map(lambda path, label: self._process_path(path, label, self.model_settings,
+ background_frequency, background_volume_range,
+ time_shift, use_background, self.background_data),
+ num_parallel_calls=tf.data.experimental.AUTOTUNE)
+
+ return dataset
+
+ def set_size(self, mode):
+ """Get the number of samples in the requested dataset partition.
+
+ Args:
+ mode: Which partition, see AudioProcessor.Modes enumeration.
+
+ Returns:
+ Number of samples in the partition.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ return self._set_size['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ return self._set_size['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ return self._set_size['testing']
+ else:
+ raise ValueError('Incorrect dataset type given')
+
+ @staticmethod
+ def _process_path(path, label, model_settings, background_frequency, background_volume_range, time_shift_samples,
+ use_background, background_data):
+ """Load wav files and calculate mfcc features.
+
+ Random shifting of samples and adding in background noise is done within this function as well.
+ This function is meant to be mapped onto a TF Dataset by using a lambda function.
+
+ Args:
+ path: Path to the wav file to load.
+ label: Integer label for classifying the audio clip.
+ model_settings: Dictionary of settings for model being trained.
+ background_frequency: How many clips will have background noise, 0.0 to 1.0.
+ background_volume_range: How loud the background noise will be.
+ time_shift_samples: How much to randomly shift the clips by.
+ use_background: Add in background noise to audio clips or not.
+ background_data: Ragged tensor of loaded background noise samples.
+
+ Returns:
+ Tuple of calculated flattened mfcc and its class label.
+ """
+
+ desired_samples = model_settings['desired_samples']
+ audio, sample_rate = load_wav_file(path, desired_samples=desired_samples)
+
+ # Make our own silence audio data.
+ if label == SILENCE_INDEX:
+ audio = tf.multiply(audio, 0)
+
+ # Shift samples start position and pad any gaps with zeros.
+ if time_shift_samples > 0:
+ time_shift_amount = tf.random.uniform(shape=(), minval=-time_shift_samples, maxval=time_shift_samples,
+ dtype=tf.int32)
+ else:
+ time_shift_amount = 0
+ if time_shift_amount > 0:
+ time_shift_padding = [[time_shift_amount, 0], [0, 0]]
+ time_shift_offset = [0, 0]
+ else:
+ time_shift_padding = [[0, -time_shift_amount], [0, 0]]
+ time_shift_offset = [-time_shift_amount, 0]
+
+ padded_foreground = tf.pad(audio, time_shift_padding, mode='CONSTANT')
+ sliced_foreground = tf.slice(padded_foreground, time_shift_offset, [desired_samples, -1])
+
+ # Get a random section of background noise.
+ if use_background:
+ background_index = tf.random.uniform(shape=(), maxval=background_data.shape[0], dtype=tf.int32)
+ background_sample = background_data[background_index]
+ background_offset = tf.random.uniform(shape=(), maxval=len(background_sample)-desired_samples,
+ dtype=tf.int32)
+ background_clipped = background_sample[background_offset:(background_offset + desired_samples)]
+ background_reshaped = tf.reshape(background_clipped, [desired_samples, 1])
+ if tf.random.uniform(shape=(), maxval=1) < background_frequency:
+ background_volume = tf.random.uniform(shape=(), maxval=background_volume_range)
+ else:
+ background_volume = tf.constant(0, dtype='float32')
+ else:
+ background_reshaped = np.zeros([desired_samples, 1], dtype=np.float32)
+ background_volume = tf.constant(0, dtype='float32')
+
+ # Mix in background noise.
+ background_mul = tf.multiply(background_reshaped, background_volume)
+ background_add = tf.add(background_mul, sliced_foreground)
+ background_clamp = tf.clip_by_value(background_add, -1.0, 1.0)
+
+ mfcc = calculate_mfcc(background_clamp, sample_rate, model_settings['window_size_samples'],
+ model_settings['window_stride_samples'],
+ model_settings['dct_coefficient_count'])
+ mfcc = tf.reshape(mfcc, [-1])
+
+ return mfcc, label
+
+ def _download_and_extract_data(self, data_url, target_directory):
+ """Downloads and extracts file to target directory.
+
+ If the file does not already exist download it and then untar into the target directory.
+
+ Args:
+ data_url: Web link to the tarred data to download.
+ target_directory: Directory to download and extract to.
+ """
+ target_directory = Path(target_directory)
+ target_directory.mkdir(exist_ok=True)
+
+ filename = data_url.split('/')[-1]
+ filepath = target_directory / filename
+
+ if not filepath.exists():
+ def _report_hook(block_num, block_size, total_size):
+ """Function to track download progress in urllib"""
+ read_so_far = block_num * block_size
+ percent = (read_so_far / total_size) * 100.0
+
+ s = f"\rDownloading {filename} {percent:.1f}%"
+
+ sys.stdout.write(s)
+ sys.stdout.flush()
+
+ filepath, _ = urllib.request.urlretrieve(data_url, filepath, _report_hook)
+ print()
+
+ print(f'Untarring {filename}...')
+ tarfile.open(filepath, 'r:gz').extractall(target_directory)
+
+ def _prepare_datasets(self, silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage):
+ """Split the data into train, validation and testing sets.
+
+ Silence and unknown data is added, then sets are converted to TF Datasets.
+
+ Args:
+ silence_percentage: Percent of words should be silence.
+ unknown_percentage: Percent of words that should be unknown.
+ wanted_words: List of words wanted to classify.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ """
+ # Make sure the shuffling and picking of unknowns is deterministic.
+ random.seed(RANDOM_SEED)
+ wanted_words_index = {}
+
+ for index, wanted_word in enumerate(wanted_words):
+ wanted_words_index[wanted_word] = index + 2
+
+ # Find all wav files in subfolders.
+ search_path = self.data_dir / '*' / '*.wav'
+ data_index, unknown_index, all_words = self._find_and_sort_wavs(search_path, validation_percentage,
+ testing_percentage, wanted_words_index)
+
+ for index, wanted_word in enumerate(wanted_words):
+ if wanted_word not in all_words:
+ raise Exception(f'Tried to find {wanted_word} in labels but only found: {", ".join(all_words.keys())}')
+
+ word_to_index = {}
+ for word in all_words:
+ if word in wanted_words_index:
+ word_to_index[word] = wanted_words_index[word]
+ else:
+ word_to_index[word] = UNKNOWN_WORD_INDEX
+ word_to_index[SILENCE_LABEL] = SILENCE_INDEX
+
+ # We need an arbitrary file to load as the input for the silence samples.
+ # It's multiplied by zero later, so the content doesn't matter.
+ silence_wav_path = data_index['training'][0]['file']
+ for set_index in ['validation', 'testing', 'training']:
+ set_size = len(data_index[set_index]) # Size before adding silence and unknown samples.
+ silence_size = int(math.ceil(set_size * silence_percentage / 100))
+ for _ in range(silence_size):
+ data_index[set_index].append({
+ 'label': SILENCE_LABEL,
+ 'file': silence_wav_path
+ })
+ # Pick some unknowns to add to each partition of the data set.
+ random.shuffle(unknown_index[set_index])
+ unknown_size = int(math.ceil(set_size * unknown_percentage / 100))
+ data_index[set_index].extend(unknown_index[set_index][:unknown_size])
+
+ self._set_size[set_index] = len(data_index[set_index]) # Size after adding silence and unknown samples.
+
+ # Make sure the ordering is random.
+ random.shuffle(data_index[set_index])
+
+ # Transform into TF Datasets ready for easier processing later.
+ labels, paths = list(zip(*[d.values() for d in data_index[set_index]]))
+ labels = [word_to_index[label] for label in labels]
+ self._tf_datasets[set_index] = tf.data.Dataset.from_tensor_slices((list(paths), labels))
+
+ def _find_and_sort_wavs(self, search_pattern, validation_percentage, testing_percentage, wanted_words_index):
+ """Find and sort wav files into known and unknown word sets.
+
+ Known words are files containing words in the list of wanted words.
+ Any other clip goes to the unknown label set. Labels come from the folder names.
+ All clips are also assigned to train, test and validation sets.
+
+ Args:
+ search_pattern: Path pattern used by glob to find wav files.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ wanted_words_index: Dict mapping wanted words to their label index.
+
+ Returns:
+ 3-tuple of known words, unknown words and mapping of all word labels.
+ """
+ data_index = {'validation': [], 'testing': [], 'training': []}
+ unknown_index = {'validation': [], 'testing': [], 'training': []}
+ all_words = {}
+
+ for wav_path in sorted(tf.io.gfile.glob(str(search_pattern))):
+ word = Path(wav_path).parent.name.lower()
+
+ # Treat the '_background_noise_' folder as a special case, since we expect
+ # it to contain long audio samples we mix in to improve training.
+ if word == BACKGROUND_NOISE_DIR_NAME:
+ continue
+
+ all_words[word] = True
+ set_index = which_set(wav_path, validation_percentage, testing_percentage)
+ # If it's a known class, store its detail, otherwise add it to the list
+ # we'll use to train the unknown label.
+ if word in wanted_words_index:
+ data_index[set_index].append({'label': word, 'file': wav_path})
+ else:
+ unknown_index[set_index].append({'label': word, 'file': wav_path})
+ if not all_words:
+ raise Exception('No .wavs found at ' + str(search_pattern))
+
+ return data_index, unknown_index, all_words
+
+ def _prepare_background_data(self):
+ """Searches a folder for background noise audio, and loads it into memory.
+
+ It's expected that the background audio samples will be in a subdirectory
+ named '_background_noise_' inside the 'data_dir' folder, as .wavs that match
+ the sample rate of the training data, but can be much longer in duration.
+
+ If the '_background_noise_' folder doesn't exist at all, this isn't an
+ error, it's just taken to mean that no background noise augmentation should
+ be used. If the folder does exist, but it's empty, that's treated as an
+ error.
+
+ Returns:
+ Ragged tensor of raw PCM-encoded audio samples of background noise.
+ None if '_background_noise_' folder doesn't exist.
+
+ Raises:
+ Exception: If files aren't found in the folder.
+ """
+ background_data = []
+ background_dir = Path(self.data_dir / BACKGROUND_NOISE_DIR_NAME)
+ if not background_dir.exists():
+ self.background_data = None
+ return
+
+ search_path = Path(background_dir / '*.wav')
+ for wav_path in tf.io.gfile.glob(str(search_path)):
+ wav_data, _ = load_wav_file(wav_path, desired_samples=-1)
+ background_data.append(tf.reshape(wav_data, [-1]))
+
+ if not background_data:
+ raise Exception('No background wav files were found in ' + str(search_path))
+
+ # Ragged tensor as we can't use lists in tf dataset map functions.
+ self.background_data = tf.ragged.stack(background_data)
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/evaluation.py b/models/keyword_spotting/cnn_large/model_package_tf/evaluation.py
new file mode 100644
index 0000000..1bec940
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/evaluation.py
@@ -0,0 +1,250 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for testing trained keyword spotting models from checkpoint files and TFLite files."""
+
+import argparse
+
+import numpy as np
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from cnn_l_inference_tflite import tflite_inference
+
+
+def tflite_test(model_settings, audio_processor, tflite_path):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A TFLite model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ tflite_path: Path to TFLite file to use for inference.
+ """
+ # Evaluate on validation set.
+ print("Running TFLite evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+ expected_indices = np.concatenate([y for x, y in val_data])
+ predicted_indices = []
+
+ for mfcc, label in val_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+ f' (N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TFLite evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(1)
+ expected_indices = np.concatenate([y for x, y in test_data])
+ predicted_indices = []
+
+ for mfcc, label in test_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+ f' (N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def keras_test(model_settings, audio_processor, model):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A loaded keras model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ model: Loaded keras model.
+ """
+ # Evaluate on validation set.
+ print("Running TF evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in val_data])
+
+ predictions = model.predict(val_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+ f' (N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TF evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in test_data])
+
+ predictions = model.predict(test_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+ f' (N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def calculate_accuracy(predicted_indices, expected_indices):
+ """Calculates and returns accuracy.
+
+ Args:
+ predicted_indices: List of predicted integer indices.
+ expected_indices: List of expected integer indices.
+
+ Returns:
+ Accuracy value between 0 and 1.
+ """
+ correct_prediction = tf.equal(predicted_indices, expected_indices)
+ accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+ return accuracy
+
+
+def evaluate():
+ """Calculate accuracy and confusion matrices on validation and test sets.
+
+ Model is created and weights loaded from supplied command line arguments.
+ """
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.tflite_path:
+ tflite_test(model_settings, audio_processor, FLAGS.tflite_path)
+
+ if FLAGS.checkpoint:
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+ keras_test(model_settings, audio_processor, model)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from')
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ help='Path to TFLite file to use for evaluation')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ evaluate()
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/how_to_guidance.ipynb b/models/keyword_spotting/cnn_large/model_package_tf/how_to_guidance.ipynb
new file mode 100644
index 0000000..d818b93
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/how_to_guidance.ipynb
@@ -0,0 +1,428 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.\n",
+ "#\n",
+ "# SPDX-License-Identifier: Apache-2.0\n",
+ "#\n",
+ "# Licensed under the Apache License, Version 2.0 (the License); you may\n",
+ "# not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an AS IS BASIS, WITHOUT\n",
+ "# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# CNN_Large - Optimised\n",
+ "\n",
+ "Here we reproduce the models with our established codebase and ModelPackage approach for your convenience.\n",
+ "\n",
+ "## Model-Package Overview:\n",
+ "\n",
+ "| Model \t| CNN_Large \t|\n",
+ "|:---------------:\t|:---------------------------------------------------------------:\t|\n",
+ "| **Format**: \t| Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |\n",
+ "| **Feature**: \t| Keyword spotting for Arm Cortex-M CPUs |\n",
+ "| **Architectural Delta w.r.t. Vanilla**: | None |\n",
+ "| **Domain**: \t| Keyword spotting |\n",
+ "| **Package Quality**: \t| Optimised |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Table of contents \n",
+ "\n",
+ "This how-to guidance presents the key steps to reproduce everything in this package. The contents are organised as below. We provided the internal navigation links for users to easy-jump among different sections. \n",
+ "\n",
+ " \n",
+ "* [1.0 Model recreation](#model_recreation)\n",
+ "\n",
+ "* [2.0 Training](#training)\n",
+ "\n",
+ "* [3.0 Testing](#testing)\n",
+ "\n",
+ "* [4.0 Optimization](#optimization)\n",
+ "\n",
+ "* [5.0 Quantization and TFLite conversion](#tflite_conversion)\n",
+ "\n",
+ "* [6.0 Inference the TFLite model files](#tflite_inference)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1.0 Model Recreation\n",
+ "\n",
+ "In order to recreate the model you will first need to be using ```Python3.7``` and install the requirements in ```requirements.txt```.\n",
+ "\n",
+ "Once you have these requirements satisfied you can execute the recreation script contained within this folder, just run:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 12:11:37.988637: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 12:12:28.656297: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 12:12:28.695168: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:12:28.695203: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 12:12:28.715771: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 12:12:28.715835: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 12:12:28.718556: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 12:12:28.718828: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 12:12:28.719402: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 12:12:28.720115: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 12:12:28.720266: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 12:12:28.720628: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:12:28.720911: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 12:12:28.721608: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:12:28.721996: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:12:28.722060: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 12:12:29.189512: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:12:29.189552: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:12:29.189560: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:12:29.190094: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11007 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 12:12:30.746072: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 12:12:31.596489: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 12:12:31.596713: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 12:12:31.597272: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:12:31.597524: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:12:31.597556: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:12:31.597566: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:12:31.597575: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:12:31.597851: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11007 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 12:12:31.615526: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 12:12:31.619233: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.019ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.003ms.\n",
+ "\n",
+ "2023-01-31 12:12:31.702242: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 12:12:31.702286: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 12:12:31.707954: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 12:12:31.710595: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:12:31.710946: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:12:31.710984: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:12:31.710993: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:12:31.711005: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:12:31.711361: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11007 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "Converted model saved to cnn.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "2023-01-31 12:12:31.770147: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 301 3 6 3 16 7 5 10 3 4 13]\n",
+ " [ 0 1 383 1 1 1 5 2 0 0 0 3]\n",
+ " [ 0 8 3 362 1 13 3 0 1 1 2 12]\n",
+ " [ 0 2 1 0 322 0 2 0 5 10 5 3]\n",
+ " [ 0 2 0 8 0 360 0 0 0 1 1 5]\n",
+ " [ 0 1 8 4 0 1 336 1 1 0 0 0]\n",
+ " [ 0 6 0 0 1 0 1 353 0 1 1 0]\n",
+ " [ 1 3 0 1 4 1 0 0 342 7 1 3]\n",
+ " [ 0 3 0 1 19 1 2 0 4 338 4 1]\n",
+ " [ 1 1 2 0 7 1 1 0 2 1 334 0]\n",
+ " [ 0 5 0 9 1 7 0 1 1 3 1 344]]\n",
+ "Validation accuracy = 93.27%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 350 1 5 5 8 9 5 8 0 6 11]\n",
+ " [ 0 9 401 0 0 1 3 0 0 1 0 4]\n",
+ " [ 0 2 1 375 0 8 5 0 0 0 0 14]\n",
+ " [ 0 8 0 2 388 2 0 0 5 13 4 3]\n",
+ " [ 0 4 1 8 1 378 1 0 2 0 1 10]\n",
+ " [ 0 5 7 1 2 0 396 0 0 0 1 0]\n",
+ " [ 0 11 0 0 0 1 5 377 0 0 1 1]\n",
+ " [ 0 5 0 0 4 4 0 0 363 14 2 4]\n",
+ " [ 0 4 0 2 12 0 1 0 6 374 1 2]\n",
+ " [ 0 0 0 0 5 5 0 0 0 1 400 0]\n",
+ " [ 0 4 2 13 3 13 3 1 0 3 1 359]]\n",
+ "Test accuracy = 93.44%(N=4890)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 12:13:11.688023: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 12:14:02.193138: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 12:14:02.228847: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:14:02.228887: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 12:14:02.249127: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 12:14:02.249193: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 12:14:02.251962: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 12:14:02.252223: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 12:14:02.252782: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 12:14:02.253506: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 12:14:02.253657: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 12:14:02.254137: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:14:02.254437: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 12:14:02.255267: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:14:02.255838: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:14:02.255907: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 12:14:02.712898: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:14:02.712937: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:14:02.712946: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:14:02.713547: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11007 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 12:14:04.312064: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 12:14:05.110529: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 12:14:05.110622: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 12:14:05.111243: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:14:05.111519: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:14:05.111551: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:14:05.111562: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:14:05.111570: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:14:05.111865: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11007 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 12:14:05.131485: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 12:14:05.133498: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.009ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.002ms.\n",
+ "\n",
+ "2023-01-31 12:14:05.210179: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 12:14:05.210218: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 12:14:05.215177: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 12:14:05.217453: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:14:05.217717: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:14:05.217748: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:14:05.217758: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:14:05.217766: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:14:05.218054: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11007 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 12:14:05.257830: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "fully_quantize: 0, inference_type: 6, input_inference_type: 9, output_inference_type: 9\n",
+ "Quantized model saved to cnn_quantized.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 304 3 4 3 14 7 6 9 4 4 13]\n",
+ " [ 0 2 382 2 1 0 4 2 0 1 0 3]\n",
+ " [ 0 7 3 356 5 11 3 0 1 1 3 16]\n",
+ " [ 0 2 1 0 318 1 2 0 5 10 8 3]\n",
+ " [ 0 2 0 8 1 354 1 0 0 0 4 7]\n",
+ " [ 0 2 6 3 3 1 333 2 1 0 0 1]\n",
+ " [ 0 7 0 0 1 0 3 349 0 2 1 0]\n",
+ " [ 1 4 0 2 4 1 0 0 341 6 1 3]\n",
+ " [ 0 3 1 1 24 1 4 0 6 328 3 2]\n",
+ " [ 1 3 2 0 10 3 0 0 0 1 330 0]\n",
+ " [ 0 5 0 8 2 8 0 1 1 3 2 342]]\n",
+ "Validation accuracy = 92.42%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 351 2 4 4 7 9 6 9 0 6 10]\n",
+ " [ 0 12 392 0 1 1 9 0 0 0 2 2]\n",
+ " [ 0 5 1 366 2 8 6 1 0 0 1 15]\n",
+ " [ 0 8 1 2 379 3 2 2 7 10 9 2]\n",
+ " [ 0 7 1 10 1 370 1 1 1 0 4 10]\n",
+ " [ 0 8 7 2 4 0 387 2 0 0 2 0]\n",
+ " [ 0 10 0 0 1 0 8 372 0 1 2 2]\n",
+ " [ 1 12 0 0 6 4 0 1 356 11 0 5]\n",
+ " [ 0 5 0 2 15 0 0 1 6 368 2 3]\n",
+ " [ 0 0 0 2 4 4 0 0 0 0 399 2]\n",
+ " [ 0 5 0 12 4 15 4 1 1 1 4 355]]\n",
+ "Test accuracy = 92.09%(N=4890)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!bash ./recreate_model.sh"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder to generate the TFLite files and perform evaluation on the test set. Both an fp32 version and a quantized version will be produced. The quantized version will use post-training quantization to fully quantize it.\n",
+ "\n",
+ "If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --train\n",
+ "```\n",
+ "\n",
+ "Training is then performed and should produce a model to the stated accuracy in this repository. Note that exporting to TFLite will still happen with the baseline pre-trained checkpoint files, so you will need to re-run the script and this time supply the path to the new checkpoint files you want to use, for example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --ckpt \n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2.0 Training\n",
+ "\n",
+ "The training scripts can be used to recreate any of the models from the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf) provided the right hyperparameters are used. The training commands with all the hyperparameters to reproduce the model in this repository are given [here](recreate_model.sh). The model in this part of the repository represents just one variation of the models from the paper, other varieties are covered in other parts of the repository.\n",
+ "\n",
+ "\n",
+ "As a general example of how to train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:\n",
+ "```\n",
+ "python train.py --model_architecture dnn --model_size_info 128 128 128\n",
+ "```\n",
+ "\n",
+ "The command line argument *--model_size_info* is used to pass the neural network layer\n",
+ "dimensions such as number of layers, convolution filter size/stride as a list to models.py,\n",
+ "which builds the TensorFlow graph based on the provided model architecture\n",
+ "and layer dimensions. For more info on *model_size_info* for each network architecture see\n",
+ "[models.py](model_core_utils/models.py).\n"
+ ]
+ },
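+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a rough sketch of how the ```cnn``` architecture consumes *model_size_info* (see `create_cnn_model` in [models.py](model_core_utils/models.py)), the snippet below builds a small CNN from a 12-element list. The values are purely illustrative and are **not** the CNN Large hyperparameters; the exact commands and hyperparameters used for this package are in [recreate_model.sh](recreate_model.sh).\n",
+    "\n",
+    "```python\n",
+    "from model_core_utils import models\n",
+    "\n",
+    "# Illustrative settings only (matching the script defaults), not the CNN Large configuration.\n",
+    "model_settings = models.prepare_model_settings(\n",
+    "    label_count=12, sample_rate=16000, clip_duration_ms=1000,\n",
+    "    window_size_ms=30.0, window_stride_ms=10.0, dct_coefficient_count=40)\n",
+    "\n",
+    "model_size_info = [\n",
+    "    16, 10, 4, 1, 1,   # conv 1: filters, kernel height (time), kernel width (frequency), stride y, stride x\n",
+    "    32, 10, 4, 2, 1,   # conv 2: filters, kernel height, kernel width, stride y, stride x\n",
+    "    32,                # linear layer units\n",
+    "    64,                # final fully-connected layer units\n",
+    "]\n",
+    "\n",
+    "model = models.create_model(model_settings, 'cnn', model_size_info, is_training=False)\n",
+    "model.summary()\n",
+    "```"
+   ]
+  },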
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3.0 Testing\n",
+ "To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:\n",
+ "```\n",
+ "python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters passed to this script should match those used in the Training step.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4.0 Optimization\n",
+ "\n",
+ "We introduce an *optional* step to optimize the trained keyword spotting model for deployment.\n",
+ "\n",
+ "Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. 32 weight clusters and kmeans++ cluster intialization method are used as the clustering hyperparameters.\n",
+ "\n",
+ "To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.\n",
+ "You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.\n",
+ "\n",
+ "To apply the optimization and fine-tuning, run the following command:\n",
+ "```\n",
+ "python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step, except for the number of training steps.\n",
+ "The number of training steps is reduced since the optimization step only requires fine-tuning.**\n",
+ "\n",
+ "This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model."
+ ]
+ },
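+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For reference, the core of the clustering step in [optimisations.py](optimisations.py) looks roughly like the sketch below: the trained Keras model is wrapped with the clustering API, fine-tuned briefly, and then stripped of the clustering wrappers before the checkpoint is saved. The `model`, `train_data` and `val_data` arguments are assumed to come from the training step.\n",
+    "\n",
+    "```python\n",
+    "import tensorflow as tf\n",
+    "import tensorflow_model_optimization as tfmot\n",
+    "\n",
+    "def cluster_and_finetune(model, train_data, val_data, epochs=3):\n",
+    "    # Sketch of the optimization step: 32 clusters with k-means++ centroid initialization.\n",
+    "    clustering_params = {\n",
+    "        'number_of_clusters': 32,\n",
+    "        'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS,\n",
+    "    }\n",
+    "    clustered = tfmot.clustering.keras.cluster_weights(model, **clustering_params)\n",
+    "    clustered.compile(optimizer=tf.keras.optimizers.Adam(1e-4),\n",
+    "                      loss=tf.keras.losses.SparseCategoricalCrossentropy(),\n",
+    "                      metrics=['accuracy'])\n",
+    "    clustered.fit(train_data, validation_data=val_data, epochs=epochs)  # short fine-tuning only\n",
+    "    # Remove the clustering wrappers so the weights can be saved and quantized as usual.\n",
+    "    return tfmot.clustering.keras.strip_clustering(clustered)\n",
+    "```"
+   ]
+  },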
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5.0 Quantization and TFLite Conversion\n",
+ "\n",
+ "You can now use TensorFlow's\n",
+ "[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to\n",
+ "make quantization of the trained models super simple.\n",
+ "\n",
+ "To quantize your trained model (e.g. a DNN) run:\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint [--inference_type int8|int16]\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "The ```inference_type``` parameter is *optional* and to be used if a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32.\n",
+ "\n",
+ "In this example, this step will produce a quantized TFLite file *dnn_quantized.tflite*."
+ ]
+ },
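+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Under the hood, `convert_to_tflite.py` relies on TensorFlow's post-training quantization. A minimal sketch of a fully int8 conversion is shown below; it assumes `model` is the trained Keras model and `calibration_data` is a `tf.data` dataset of (features, label) pairs from the training set. The script in this package is the reference implementation and also covers the int16 case.\n",
+    "\n",
+    "```python\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "def convert_to_int8(model, calibration_data, output_path='dnn_quantized.tflite'):\n",
+    "    # Sketch only: convert_to_tflite.py in this package is the reference implementation.\n",
+    "    converter = tf.lite.TFLiteConverter.from_keras_model(model)\n",
+    "    converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
+    "\n",
+    "    def representative_dataset():\n",
+    "        # A few hundred feature vectors let the converter calibrate activation ranges.\n",
+    "        for features, _ in calibration_data.take(300):\n",
+    "            yield [tf.reshape(tf.cast(features, tf.float32), [1, -1])]\n",
+    "\n",
+    "    converter.representative_dataset = representative_dataset\n",
+    "    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]\n",
+    "    converter.inference_input_type = tf.int8\n",
+    "    converter.inference_output_type = tf.int8\n",
+    "\n",
+    "    with open(output_path, 'wb') as f:\n",
+    "        f.write(converter.convert())\n",
+    "```"
+   ]
+  },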
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can test the accuracy of this quantized model on the test set by running:\n",
+ "```\n",
+ "python evaluation.py --tflite_path dnn_quantized.tflite\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:\n",
+ "\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint --no-quantize\n",
+ "```\n",
+ "\n",
+ "This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 6.0 Single inference of the TFLite model files \n",
+ "\n",
+ "You can conduct TFLite inference for .fp32 and .int8 model files by using the following command: \n",
+ "\n",
+ "```python cnn_l_inference_tflite.py --labels validation_utils/labels.txt --wav --tflite_path ```\n",
+ "\n",
+ "**The feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "\n"
+ ]
+ },
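+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you want to drive a TFLite file directly rather than through `cnn_l_inference_tflite.py`, a minimal sketch using the TFLite interpreter is shown below. It assumes `features` is an already-extracted MFCC fingerprint of shape (1, 490) as a float32 NumPy array for the fp32 model; for the int8 model the input must additionally be quantized using the scale and zero point reported by `get_input_details()`.\n",
+    "\n",
+    "```python\n",
+    "import numpy as np\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "def run_single_inference(tflite_path, features):\n",
+    "    # Sketch only: see cnn_l_inference_tflite.py for the full flow starting from a wav file.\n",
+    "    interpreter = tf.lite.Interpreter(model_path=tflite_path)\n",
+    "    interpreter.allocate_tensors()\n",
+    "\n",
+    "    input_details = interpreter.get_input_details()[0]\n",
+    "    output_details = interpreter.get_output_details()[0]\n",
+    "\n",
+    "    interpreter.set_tensor(input_details['index'], features.astype(np.float32))\n",
+    "    interpreter.invoke()\n",
+    "\n",
+    "    probabilities = interpreter.get_tensor(output_details['index'])[0]  # shape (12,)\n",
+    "    return int(np.argmax(probabilities))\n",
+    "```"
+   ]
+  },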
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/README.md b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
new file mode 100644
index 0000000..fdb2fcc
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32
+
+## Description
+This is a floating point fp32 version of the CNN Large model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | fp32 |
+| SHA-1 Hash | e77e0f185dd6b7b9adcb9d867279a6c0a0ecbf02 |
+| Size (Bytes) | 1908316 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 93.44% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_multiplication_x: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Deployable |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_multiplication_x: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 490) | fp32 | models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input | fp32 | [1, 490] | The input is a set of processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | fp32 | models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity | fp32 | [1, 12] | The probabilities of the 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/cnn_l.tflite b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/cnn_l.tflite
new file mode 100644
index 0000000..cab79f2
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/cnn_l.tflite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1a82f9c75ab57bafccbe9a154454d228c9610bd66cb186a69bab4fcc9958558
+size 1908316
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
new file mode 100644
index 0000000..9404113
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
@@ -0,0 +1,64 @@
+benchmark:
+ benchmark_metrics:
+ accuracy: 93.44%
+ benchmark_name: Google Speech Commands test set
+description: This is a floating point fp32 version of the CNN Large model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: fp32
+ file_size_bytes: 1908316
+ filename: cnn_l.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: e77e0f185dd6b7b9adcb9d867279a6c0a0ecbf02
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+  - description: The input is a set of processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input
+ shape:
+ - 1
+ - 490
+ type: fp32
+ use_case: Random input for model regression.
+ input_datatype: fp32
+ name: input
+ shape:
+ - 1
+ - 490
+ output_nodes:
+  - description: The probabilities of the 12 keywords.
+ example_output:
+ path: models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: fp32
+      use_case: Output for model regression.
+ name: Identity
+ output_datatype: fp32
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: false
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
new file mode 100644
index 0000000..4b93b40
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0d3177ad9e25a08e300d6dab37303348cc99cda9137a0ed98bfe4ecabb4cbe2
+size 2088
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
new file mode 100644
index 0000000..cca051a
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84727ee69c9018fcd7295ca5646c29a982b948ce3abd7c4a9c44c7203c699b24
+size 176
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/README.md b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/README.md
new file mode 100644
index 0000000..8befb51
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8
+
+## Description
+This is a fully quantized int8 version of the CNN Large model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | int8 |
+| SHA-1 Hash | a61ab748ae8f52f78ab568342db67a792c6ecf34 |
+| Size (Bytes) | 484600 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 92.27% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_check_mark: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Deployable |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_check_mark: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 490) | int8 | models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input | int8 | [1, 490] | The input is a set of processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | int8 | models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity | int8 | [1, 12] | The probabilities of the 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_large/tflite_int8/cnn_l_quantized.tflite b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/cnn_l_quantized.tflite
similarity index 100%
rename from models/keyword_spotting/cnn_large/tflite_int8/cnn_l_quantized.tflite
rename to models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/cnn_l_quantized.tflite
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
new file mode 100644
index 0000000..32429b1
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
@@ -0,0 +1,64 @@
+benchmark:
+ benchmark_metrics:
+ accuracy: 92.27%
+ benchmark_name: Google Speech Commands test set
+description: This is a fully quantized int8 version of the CNN Large model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: int8
+ file_size_bytes: 484600
+ filename: cnn_l_quantized.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: a61ab748ae8f52f78ab568342db67a792c6ecf34
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+  - description: The input is a set of processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input
+ shape:
+ - 1
+ - 490
+ type: int8
+ use_case: Random input for model regression.
+ input_datatype: int8
+ name: input
+ shape:
+ - 1
+ - 490
+ output_nodes:
+  - description: The probabilities of the 12 keywords.
+ example_output:
+ path: models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: int8
+      use_case: Output for model regression.
+ name: Identity
+ output_datatype: int8
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: true
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_large/tflite_int8/testing_input/input/0.npy b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
similarity index 100%
rename from models/keyword_spotting/cnn_large/tflite_int8/testing_input/input/0.npy
rename to models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
diff --git a/models/keyword_spotting/cnn_large/tflite_int8/testing_output/Identity/0.npy b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
similarity index 100%
rename from models/keyword_spotting/cnn_large/tflite_int8/testing_output/Identity/0.npy
rename to models/keyword_spotting/cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/saved_model/cnn_large/keras_metadata.pb b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/saved_model/cnn_large/keras_metadata.pb
new file mode 100644
index 0000000..95bf328
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/saved_model/cnn_large/keras_metadata.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4200839672e3d67af379cc06349ee6af8ab3b53c966562595b31473afc252c6d
+size 28876
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/saved_model/cnn_large/saved_model.pb b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/saved_model/cnn_large/saved_model.pb
new file mode 100644
index 0000000..ff4b1b6
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/saved_model/cnn_large/saved_model.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d0494f8fe5b99a8b92217809d33d287f855e9281465548650037906c57912a2
+size 302218
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/saved_model/cnn_large/variables/variables.data-00000-of-00001 b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/saved_model/cnn_large/variables/variables.data-00000-of-00001
new file mode 100644
index 0000000..d05f350
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/saved_model/cnn_large/variables/variables.data-00000-of-00001
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d8519182ae8e5d3dbf4762e2db5c1ac27472e95e9ef4aa0772aec6991020ffd
+size 1917320
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/saved_model/cnn_large/variables/variables.index b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/saved_model/cnn_large/variables/variables.index
new file mode 100644
index 0000000..f6645fe
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/saved_model/cnn_large/variables/variables.index
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:398bc377f651632cfde25ca4c1e372d04fe199868080ec162f482db3a7d8399e
+size 1478
diff --git a/models/keyword_spotting/cnn_large/tflite_int8/ckpt/checkpoint b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/weights/checkpoint
similarity index 100%
rename from models/keyword_spotting/cnn_large/tflite_int8/ckpt/checkpoint
rename to models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/weights/checkpoint
diff --git a/models/keyword_spotting/cnn_large/tflite_int8/ckpt/cnn_0.94_ckpt.data-00000-of-00001 b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/weights/cnn_0.94_ckpt.data-00000-of-00001
similarity index 100%
rename from models/keyword_spotting/cnn_large/tflite_int8/ckpt/cnn_0.94_ckpt.data-00000-of-00001
rename to models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/weights/cnn_0.94_ckpt.data-00000-of-00001
diff --git a/models/keyword_spotting/cnn_large/tflite_int8/ckpt/cnn_0.94_ckpt.index b/models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/weights/cnn_0.94_ckpt.index
similarity index 100%
rename from models/keyword_spotting/cnn_large/tflite_int8/ckpt/cnn_0.94_ckpt.index
rename to models/keyword_spotting/cnn_large/model_package_tf/model_archive/model_source/weights/cnn_0.94_ckpt.index
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/model_core_utils/__init__.py b/models/keyword_spotting/cnn_large/model_package_tf/model_core_utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/model_core_utils/models.py b/models/keyword_spotting/cnn_large/model_package_tf/model_core_utils/models.py
new file mode 100644
index 0000000..1978136
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/model_core_utils/models.py
@@ -0,0 +1,327 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Model definitions for simple keyword spotting."""
+
+import math
+
+import tensorflow as tf
+
+
+def prepare_model_settings(label_count, sample_rate, clip_duration_ms,
+ window_size_ms, window_stride_ms,
+ dct_coefficient_count):
+ """Calculates common settings needed for all models.
+
+ Args:
+ label_count: How many classes are to be recognized.
+ sample_rate: Number of audio samples per second.
+ clip_duration_ms: Length of each audio clip to be analyzed.
+ window_size_ms: Duration of frequency analysis window.
+ window_stride_ms: How far to move in time between frequency windows.
+ dct_coefficient_count: Number of frequency bins to use for analysis.
+
+ Returns:
+ Dictionary containing common settings.
+ """
+ desired_samples = int(sample_rate * clip_duration_ms / 1000)
+ window_size_samples = int(sample_rate * window_size_ms / 1000)
+ window_stride_samples = int(sample_rate * window_stride_ms / 1000)
+ length_minus_window = (desired_samples - window_size_samples)
+ if length_minus_window < 0:
+ spectrogram_length = 0
+ else:
+ spectrogram_length = 1 + int(length_minus_window / window_stride_samples)
+ fingerprint_size = dct_coefficient_count * spectrogram_length
+
+ return {
+ 'desired_samples': desired_samples,
+ 'window_size_samples': window_size_samples,
+ 'window_stride_samples': window_stride_samples,
+ 'spectrogram_length': spectrogram_length,
+ 'dct_coefficient_count': dct_coefficient_count,
+ 'fingerprint_size': fingerprint_size,
+ 'label_count': label_count,
+ 'sample_rate': sample_rate,
+ }
+
+
+def create_model(model_settings, model_architecture, model_size_info, is_training=False):
+ """Builds a tf.keras model of the requested architecture compatible with the settings.
+
+ Args:
+ model_settings: Dictionary of information about the model.
+ model_architecture: String specifying which kind of model to create.
+        model_size_info: Array with specific information for the chosen architecture
+            (e.g. convolutional parameters, number of layers).
+        is_training: Whether the model is being built for training (currently only used by the LSTM architecture).
+
+ Returns:
+ A tf.keras Model with the requested architecture.
+
+ Raises:
+ Exception: If the architecture type isn't recognized.
+ """
+
+ if model_architecture == 'dnn':
+ return create_dnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'cnn':
+ return create_cnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'ds_cnn':
+ return create_ds_cnn_model(model_settings, model_size_info)
+ elif model_architecture == 'single_fc':
+ return create_single_fc_model(model_settings)
+ elif model_architecture == 'basic_lstm':
+ return create_basic_lstm_model(model_settings, model_size_info, is_training)
+ else:
+        raise Exception(f'model_architecture argument {model_architecture} not recognized, '
+                        f'should be one of "dnn", "cnn", "ds_cnn", "single_fc" or "basic_lstm"')
+
+
+def create_single_fc_model(model_settings):
+ """Builds a model with a single fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+
+ Returns:
+ tf.keras Model of the 'SINGLE_FC' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+ # Fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(inputs)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_basic_lstm_model(model_settings, model_size_info, is_training):
+ """Builds a model with a basic lstm layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+        model_size_info: Array of model dimensions; the first element sets the number of LSTM units.
+        is_training: Whether the model is being built for training; this controls whether the LSTM is unrolled.
+
+ Returns:
+ tf.keras Model of the 'Basic_LSTM' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size))
+
+ # LSTM layer, and unrolling depending on whether you are training or not
+ if is_training:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=False)(x)
+ else:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=True)(x)
+
+ # Outputs a fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_dnn_model(model_settings, model_size_info):
+ """Builds a model with multiple hidden fully-connected layers.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Length of the array defines the number of hidden-layers and
+          each element in the array represents the number of neurons in that layer.
+
+ Returns:
+ tf.keras Model of the 'DNN' architecture.
+ """
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ # First fully connected layer.
+ x = tf.keras.layers.Dense(units=model_size_info[0], activation='relu')(inputs)
+
+ # Hidden layers with ReLU activations.
+ for i in range(1, len(model_size_info)):
+ x = tf.keras.layers.Dense(units=model_size_info[i], activation='relu')(x)
+
+ # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_cnn_model(model_settings, model_size_info):
+ """Builds a model with 2 convolution layers followed by a linear layer and a hidden fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines the first and second convolution parameters in
+ {number of conv features, conv filter height, width, stride in y,x dir.},
+ followed by linear layer size and fully-connected layer size.
+
+ Returns:
+ tf.keras Model of the 'CNN' architecture.
+ """
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ first_filter_count = model_size_info[0]
+ first_filter_height = model_size_info[1] # Time axis.
+ first_filter_width = model_size_info[2] # Frequency axis.
+ first_filter_stride_y = model_size_info[3] # Time axis.
+    first_filter_stride_x = model_size_info[4] # Frequency axis.
+
+ second_filter_count = model_size_info[5]
+ second_filter_height = model_size_info[6] # Time axis.
+ second_filter_width = model_size_info[7] # Frequency axis.
+ second_filter_stride_y = model_size_info[8] # Time axis.
+ second_filter_stride_x = model_size_info[9] # Frequency axis.
+
+ linear_layer_size = model_size_info[10]
+ fc_size = model_size_info[11]
+
+    inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=first_filter_count,
+ kernel_size=(first_filter_height, first_filter_width),
+ strides=(first_filter_stride_y, first_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Second convolution.
+ x = tf.keras.layers.Conv2D(filters=second_filter_count,
+ kernel_size=(second_filter_height, second_filter_width),
+ strides=(second_filter_stride_y, second_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Flatten for fully connected layers.
+ x = tf.keras.layers.Flatten()(x)
+
+ # Fully connected layer with no activation.
+ x = tf.keras.layers.Dense(units=linear_layer_size)(x)
+
+ # Fully connected layer with ReLU activation.
+ x = tf.keras.layers.Dense(units=fc_size)(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Output fully connected.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_ds_cnn_model(model_settings, model_size_info):
+ """Builds a model with convolutional & depthwise separable convolutional layers.
+
+ For more details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines number of layers, followed by the DS-Conv layer
+ parameters in the order {number of conv features, conv filter height,
+ width and stride in y,x dir.} for each of the layers.
+
+ Returns:
+ tf.keras Model of the 'DS-CNN' architecture.
+ """
+
+ label_count = model_settings['label_count']
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ t_dim = input_time_size
+ f_dim = input_frequency_size
+
+ # Extract model dimensions from model_size_info.
+ num_layers = model_size_info[0]
+ conv_feat = [None]*num_layers
+ conv_kt = [None]*num_layers
+ conv_kf = [None]*num_layers
+ conv_st = [None]*num_layers
+ conv_sf = [None]*num_layers
+
+ i = 1
+ for layer_no in range(0, num_layers):
+ conv_feat[layer_no] = model_size_info[i]
+ i += 1
+ conv_kt[layer_no] = model_size_info[i]
+ i += 1
+ conv_kf[layer_no] = model_size_info[i]
+ i += 1
+ conv_st[layer_no] = model_size_info[i]
+ i += 1
+ conv_sf[layer_no] = model_size_info[i]
+ i += 1
+
+    inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # Depthwise separable convolutions.
+ for layer_no in range(0, num_layers):
+ if layer_no == 0:
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[0],
+ kernel_size=(conv_kt[0], conv_kf[0]),
+ strides=(conv_st[0], conv_sf[0]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ else:
+ # Depthwise convolution.
+ x = tf.keras.layers.DepthwiseConv2D(kernel_size=(conv_kt[layer_no], conv_kf[layer_no]),
+ strides=(conv_sf[layer_no], conv_st[layer_no]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ # Pointwise convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[layer_no], kernel_size=(1, 1))(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
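+        # Track the time/frequency dimensions after striding so the final average pool covers the whole feature map.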
+ t_dim = math.ceil(t_dim/float(conv_st[layer_no]))
+ f_dim = math.ceil(f_dim/float(conv_sf[layer_no]))
+
+ # Global average pool.
+ x = tf.keras.layers.AveragePooling2D(pool_size=(t_dim, f_dim), strides=1)(x)
+
+ # Squeeze before passing to output fully connected layer.
+ x = tf.reshape(x, shape=(-1, conv_feat[layer_no]))
+
+    # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=label_count, activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/optimisations.py b/models/keyword_spotting/cnn_large/model_package_tf/optimisations.py
new file mode 100644
index 0000000..16b6f4c
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/optimisations.py
@@ -0,0 +1,259 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for optimizing simple keyword spotting models using clustering API."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+import tensorflow_model_optimization as tfmot
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def print_model_weight_clusters(model):
+
+ for layer in model.layers:
+ if isinstance(layer, tf.keras.layers.Wrapper):
+ weights = layer.trainable_weights
+ else:
+ weights = layer.weights
+ for weight in weights:
+ if "kernel" in weight.name:
+ unique_count = len(np.unique(weight))
+ print(
+ f"{layer.name}/{weight.name}: {unique_count} clusters "
+ )
+
+
+def optimize():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model to optimize from checkpoint.
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ cluster_weights = tfmot.clustering.keras.cluster_weights
+ CentroidInitialization = tfmot.clustering.keras.CentroidInitialization
+
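+    # 32 weight clusters with k-means++ centroid initialization, as described in section 4.0 of the how-to guide.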
+ clustering_params = {
+ 'number_of_clusters': 32,
+ 'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS}
+
+ clustered_model = cluster_weights(model, **clustering_params)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so we need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Train the model with clustering applied.
+ clustered_model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data)
+
+ stripped_clustered_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+
+ print_model_weight_clusters(stripped_clustered_model)
+
+ # Save the clustered model weights
+ train_dir = Path(FLAGS.train_dir) / "optimized"
+ train_dir.mkdir(parents=True, exist_ok=True)
+
+ stripped_clustered_model.save_weights((train_dir /
+ (FLAGS.model_architecture +
+ "_clustered_ckpt")))
+
+ # Test the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ stripped_clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ test_loss, test_acc = stripped_clustered_model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='3750,750',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--save_step_interval',
+ type=int,
+ default=100,
+ help='Save model checkpoint every save_steps.')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from before fine-tuning.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ optimize()
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/recreate_model.sh b/models/keyword_spotting/cnn_large/model_package_tf/recreate_model.sh
new file mode 100644
index 0000000..1ea0506
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/recreate_model.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ckpt_path=model_archive/model_source/weights/cnn_0.94_ckpt
+train=false
+
+# Parse command line args
+while (( $# >= 1 )); do
+ case $1 in
+ --ckpt)
+ if [ "$2" ]; then
+ ckpt_path=$2
+ shift
+ else
+ printf 'ERROR: "--ckpt" requires a path to be supplied.\n'
+ exit 1
+ fi
+ ;;
+ --train)
+ train=true
+ break;;
+ *) shift;
+ esac;
+done
+
+
+# CNN Large training
+if [ "$train" = true ]
+then
+python train.py --model_architecture cnn --model_size_info 60 10 4 1 1 76 10 4 2 1 58 128 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --learning_rate 0.0005,0.0001,0.00002 --how_many_training_steps 10000,10000,10000 --summaries_dir work/CNN/CNN_L/retrain_logs --train_dir work/CNN/CNN_L/training
+fi
+
+# Conversion to TFLite fp32
+python convert_to_tflite.py --model_architecture cnn --model_size_info 60 10 4 1 1 76 10 4 2 1 58 128 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --checkpoint $ckpt_path --no-quantize
+
+# Conversion to TFLite int8
+python convert_to_tflite.py --model_architecture cnn --model_size_info 60 10 4 1 1 76 10 4 2 1 58 128 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --checkpoint $ckpt_path --inference_type int8
+
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/requirements.txt b/models/keyword_spotting/cnn_large/model_package_tf/requirements.txt
new file mode 100644
index 0000000..3448cff
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/requirements.txt
@@ -0,0 +1,3 @@
+numpy == 1.19.5
+tensorflow == 2.5.0
+tensorflow-model-optimization == 0.6.0
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/train.py b/models/keyword_spotting/cnn_large/model_package_tf/train.py
new file mode 100644
index 0000000..8c488b3
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/train.py
@@ -0,0 +1,227 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for training simple keyword spotting models."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def train():
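+    """Train a keyword spotting model using the supplied flags, then test and save it."""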
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model.
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, True)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay the learning rate in a piecewise constant way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so we need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Callbacks.
+ train_dir = Path(FLAGS.train_dir) / "best"
+ train_dir.mkdir(parents=True, exist_ok=True)
+ model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
+ filepath=(train_dir / (FLAGS.model_architecture + "_{val_accuracy:.3f}_ckpt")),
+ save_weights_only=True,
+ monitor='val_accuracy',
+ mode='max',
+ save_best_only=True)
+ tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.summaries_dir)
+
+ # Train the model.
+ model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data,
+ callbacks=[model_checkpoint_callback, tensorboard_callback])
+
+ # Test and save the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ test_loss, test_acc = model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+ model.save(f'saved_model/{FLAGS.model_architecture}')
+ model.save(f'keras/{FLAGS.model_architecture}.h5')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='15000,3000',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--summaries_dir',
+ type=str,
+ default='/tmp/retrain_logs',
+ help='Where to save summary logs for TensorBoard.')
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ train()
diff --git a/models/keyword_spotting/cnn_large/model_package_tf/validation_utils/labels.txt b/models/keyword_spotting/cnn_large/model_package_tf/validation_utils/labels.txt
new file mode 100644
index 0000000..ba41645
--- /dev/null
+++ b/models/keyword_spotting/cnn_large/model_package_tf/validation_utils/labels.txt
@@ -0,0 +1,12 @@
+_silence_
+_unknown_
+yes
+no
+up
+down
+left
+right
+on
+off
+stop
+go
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_large/tflite_int8/README.md b/models/keyword_spotting/cnn_large/tflite_int8/README.md
deleted file mode 100644
index 479133f..0000000
--- a/models/keyword_spotting/cnn_large/tflite_int8/README.md
+++ /dev/null
@@ -1,58 +0,0 @@
-# CNN Large INT8
-
-## Description
-This is a fully quantized version (asymmetrical int8) of the CNN Large model developed by Arm, with training checkpoints, from the Hello Edge paper. Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
-
-## License
-[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
-
-## Related Materials
-### Class Labels
-The class labels associated with this model can be downloaded by running the script `get_class_labels.sh`.
-
-### Model Recreation Code
-Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m.
-
-## Network Information
-| Network Information | Value |
-|---------------------|------------------|
-| Framework | TensorFlow Lite |
-| SHA-1 Hash | a61ab748ae8f52f78ab568342db67a792c6ecf34 |
-| Size (Bytes) | 484600 |
-| Provenance | https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m |
-| Paper | https://arxiv.org/abs/1711.07128 |
-
-## Accuracy
-Dataset: Google Speech Commands Test Set
-
-| Metric | Value |
-|--------|-------|
-| Accuracy | 0.931 |
-
-## Performance
-| Platform | Optimized |
-|----------|:---------:|
-| Cortex-A |:heavy_check_mark: |
-| Cortex-M |:heavy_check_mark: |
-| Mali GPU |:heavy_check_mark: |
-| Ethos U |:heavy_check_mark: |
-
-### Key
-* :heavy_check_mark: - Will run on this platform.
-* :heavy_multiplication_x: - Will not run on this platform.
-
-
-## Optimizations
-| Optimization | Value |
-|-----------------|---------|
-| Quantization | INT8 |
-
-## Network Inputs
-| Input Node Name | Shape | Description |
-|-----------------|---------|-------------|
-| input | (1, 490) | The input is a processed MFCCs of shape (1, 490) |
-
-## Network Outputs
-| Output Node Name | Shape | Description |
-|------------------|---------|-------------|
-| Identity | (1, 12) | The probability on 12 keywords. |
diff --git a/models/keyword_spotting/cnn_large/tflite_int8/definition.yaml b/models/keyword_spotting/cnn_large/tflite_int8/definition.yaml
deleted file mode 100644
index 63dcf0d..0000000
--- a/models/keyword_spotting/cnn_large/tflite_int8/definition.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-benchmark:
- Google Speech Commands test set:
- Accuracy: 93.09%
-description: 'This is a fully quantized version (asymmetrical int8) of the CNN Large
- model developed by Arm, with training checkpoints, from the Hello Edge paper. Code
- to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m'
-license:
-- Apache-2.0
-network:
- file_size_bytes: 484600
- filename: cnn_l_quantized.tflite
- framework: TensorFlow Lite
- hash:
- algorithm: sha1
- value: a61ab748ae8f52f78ab568342db67a792c6ecf34
- provenance: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
- quality_level: null
-network_parameters:
- input_nodes:
- - description: The input is a processed MFCCs of shape (1, 490)
- example_input:
- path: models/keyword_spotting/cnn_large/tflite_int8/testing_input/input
- name: input
- shape:
- - 1
- - 490
- output_nodes:
- - description: The probability on 12 keywords.
- name: Identity
- shape:
- - 1
- - 12
- test_output_path: models/keyword_spotting/cnn_large/tflite_int8/testing_output/Identity
-operators:
- TensorFlow Lite:
- - CONV_2D
- - DEQUANTIZE
- - FULLY_CONNECTED
- - QUANTIZE
- - RELU
- - RESHAPE
- - SOFTMAX
-paper: https://arxiv.org/abs/1711.07128
diff --git a/models/keyword_spotting/cnn_large/tflite_int8/get_class_labels.sh b/models/keyword_spotting/cnn_large/tflite_int8/get_class_labels.sh
deleted file mode 100755
index e59caf5..0000000
--- a/models/keyword_spotting/cnn_large/tflite_int8/get_class_labels.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the License); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an AS IS BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/usr/bin/env bash
-
-wget https://raw.githubusercontent.com/ARM-software/ML-KWS-for-MCU/e9cf319e9aa2ff71d433e111477dd95329fb94cb/Pretrained_models/labels.txt
-mv labels.txt labelmappings.txt
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/README.md b/models/keyword_spotting/cnn_medium/model_package_tf/README.md
new file mode 100644
index 0000000..bb7380f
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/README.md
@@ -0,0 +1,115 @@
+# CNN Medium model package
+
+This folder contains code that will allow you to recreate the CNN Medium keyword spotting model from
+the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf).
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Model Package Overview
+| Model | CNN_Medium |
+|:---------------: |:--------------------------------------------------------------:|
+| **Format**: | Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |
+| **Feature**: | Keyword spotting for Arm Cortex-M CPUs |
+| **Architectural Delta w.r.t. Vanilla**: | None |
+| **Domain**: | Keyword spotting |
+| **Package Quality**: | Optimised |
+
+## Model Recreation
+
+To recreate the model you will first need to be using ```Python3.7``` and to install the requirements in ```requirements.txt```.
+
+Once these requirements are satisfied, you can execute the recreation script contained in this folder by running:
+
+```bash
+bash ./recreate_model.sh
+```
+
+Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder
+to generate the TFLite files and perform evaluation on the test sets. Both an fp32 version and a quantized version will be produced;
+the quantized version is fully quantized using post-training quantization.
+
+If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:
+
+```bash
+bash ./recreate_model.sh --train
+```
+
+Training is then performed and should produce a model with the accuracy stated in this repository.
+Note that exporting to TFLite will still happen with the pre-trained checkpoint files, so you will need to re-run the script
+and this time supply the path to the new checkpoint files you want to use, for example:
+
+```bash
+bash ./recreate_model.sh --ckpt <path/to/checkpoint>
+```
+
+
+## Training
+
+To train, for example, a DNN with 3 fully-connected layers of 128 neurons each, run:
+
+```
+python train.py --model_architecture dnn --model_size_info 128 128 128
+```
+The command line argument *--model_size_info* is used to pass the neural network layer
+dimensions (such as the number of layers and the convolution filter sizes/strides) as a list to models.py,
+which builds the TensorFlow graph based on the provided model architecture
+and layer dimensions. For more info on *model_size_info* for each network architecture see
+[models.py](models.py).
+
+The training commands with all the hyperparameters to reproduce the models shown in the
+[paper](https://arxiv.org/pdf/1711.07128.pdf) are given [here](recreate_model.sh).
+
+## Testing
+To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:
+```
+python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <path/to/checkpoint>
+```
+The parameters used here should match those used in the Training step.
+
+## Optimization
+
+We introduce a new *optional* step to optimize the trained keyword spotting model for deployment.
+
+Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. 32 weight clusters and the kmeans++ cluster initialization method are used as the clustering hyperparameters (see the sketch at the end of this section).
+
+To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.
+You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.
+
+To apply the optimization and fine-tuning, run the following command:
+```
+python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <path/to/checkpoint>
+```
+The parameters used here should match those used in the Training step, except for the number of training steps.
+The number of training steps is reduced since the optimization step only requires fine-tuning.
+
+This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model.
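+
+For reference, the core of what `optimisations.py` does with the clustering API is sketched below. This is only an illustrative outline: `trained_model` is a placeholder for the Keras model restored from your checkpoint, and the compile/fine-tune loop driven by the command-line flags is omitted.
+
+```python
+import tensorflow_model_optimization as tfmot
+
+# Clustering hyperparameters used in this package: 32 clusters, kmeans++ centroid initialization.
+clustering_params = {
+    'number_of_clusters': 32,
+    'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS}
+
+# Wrap the trained model with clustering, fine-tune it, then strip the wrappers before export.
+clustered_model = tfmot.clustering.keras.cluster_weights(trained_model, **clustering_params)
+# ... compile and fit clustered_model for a small number of steps ...
+final_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+```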
+
+## Quantization and TFLite Conversion
+
+We now use TensorFlow's
+[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to
+make quantization of the trained models straightforward.
+
+To quantize your trained model (e.g. a DNN) run:
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <path/to/checkpoint> [--inference_type int8|int16]
+```
+The parameters used here should match those used in the Training step.
+
+The `inference_type` parameter is *optional* and should be used if a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32. A sketch of the corresponding converter settings is shown below.
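+
+For illustration, when `--inference_type int8` is passed, `convert_to_tflite.py` configures the TFLite converter roughly as follows. `trained_model` and `rep_dataset` are placeholders for the Keras model restored from the checkpoint and a generator that yields representative MFCC inputs from the validation set:
+
+```python
+import tensorflow as tf
+
+converter = tf.lite.TFLiteConverter.from_keras_model(trained_model)
+converter.optimizations = [tf.lite.Optimize.DEFAULT]
+converter.representative_dataset = rep_dataset  # yields lists of sample MFCC inputs
+converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+converter.inference_input_type = tf.int8
+converter.inference_output_type = tf.int8
+tflite_model = converter.convert()
+```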
+
+This step will produce a quantized TFLite file *dnn_quantized.tflite*.
+You can test the accuracy of this quantized model on the test set by running:
+```
+python evaluation.py --tflite_path dnn_quantized.tflite
+```
+The parameters used here should match those used in the Training step.
+
+`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:
+
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <path/to/checkpoint> --no-quantize
+```
+
+This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.
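+
+If you want to double-check a generated TFLite file, one quick (optional) way is to inspect its input and output details with the TFLite interpreter. The file name below assumes the default `dnn` architecture flag was used:
+
+```python
+import tensorflow as tf
+
+interpreter = tf.lite.Interpreter(model_path='dnn_quantized.tflite')
+interpreter.allocate_tensors()
+# The I/O dtypes are int8/int16 only when --inference_type was set accordingly;
+# otherwise the interface stays float32 even for the quantized model.
+print(interpreter.get_input_details()[0]['dtype'], interpreter.get_input_details()[0]['shape'])
+print(interpreter.get_output_details()[0]['dtype'], interpreter.get_output_details()[0]['shape'])
+```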
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/cnn_m_inference_keras.py b/models/keyword_spotting/cnn_medium/model_package_tf/cnn_m_inference_keras.py
new file mode 100644
index 0000000..db7694a
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/cnn_m_inference_keras.py
@@ -0,0 +1,76 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import argparse
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+    with open(filename, "r") as f:
+        return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+
+ model = tf.keras.models.load_model(FLAGS.keras_file_path)
+ predictions = model.predict(x)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--keras_file_path',
+ type=str,
+ default='',
+ help='Path to the .h5 Keras model file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/cnn_m_inference_tflite.py b/models/keyword_spotting/cnn_medium/model_package_tf/cnn_m_inference_tflite.py
new file mode 100644
index 0000000..9f79d99
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/cnn_m_inference_tflite.py
@@ -0,0 +1,120 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import numpy as np
+import argparse
+
+
+def tflite_inference(input_data, tflite_path):
+ """Call forwards pass of TFLite file and returns the result.
+
+ Args:
+ input_data: Input data to use on forward pass.
+ tflite_path: Path to TFLite file to run.
+
+ Returns:
+ Output from inference.
+ """
+ supported_quant_dtypes = (np.int8, np.int16)
+ interpreter = tf.lite.Interpreter(model_path=tflite_path)
+ interpreter.allocate_tensors()
+
+ input_details = interpreter.get_input_details()
+ output_details = interpreter.get_output_details()
+
+ input_dtype = input_details[0]["dtype"]
+ output_dtype = output_details[0]["dtype"]
+
+ # Check if the input/output type is quantized,
+ # set scale and zero-point accordingly
+ if input_dtype in supported_quant_dtypes:
+ input_scale, input_zero_point = input_details[0]["quantization"]
+ else:
+ input_scale, input_zero_point = 1, 0
+
+ input_data = input_data / input_scale + input_zero_point
+ input_data = np.round(input_data) if input_dtype in supported_quant_dtypes else input_data
+
+ if output_dtype in supported_quant_dtypes:
+ output_scale, output_zero_point = output_details[0]["quantization"]
+ else:
+ output_scale, output_zero_point = 1, 0
+
+ interpreter.set_tensor(input_details[0]['index'], tf.cast(input_data, input_dtype))
+ interpreter.invoke()
+
+ output_data = interpreter.get_tensor(output_details[0]['index'])
+
+ output_data = output_scale * (output_data.astype(np.float32) - output_zero_point)
+
+ return output_data
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+    with open(filename, "r") as f:
+        return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+ predictions = tflite_inference(x, FLAGS.tflite_path)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ default='',
+ help='Path to TFLite file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/convert_to_tflite.py b/models/keyword_spotting/cnn_medium/model_package_tf/convert_to_tflite.py
new file mode 100644
index 0000000..64ab8df
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/convert_to_tflite.py
@@ -0,0 +1,234 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for converting and quantizing a trained keyword spotting
+ model and saving to TFLite."""
+
+import argparse
+
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from evaluation import tflite_test
+
+NUM_REP_DATA_SAMPLES = 100 # How many samples to use for post training quantization.
+
+
+def convert(model_settings, audio_processor, checkpoint, quantize, inference_type, tflite_path):
+ """Load our trained floating point model and convert it.
+
+ TFLite conversion or post training quantization is performed and the
+ resulting model is saved as a TFLite file.
+ We use samples from the validation set to do post training quantization.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ checkpoint: Path to training checkpoint to load.
+ quantize: Whether to quantize the model or convert to fp32 TFLite model.
+ inference_type: Input/output type of the quantized model.
+ tflite_path: Output TFLite file save path.
+ """
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(checkpoint).expect_partial()
+
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+
+ def _rep_dataset():
+ """Generator function to produce representative dataset."""
+ i = 0
+ for mfcc, label in val_data:
+ if i >= NUM_REP_DATA_SAMPLES:
+ break
+ i += 1
+ yield [mfcc]
+
+ if quantize:
+ # Quantize model and save to disk.
+ tflite_model = post_training_quantize(model, inference_type, _rep_dataset)
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Quantized model saved to {tflite_path}.')
+ else:
+ converter = tf.lite.TFLiteConverter.from_keras_model(model)
+ tflite_model = converter.convert()
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Converted model saved to {tflite_path}.')
+
+
+def post_training_quantize(keras_model, inference_type, rep_dataset):
+ """Perform post training quantization and returns the TFLite model ready for saving.
+
+ See https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization for
+ more details.
+
+ Args:
+ keras_model: The trained tf Keras model used for post training quantization.
+ inference_type: Input/output type of the quantized model.
+ rep_dataset: Function to use as a representative dataset, must be callable.
+
+ Returns:
+ Quantized TFLite model ready for saving to disk.
+ """
+ converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+ converter.optimizations = [tf.lite.Optimize.DEFAULT]
+
+    # Default to int8 weights and activations with float32 inputs/outputs unless a
+    # quantized interface type is explicitly requested.
+    supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS_INT8
+    if inference_type == 'int8':
+        converter.inference_input_type = tf.int8
+        converter.inference_output_type = tf.int8
+    elif inference_type == 'int16':
+        converter.inference_input_type = tf.int16
+        converter.inference_output_type = tf.int16
+        supported_ops = tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
+
+    # Int8 post training quantization needs a representative dataset.
+    converter.representative_dataset = rep_dataset
+    converter.target_spec.supported_ops = [supported_ops]
+
+ tflite_model = converter.convert()
+
+ return tflite_model
+
+
+def main():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.quantize:
+ tflite_path = f'{FLAGS.model_architecture}_quantized.tflite'
+ else:
+ tflite_path = f'{FLAGS.model_architecture}.tflite'
+
+ # Load floating point model from checkpoint and convert it.
+ convert(model_settings, audio_processor, FLAGS.checkpoint,
+ FLAGS.quantize, FLAGS.inference_type, tflite_path)
+
+ # Test the newly converted model on the test set.
+ tflite_test(model_settings, audio_processor, tflite_path)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from.')
+ parser.add_argument(
+ '--quantize',
+ dest='quantize',
+ action="store_true",
+ default=True,
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--no-quantize',
+ dest='quantize',
+ action="store_false",
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--inference_type',
+ type=str,
+ default='fp32',
+ help='If quantize is true, whether the model input and output are float32, int8 or int16')
+
+ FLAGS, _ = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/data_processing/__init__.py b/models/keyword_spotting/cnn_medium/model_package_tf/data_processing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/data_processing/data_preprocessing.py b/models/keyword_spotting/cnn_medium/model_package_tf/data_processing/data_preprocessing.py
new file mode 100644
index 0000000..05cf5ba
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/data_processing/data_preprocessing.py
@@ -0,0 +1,462 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Modifications Copyright 2023 Arm Inc. All Rights Reserved.
+# Modified to use TensorFlow 2.0 and data pipelines.
+#
+"""Functions for loading and preparing data for keyword spotting."""
+
+import os
+import re
+import sys
+import urllib
+from pathlib import Path
+import tarfile
+import hashlib
+import random
+import math
+from enum import Enum
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.ops import gen_audio_ops as audio_ops
+
+MAX_NUM_WAVS_PER_CLASS = 2**27 - 1 # ~134M
+RANDOM_SEED = 59185
+BACKGROUND_NOISE_DIR_NAME = '_background_noise_'
+SILENCE_LABEL = '_silence_'
+SILENCE_INDEX = 0
+UNKNOWN_WORD_INDEX = 1
+UNKNOWN_WORD_LABEL = '_unknown_'
+
+
+def load_wav_file(wav_filename, desired_samples):
+ """Loads and then decodes a given 16bit PCM wav file.
+
+ Decoded audio is scaled to the range [-1, 1] and padded or cropped to the desired number of samples.
+
+ Args:
+ wav_filename: 16bit PCM wav file to load.
+ desired_samples: Number of samples wanted from the audio file.
+
+ Returns:
+ Tuple consisting of the decoded audio and sample rate.
+ """
+ wav_file = tf.io.read_file(wav_filename)
+ decoded_wav = audio_ops.decode_wav(wav_file, desired_channels=1, desired_samples=desired_samples)
+
+ return decoded_wav.audio, decoded_wav.sample_rate
+
+
+def calculate_mfcc(audio_signal, audio_sample_rate, window_size, window_stride, num_mfcc):
+ """Returns Mel Frequency Cepstral Coefficients (MFCC) for a given audio signal.
+
+ Args:
+ audio_signal: Raw audio signal in range [-1, 1]
+ audio_sample_rate: Audio signal sample rate
+ window_size: Window size in samples for calculating spectrogram
+ window_stride: Window stride in samples for calculating spectrogram
+ num_mfcc: The number of MFCC features wanted.
+
+ Returns:
+ Calculated mffc features.
+ """
+ spectrogram = audio_ops.audio_spectrogram(input=audio_signal, window_size=window_size, stride=window_stride,
+ magnitude_squared=True)
+
+ mfcc_features = audio_ops.mfcc(spectrogram, audio_sample_rate, dct_coefficient_count=num_mfcc)
+
+ return mfcc_features
+
+
+def which_set(filename, validation_percentage, testing_percentage):
+ """Determines which data partition the file should belong to.
+
+ We want to keep files in the same training, validation, or testing sets even
+ if new ones are added over time. This makes it less likely that testing
+ samples will accidentally be reused in training when long runs are restarted
+ for example. To keep this stability, a hash of the filename is taken and used
+ to determine which set it should belong to. This determination only depends on
+ the name and the set proportions, so it won't change as other files are added.
+ It's also useful to associate particular files as related (for example words
+ spoken by the same person), so anything after '_nohash_' in a filename is
+ ignored for set determination. This ensures that 'bobby_nohash_0.wav' and
+ 'bobby_nohash_1.wav' are always in the same set, for example.
+
+ Args:
+ filename: File path of the data sample.
+ validation_percentage: How much of the data set to use for validation.
+ testing_percentage: How much of the data set to use for testing.
+
+ Returns:
+ String, one of 'training', 'validation', or 'testing'.
+ """
+ base_name = os.path.basename(filename)
+ # We want to ignore anything after '_nohash_' in the file name when
+ # deciding which set to put a wav in, so the data set creator has a way of
+ # grouping wavs that are close variations of each other.
+ hash_name = re.sub(r'_nohash_.*$', '', base_name)
+ # This looks a bit magical, but we need to decide whether this file should
+ # go into the training, testing, or validation sets, and we want to keep
+ # existing files in the same set even if more files are subsequently
+ # added.
+ # To do that, we need a stable way of deciding based on just the file name
+ # itself, so we do a hash of that and then use that to generate a
+ # probability value that we use to assign it.
+ hash_name_hashed = hashlib.sha1(tf.compat.as_bytes(hash_name)).hexdigest()
+ percentage_hash = ((int(hash_name_hashed, 16) %
+ (MAX_NUM_WAVS_PER_CLASS + 1)) *
+ (100.0 / MAX_NUM_WAVS_PER_CLASS))
+ if percentage_hash < validation_percentage:
+ result = 'validation'
+ elif percentage_hash < (testing_percentage + validation_percentage):
+ result = 'testing'
+ else:
+ result = 'training'
+ return result
+
+
+def prepare_words_list(wanted_words):
+ """Prepends common tokens to the custom word list.
+
+ Args:
+ wanted_words: List of strings containing custom words to spot.
+
+ Returns:
+ List of words with silence and unknown tokens added.
+ """
+ return [SILENCE_LABEL, UNKNOWN_WORD_LABEL] + wanted_words
+
+
+class AudioProcessor:
+ """Handles loading, partitioning, and preparing audio training data."""
+
+ class Modes(Enum):
+ TRAINING = 1
+ VALIDATION = 2
+ TESTING = 3
+
+ def __init__(self, data_url, data_dir, silence_percentage, unknown_percentage,
+ wanted_words, validation_percentage, testing_percentage, model_settings):
+ self.data_dir = Path(data_dir)
+ self.model_settings = model_settings
+ self.words_list = prepare_words_list(wanted_words)
+
+ self._tf_datasets = {}
+ self.background_data = None
+ self._set_size = {'training': 0, 'validation': 0, 'testing': 0}
+
+ self._download_and_extract_data(data_url, data_dir)
+ self._prepare_datasets(silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage)
+ self._prepare_background_data()
+
+ def get_data(self, mode, background_frequency=0, background_volume_range=0, time_shift=0):
+ """Returns the train, validation or test set for KWS as a TF Dataset.
+
+ Args:
+ mode: The set to return, see AudioProcessor.Modes enumeration.
+ background_frequency: How many of the samples have background noise mixed in.
+ background_volume_range: How loud the background noise should be, between 0 and 1.
+ time_shift: Range to randomly shift the training audio by in time.
+
+ Returns:
+ TF dataset that will generate tuples containing an mfcc and corresponding label.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ dataset = self._tf_datasets['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ dataset = self._tf_datasets['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ dataset = self._tf_datasets['testing']
+ else:
+ raise ValueError("Incorrect dataset type given")
+
+ use_background = (self.background_data is not None) and (mode == AudioProcessor.Modes.TRAINING)
+ dataset = dataset.map(lambda path, label: self._process_path(path, label, self.model_settings,
+ background_frequency, background_volume_range,
+ time_shift, use_background, self.background_data),
+ num_parallel_calls=tf.data.experimental.AUTOTUNE)
+
+ return dataset
+
+ def set_size(self, mode):
+ """Get the number of samples in the requested dataset partition.
+
+ Args:
+ mode: Which partition, see AudioProcessor.Modes enumeration.
+
+ Returns:
+ Number of samples in the partition.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ return self._set_size['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ return self._set_size['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ return self._set_size['testing']
+ else:
+ raise ValueError('Incorrect dataset type given')
+
+ @staticmethod
+ def _process_path(path, label, model_settings, background_frequency, background_volume_range, time_shift_samples,
+ use_background, background_data):
+ """Load wav files and calculate mfcc features.
+
+ Random shifting of samples and adding in background noise is done within this function as well.
+ This function is meant to be mapped onto a TF Dataset by using a lambda function.
+
+ Args:
+ path: Path to the wav file to load.
+ label: Integer label for classifying the audio clip.
+ model_settings: Dictionary of settings for model being trained.
+ background_frequency: How many clips will have background noise, 0.0 to 1.0.
+ background_volume_range: How loud the background noise will be.
+ time_shift_samples: How much to randomly shift the clips by.
+ use_background: Add in background noise to audio clips or not.
+ background_data: Ragged tensor of loaded background noise samples.
+
+ Returns:
+ Tuple of calculated flattened mfcc and its class label.
+ """
+
+ desired_samples = model_settings['desired_samples']
+ audio, sample_rate = load_wav_file(path, desired_samples=desired_samples)
+
+ # Make our own silence audio data.
+ if label == SILENCE_INDEX:
+ audio = tf.multiply(audio, 0)
+
+ # Shift samples start position and pad any gaps with zeros.
+ if time_shift_samples > 0:
+ time_shift_amount = tf.random.uniform(shape=(), minval=-time_shift_samples, maxval=time_shift_samples,
+ dtype=tf.int32)
+ else:
+ time_shift_amount = 0
+ if time_shift_amount > 0:
+ time_shift_padding = [[time_shift_amount, 0], [0, 0]]
+ time_shift_offset = [0, 0]
+ else:
+ time_shift_padding = [[0, -time_shift_amount], [0, 0]]
+ time_shift_offset = [-time_shift_amount, 0]
+
+ padded_foreground = tf.pad(audio, time_shift_padding, mode='CONSTANT')
+ sliced_foreground = tf.slice(padded_foreground, time_shift_offset, [desired_samples, -1])
+
+ # Get a random section of background noise.
+ if use_background:
+ background_index = tf.random.uniform(shape=(), maxval=background_data.shape[0], dtype=tf.int32)
+ background_sample = background_data[background_index]
+ background_offset = tf.random.uniform(shape=(), maxval=len(background_sample)-desired_samples,
+ dtype=tf.int32)
+ background_clipped = background_sample[background_offset:(background_offset + desired_samples)]
+ background_reshaped = tf.reshape(background_clipped, [desired_samples, 1])
+ if tf.random.uniform(shape=(), maxval=1) < background_frequency:
+ background_volume = tf.random.uniform(shape=(), maxval=background_volume_range)
+ else:
+ background_volume = tf.constant(0, dtype='float32')
+ else:
+ background_reshaped = np.zeros([desired_samples, 1], dtype=np.float32)
+ background_volume = tf.constant(0, dtype='float32')
+
+ # Mix in background noise.
+ background_mul = tf.multiply(background_reshaped, background_volume)
+ background_add = tf.add(background_mul, sliced_foreground)
+ background_clamp = tf.clip_by_value(background_add, -1.0, 1.0)
+
+ mfcc = calculate_mfcc(background_clamp, sample_rate, model_settings['window_size_samples'],
+ model_settings['window_stride_samples'],
+ model_settings['dct_coefficient_count'])
+ mfcc = tf.reshape(mfcc, [-1])
+
+ return mfcc, label
+
+ def _download_and_extract_data(self, data_url, target_directory):
+ """Downloads and extracts file to target directory.
+
+ If the file does not already exist download it and then untar into the target directory.
+
+ Args:
+ data_url: Web link to the tarred data to download.
+ target_directory: Directory to download and extract to.
+ """
+ target_directory = Path(target_directory)
+ target_directory.mkdir(exist_ok=True)
+
+ filename = data_url.split('/')[-1]
+ filepath = target_directory / filename
+
+ if not filepath.exists():
+ def _report_hook(block_num, block_size, total_size):
+ """Function to track download progress in urllib"""
+ read_so_far = block_num * block_size
+ percent = (read_so_far / total_size) * 100.0
+
+ s = f"\rDownloading {filename} {percent:.1f}%"
+
+ sys.stdout.write(s)
+ sys.stdout.flush()
+
+ filepath, _ = urllib.request.urlretrieve(data_url, filepath, _report_hook)
+ print()
+
+ print(f'Untarring {filename}...')
+ tarfile.open(filepath, 'r:gz').extractall(target_directory)
+
+ def _prepare_datasets(self, silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage):
+ """Split the data into train, validation and testing sets.
+
+ Silence and unknown data is added, then sets are converted to TF Datasets.
+
+ Args:
+ silence_percentage: Percent of words should be silence.
+ unknown_percentage: Percent of words that should be unknown.
+ wanted_words: List of words wanted to classify.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ """
+ # Make sure the shuffling and picking of unknowns is deterministic.
+ random.seed(RANDOM_SEED)
+ wanted_words_index = {}
+
+ for index, wanted_word in enumerate(wanted_words):
+ wanted_words_index[wanted_word] = index + 2
+
+ # Find all wav files in subfolders.
+ search_path = self.data_dir / '*' / '*.wav'
+ data_index, unknown_index, all_words = self._find_and_sort_wavs(search_path, validation_percentage,
+ testing_percentage, wanted_words_index)
+
+ for index, wanted_word in enumerate(wanted_words):
+ if wanted_word not in all_words:
+ raise Exception(f'Tried to find {wanted_word} in labels but only found: {", ".join(all_words.keys())}')
+
+ word_to_index = {}
+ for word in all_words:
+ if word in wanted_words_index:
+ word_to_index[word] = wanted_words_index[word]
+ else:
+ word_to_index[word] = UNKNOWN_WORD_INDEX
+ word_to_index[SILENCE_LABEL] = SILENCE_INDEX
+
+ # We need an arbitrary file to load as the input for the silence samples.
+ # It's multiplied by zero later, so the content doesn't matter.
+ silence_wav_path = data_index['training'][0]['file']
+ for set_index in ['validation', 'testing', 'training']:
+ set_size = len(data_index[set_index]) # Size before adding silence and unknown samples.
+ silence_size = int(math.ceil(set_size * silence_percentage / 100))
+ for _ in range(silence_size):
+ data_index[set_index].append({
+ 'label': SILENCE_LABEL,
+ 'file': silence_wav_path
+ })
+ # Pick some unknowns to add to each partition of the data set.
+ random.shuffle(unknown_index[set_index])
+ unknown_size = int(math.ceil(set_size * unknown_percentage / 100))
+ data_index[set_index].extend(unknown_index[set_index][:unknown_size])
+
+ self._set_size[set_index] = len(data_index[set_index]) # Size after adding silence and unknown samples.
+
+ # Make sure the ordering is random.
+ random.shuffle(data_index[set_index])
+
+ # Transform into TF Datasets ready for easier processing later.
+ labels, paths = list(zip(*[d.values() for d in data_index[set_index]]))
+ labels = [word_to_index[label] for label in labels]
+ self._tf_datasets[set_index] = tf.data.Dataset.from_tensor_slices((list(paths), labels))
+
+ def _find_and_sort_wavs(self, search_pattern, validation_percentage, testing_percentage, wanted_words_index):
+ """Find and sort wav files into known and unknown word sets.
+
+ Known words are files containing words in the list of wanted words.
+ Any other clip goes to the unknown label set. Labels come from the folder names.
+ All clips are also assigned to train, test and validation sets.
+
+ Args:
+ search_pattern: Path pattern used by glob to find wav files.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ wanted_words_index: Dict mapping wanted words to their label index.
+
+ Returns:
+ 3-tuple of known words, unknown words and mapping of all word labels.
+ """
+ data_index = {'validation': [], 'testing': [], 'training': []}
+ unknown_index = {'validation': [], 'testing': [], 'training': []}
+ all_words = {}
+
+ for wav_path in sorted(tf.io.gfile.glob(str(search_pattern))):
+ word = Path(wav_path).parent.name.lower()
+
+ # Treat the '_background_noise_' folder as a special case, since we expect
+ # it to contain long audio samples we mix in to improve training.
+ if word == BACKGROUND_NOISE_DIR_NAME:
+ continue
+
+ all_words[word] = True
+ set_index = which_set(wav_path, validation_percentage, testing_percentage)
+ # If it's a known class, store its detail, otherwise add it to the list
+ # we'll use to train the unknown label.
+ if word in wanted_words_index:
+ data_index[set_index].append({'label': word, 'file': wav_path})
+ else:
+ unknown_index[set_index].append({'label': word, 'file': wav_path})
+ if not all_words:
+ raise Exception('No .wavs found at ' + str(search_pattern))
+
+ return data_index, unknown_index, all_words
+
+ def _prepare_background_data(self):
+ """Searches a folder for background noise audio, and loads it into memory.
+
+ It's expected that the background audio samples will be in a subdirectory
+ named '_background_noise_' inside the 'data_dir' folder, as .wavs that match
+ the sample rate of the training data, but can be much longer in duration.
+
+ If the '_background_noise_' folder doesn't exist at all, this isn't an
+ error, it's just taken to mean that no background noise augmentation should
+ be used. If the folder does exist, but it's empty, that's treated as an
+ error.
+
+ Returns:
+ Ragged tensor of raw PCM-encoded audio samples of background noise.
+ None if the '_background_noise_' folder doesn't exist.
+
+ Raises:
+ Exception: If files aren't found in the folder.
+ """
+ background_data = []
+ background_dir = Path(self.data_dir / BACKGROUND_NOISE_DIR_NAME)
+ if not background_dir.exists():
+ self.background_data = None
+ return
+
+ search_path = Path(background_dir / '*.wav')
+ for wav_path in tf.io.gfile.glob(str(search_path)):
+ wav_data, _ = load_wav_file(wav_path, desired_samples=-1)
+ background_data.append(tf.reshape(wav_data, [-1]))
+
+ if not background_data:
+ raise Exception('No background wav files were found in ' + str(search_path))
+
+ # Ragged tensor as we can't use lists in tf.data Dataset map functions.
+ self.background_data = tf.ragged.stack(background_data)
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/evaluation.py b/models/keyword_spotting/cnn_medium/model_package_tf/evaluation.py
new file mode 100644
index 0000000..e5dcf30
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/evaluation.py
@@ -0,0 +1,250 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for testing trained keyword spotting models from checkpoint files and TFLite files."""
+
+import argparse
+
+import numpy as np
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from cnn_m_inference_tflite import tflite_inference
+
+
+def tflite_test(model_settings, audio_processor, tflite_path):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A TFLite model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ tflite_path: Path to TFLite file to use for inference.
+ """
+ # Evaluate on validation set.
+ print("Running TFLite evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+ expected_indices = np.concatenate([y for x, y in val_data])
+ predicted_indices = []
+
+ for mfcc, label in val_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TFLite evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(1)
+ expected_indices = np.concatenate([y for x, y in test_data])
+ predicted_indices = []
+
+ for mfcc, label in test_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def keras_test(model_settings, audio_processor, model):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A loaded keras model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ model: Loaded keras model.
+ """
+ # Evaluate on validation set.
+ print("Running TF evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in val_data])
+
+ predictions = model.predict(val_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TF evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in test_data])
+
+ predictions = model.predict(test_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def calculate_accuracy(predicted_indices, expected_indices):
+ """Calculates and returns accuracy.
+
+ Args:
+ predicted_indices: List of predicted integer indices.
+ expected_indices: List of expected integer indices.
+
+ Returns:
+ Accuracy value between 0 and 1.
+ """
+ correct_prediction = tf.equal(predicted_indices, expected_indices)
+ accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+ return accuracy
+
+
+def evaluate():
+ """Calculate accuracy and confusion matrices on validation and test sets.
+
+ Model is created and weights loaded from supplied command line arguments.
+ """
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.tflite_path:
+ tflite_test(model_settings, audio_processor, FLAGS.tflite_path)
+
+ if FLAGS.checkpoint:
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+ keras_test(model_settings, audio_processor, model)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from')
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ help='Path to TFLite file to use for evaluation')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ evaluate()
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/how_to_guidance.ipynb b/models/keyword_spotting/cnn_medium/model_package_tf/how_to_guidance.ipynb
new file mode 100644
index 0000000..34a8579
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/how_to_guidance.ipynb
@@ -0,0 +1,428 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.\n",
+ "#\n",
+ "# SPDX-License-Identifier: Apache-2.0\n",
+ "#\n",
+ "# Licensed under the Apache License, Version 2.0 (the License); you may\n",
+ "# not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an AS IS BASIS, WITHOUT\n",
+ "# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# CNN_Medium - Optimised\n",
+ "\n",
+ "Here we reproduce the models with our established codebase and ModelPackage approach for your convenience.\n",
+ "\n",
+ "## Model-Package Overview:\n",
+ "\n",
+ "| Model \t| CNN_Medium \t|\n",
+ "|:---------------:\t|:---------------------------------------------------------------:\t|\n",
+ "| **Format**: \t| Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |\n",
+ "| **Feature**: \t| Keyword spotting for Arm Cortex-M CPUs |\n",
+ "| **Architectural Delta w.r.t. Vanilla**: | None |\n",
+ "| **Domain**: \t| Keyword spotting |\n",
+ "| **Package Quality**: \t| Optimised |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Table of contents \n",
+ "\n",
+ "This how-to guidance presents the key steps to reproduce everything in this package. The contents are organised as below. We provided the internal navigation links for users to easy-jump among different sections. \n",
+ "\n",
+ " \n",
+ "* [1.0 Model recreation](#model_recreation)\n",
+ "\n",
+ "* [2.0 Training](#training)\n",
+ "\n",
+ "* [3.0 Testing](#testing)\n",
+ "\n",
+ "* [4.0 Optimization](#optimization)\n",
+ "\n",
+ "* [5.0 Quantization and TFLite conversion](#tflite_conversion)\n",
+ "\n",
+ "* [6.0 Inference the TFLite model files](#tflite_inference)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1.0 Model Recreation\n",
+ "\n",
+ "In order to recreate the model you will first need to be using ```Python3.7``` and install the requirements in ```requirements.txt```.\n",
+ "\n",
+ "Once you have these requirements satisfied you can execute the recreation script contained within this folder, just run:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 12:28:00.950084: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 12:28:52.604010: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 12:28:52.642244: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:28:52.642282: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 12:28:52.661881: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 12:28:52.661959: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 12:28:52.664744: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 12:28:52.665058: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 12:28:52.665625: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 12:28:52.666342: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 12:28:52.666491: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 12:28:52.666964: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:28:52.667239: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 12:28:52.668032: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:28:52.668409: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:28:52.668474: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 12:28:53.120304: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:28:53.120344: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:28:53.120355: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:28:53.120872: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10987 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 12:28:54.678368: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 12:28:55.540021: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 12:28:55.540187: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 12:28:55.540624: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:28:55.540870: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:28:55.540900: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:28:55.540909: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:28:55.540916: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:28:55.541191: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10987 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 12:28:55.559442: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 12:28:55.561433: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.011ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.001ms.\n",
+ "\n",
+ "2023-01-31 12:28:55.642998: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 12:28:55.643041: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 12:28:55.647105: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 12:28:55.649478: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:28:55.649793: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:28:55.649827: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:28:55.649839: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:28:55.649846: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:28:55.650184: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10987 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "Converted model saved to cnn.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "2023-01-31 12:28:55.708536: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 307 3 8 0 15 9 4 10 5 1 9]\n",
+ " [ 0 2 384 0 0 2 6 1 0 0 0 2]\n",
+ " [ 1 5 3 368 1 12 3 0 1 0 2 10]\n",
+ " [ 0 1 1 2 324 0 3 0 0 12 6 1]\n",
+ " [ 0 3 0 12 0 352 2 1 0 1 1 5]\n",
+ " [ 0 5 8 1 1 0 334 2 0 1 0 0]\n",
+ " [ 0 3 0 1 1 1 1 352 1 2 0 1]\n",
+ " [ 1 7 0 0 5 0 0 0 337 9 1 3]\n",
+ " [ 0 7 1 0 16 0 1 0 2 342 3 1]\n",
+ " [ 1 2 1 0 9 2 1 0 1 2 330 1]\n",
+ " [ 0 5 0 11 1 6 1 0 2 3 3 340]]\n",
+ "Validation accuracy = 93.16%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 341 3 6 5 6 8 6 12 3 3 15]\n",
+ " [ 0 6 395 2 0 2 12 0 0 0 0 2]\n",
+ " [ 0 8 2 363 0 11 9 0 0 0 0 12]\n",
+ " [ 0 8 0 1 386 1 1 0 3 15 8 2]\n",
+ " [ 0 6 3 12 1 371 3 0 3 0 1 6]\n",
+ " [ 0 4 6 1 1 1 394 3 0 0 2 0]\n",
+ " [ 0 13 0 0 1 0 6 372 0 1 1 2]\n",
+ " [ 1 9 0 0 4 7 1 0 356 17 1 0]\n",
+ " [ 0 5 0 1 14 0 3 1 5 364 1 8]\n",
+ " [ 0 0 0 0 9 3 1 0 0 1 392 5]\n",
+ " [ 0 8 1 24 3 6 2 0 0 4 5 349]]\n",
+ "Test accuracy = 91.84%(N=4890)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 12:29:24.873900: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 12:30:17.291981: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 12:30:17.332661: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:30:17.332698: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 12:30:17.352880: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 12:30:17.352950: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 12:30:17.355747: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 12:30:17.356015: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 12:30:17.356577: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 12:30:17.357311: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 12:30:17.357465: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 12:30:17.357965: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:30:17.358267: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 12:30:17.358989: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:30:17.359555: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:30:17.359642: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 12:30:17.803416: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:30:17.803457: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:30:17.803465: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:30:17.803976: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10960 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 12:30:19.386735: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 12:30:20.196203: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 12:30:20.196287: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 12:30:20.196874: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:30:20.197122: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:30:20.197152: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:30:20.197161: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:30:20.197168: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:30:20.197458: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10960 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 12:30:20.215456: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 12:30:20.218487: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.015ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.003ms.\n",
+ "\n",
+ "2023-01-31 12:30:20.293490: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 12:30:20.293531: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 12:30:20.297417: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 12:30:20.299779: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:30:20.300054: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:30:20.300091: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:30:20.300104: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:30:20.300114: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:30:20.300414: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10960 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 12:30:20.327055: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "fully_quantize: 0, inference_type: 6, input_inference_type: 9, output_inference_type: 9\n",
+ "Quantized model saved to cnn_quantized.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 305 3 9 0 14 12 4 7 5 1 11]\n",
+ " [ 1 3 380 0 0 1 7 3 0 0 1 1]\n",
+ " [ 1 14 3 349 1 10 7 0 1 0 4 16]\n",
+ " [ 0 4 1 1 310 2 3 0 0 15 13 1]\n",
+ " [ 0 6 0 12 0 341 2 1 1 1 8 5]\n",
+ " [ 0 5 9 1 3 0 327 3 0 0 4 0]\n",
+ " [ 0 7 0 0 3 0 3 346 0 2 0 2]\n",
+ " [ 1 12 0 1 2 0 0 0 333 9 1 4]\n",
+ " [ 0 7 1 0 20 0 2 0 1 331 8 3]\n",
+ " [ 1 2 1 0 11 2 2 0 1 2 326 2]\n",
+ " [ 0 5 0 12 2 7 1 0 4 2 5 334]]\n",
+ "Validation accuracy = 91.18%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 1 335 5 6 6 9 9 5 12 3 2 15]\n",
+ " [ 0 9 390 3 0 3 10 0 0 1 0 3]\n",
+ " [ 0 8 2 358 3 10 8 1 0 0 2 13]\n",
+ " [ 0 10 0 1 380 2 5 1 2 14 9 1]\n",
+ " [ 0 7 3 12 2 361 3 0 3 0 4 11]\n",
+ " [ 0 7 7 1 1 0 391 5 0 0 0 0]\n",
+ " [ 0 14 0 0 2 1 6 367 0 2 1 3]\n",
+ " [ 2 13 0 0 6 7 2 0 349 16 1 0]\n",
+ " [ 0 6 0 1 13 0 5 1 4 360 3 9]\n",
+ " [ 0 1 0 1 8 9 1 0 0 1 382 8]\n",
+ " [ 0 10 0 29 3 8 3 2 0 5 6 336]]\n",
+ "Test accuracy = 90.33%(N=4890)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!bash ./recreate_model.sh"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder to generate the TFLite files and perform evaluation on the test set. Both an fp32 version and a quantized version will be produced. The quantized version will use post-training quantization to fully quantize it.\n",
+ "\n",
+ "If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --train\n",
+ "```\n",
+ "\n",
+ "Training is then performed and should produce a model to the stated accuracy in this repository. Note that exporting to TFLite will still happen with the baseline pre-trained checkpoint files, so you will need to re-run the script and this time supply the path to the new checkpoint files you want to use, for example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --ckpt \n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2.0 Training\n",
+ "\n",
+ "The training scripts can be used to recreate any of the models from the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf) provided the right hyperparameters are used. The training commands with all the hyperparameters to reproduce the model in this repository are given [here](recreate_model.sh). The model in this part of the repository represents just one variation of the models from the paper, other varieties are covered in other parts of the repository.\n",
+ "\n",
+ "\n",
+ "As a general example of how to train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:\n",
+ "```\n",
+ "python train.py --model_architecture dnn --model_size_info 128 128 128\n",
+ "```\n",
+ "\n",
+ "The command line argument *--model_size_info* is used to pass the neural network layer\n",
+ "dimensions such as number of layers, convolution filter size/stride as a list to models.py,\n",
+ "which builds the TensorFlow graph based on the provided model architecture\n",
+ "and layer dimensions. For more info on *model_size_info* for each network architecture see\n",
+ "[models.py](model_core_utils/models.py).\n"
+ ]
+ },
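+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a minimal sketch of those helpers (this only constructs the network; `train.py` also handles the data pipeline and the training loop), the example DNN can be built directly from Python. The settings below are simply the script defaults used elsewhere in this package and are illustrative only:\n",
+ "\n",
+ "```python\n",
+ "from model_core_utils import models\n",
+ "\n",
+ "# Feature-extraction settings matching the script defaults (sketch only).\n",
+ "model_settings = models.prepare_model_settings(\n",
+ "    label_count=12, sample_rate=16000, clip_duration_ms=1000,\n",
+ "    window_size_ms=30.0, window_stride_ms=10.0, dct_coefficient_count=40)\n",
+ "\n",
+ "# Build the DNN from the example command: three fully-connected layers of 128 neurons.\n",
+ "model = models.create_model(model_settings, 'dnn', [128, 128, 128], is_training=True)\n",
+ "model.summary()\n",
+ "```"
+ ]
+ },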
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3.0 Testing\n",
+ "To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:\n",
+ "```\n",
+ "python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters passed to this script should match those used in the Training step.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4.0 Optimization\n",
+ "\n",
+ "We introduce an *optional* step to optimize the trained keyword spotting model for deployment.\n",
+ "\n",
+ "Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. 32 weight clusters and kmeans++ cluster intialization method are used as the clustering hyperparameters.\n",
+ "\n",
+ "To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.\n",
+ "You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.\n",
+ "\n",
+ "To apply the optimization and fine-tuning, run the following command:\n",
+ "```\n",
+ "python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step, except for the number of training steps.\n",
+ "The number of training steps is reduced since the optimization step only requires fine-tuning.**\n",
+ "\n",
+ "This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model."
+ ]
+ },
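+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For reference, a minimal sketch of weight clustering with the TensorFlow Model Optimization toolkit is shown below, using the 32-cluster / k-means++ hyperparameters mentioned above. This is illustrative only, not the exact contents of `optimisations.py`; `model` and `train_data` are assumed to already exist:\n",
+ "\n",
+ "```python\n",
+ "import tensorflow_model_optimization as tfmot\n",
+ "\n",
+ "clustering_params = {\n",
+ "    'number_of_clusters': 32,\n",
+ "    'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS,\n",
+ "}\n",
+ "\n",
+ "# Wrap the trained Keras model so its weights are clustered, then fine-tune briefly.\n",
+ "clustered_model = tfmot.clustering.keras.cluster_weights(model, **clustering_params)\n",
+ "clustered_model.compile(optimizer='adam',\n",
+ "                        loss='sparse_categorical_crossentropy',\n",
+ "                        metrics=['accuracy'])\n",
+ "clustered_model.fit(train_data, epochs=1)\n",
+ "\n",
+ "# Remove the clustering wrappers before export or quantization.\n",
+ "final_model = tfmot.clustering.keras.strip_clustering(clustered_model)\n",
+ "```"
+ ]
+ },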
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5.0 Quantization and TFLite Conversion\n",
+ "\n",
+ "You can now use TensorFlow's\n",
+ "[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to\n",
+ "make quantization of the trained models super simple.\n",
+ "\n",
+ "To quantize your trained model (e.g. a DNN) run:\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint [--inference_type int8|int16]\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "The ```inference_type``` parameter is *optional* and to be used if a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32.\n",
+ "\n",
+ "In this example, this step will produce a quantized TFLite file *dnn_quantized.tflite*."
+ ]
+ },
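+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below is a minimal sketch of full int8 post-training quantization with the standard TFLite converter. It is illustrative only, not the exact contents of `convert_to_tflite.py`; `model` is assumed to be a trained Keras model and `calibration_data` a small set of MFCC fingerprints:\n",
+ "\n",
+ "```python\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "def representative_dataset():\n",
+ "    # Yield a few hundred real MFCC inputs so the converter can calibrate ranges.\n",
+ "    for mfcc in calibration_data:\n",
+ "        yield [tf.cast(tf.reshape(mfcc, [1, -1]), tf.float32)]\n",
+ "\n",
+ "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n",
+ "converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
+ "converter.representative_dataset = representative_dataset\n",
+ "converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]\n",
+ "converter.inference_input_type = tf.int8\n",
+ "converter.inference_output_type = tf.int8\n",
+ "\n",
+ "with open('dnn_quantized.tflite', 'wb') as f:\n",
+ "    f.write(converter.convert())\n",
+ "```"
+ ]
+ },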
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can test the accuracy of this quantized model on the test set by running:\n",
+ "```\n",
+ "python evaluation.py --tflite_path dnn_quantized.tflite\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:\n",
+ "\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint --no-quantize\n",
+ "```\n",
+ "\n",
+ "This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 6.0 Single inference of the TFLite model files \n",
+ "\n",
+ "You can conduct TFLite inference for .fp32 and .int8 model files by using the following command: \n",
+ "\n",
+ "```python cnn_m_inference_tflite.py --labels validation_utils/labels.txt --wav --tflite_path ```\n",
+ "\n",
+ "**The feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "\n"
+ ]
+ },
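+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For orientation, the sketch below shows single-sample inference with the TFLite interpreter directly. Paths are illustrative, and `cnn_m_inference_tflite.py` additionally performs the MFCC feature extraction from a wav file, which is skipped here by loading a pre-computed (1, 490) example input:\n",
+ "\n",
+ "```python\n",
+ "import numpy as np\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "interpreter = tf.lite.Interpreter(model_path='model_archive/TFLite/tflite_fp32/cnn_m.tflite')\n",
+ "interpreter.allocate_tensors()\n",
+ "input_details = interpreter.get_input_details()[0]\n",
+ "output_details = interpreter.get_output_details()[0]\n",
+ "\n",
+ "mfcc = np.load('model_archive/TFLite/tflite_fp32/testing_input/input/0.npy').astype(np.float32)\n",
+ "# For the int8 file, quantize the input first using input_details['quantization'].\n",
+ "interpreter.set_tensor(input_details['index'], mfcc.reshape(input_details['shape']))\n",
+ "interpreter.invoke()\n",
+ "probabilities = interpreter.get_tensor(output_details['index'])[0]\n",
+ "print(int(np.argmax(probabilities)))  # index into validation_utils/labels.txt\n",
+ "```"
+ ]
+ },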
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/README.md b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
new file mode 100644
index 0000000..37debc0
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32
+
+## Description
+This is a floating point fp32 version of the CNN Medium model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | fp32 |
+| SHA-1 Hash | 0057378e784ccb8fa28abaa972a86988fbecea19 |
+| Size (Bytes) | 717268 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| accuracy | 91.84% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_multiplication_x: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Deployable |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_multiplication_x: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 490) | fp32 | models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input | fp32 | [1, 490] | The input is a set of processed MFCCs. |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | fp32 | models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity | fp32 | [1, 12] | The probability on 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/cnn_m.tflite b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/cnn_m.tflite
new file mode 100644
index 0000000..f928da7
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/cnn_m.tflite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d18705eebbb20d0ffa569266c97c839082f9a6cd37115c834661081832edc22c
+size 717268
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
new file mode 100644
index 0000000..8bea635
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
@@ -0,0 +1,64 @@
+benchmark:
+ benchmark_metrics:
+ accuracy: 91.84%
+ benchmark_name: Google Speech Commands test set
+description: This is a floating point fp32 version of the CNN Medium model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: fp32
+ file_size_bytes: 717268
+ filename: cnn_m.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: 0057378e784ccb8fa28abaa972a86988fbecea19
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+ - description: The input is a set of processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input
+ shape:
+ - 1
+ - 490
+ type: fp32
+ use_case: Random input for model regression.
+ input_datatype: fp32
+ name: input
+ shape:
+ - 1
+ - 490
+ output_nodes:
+ - description: The probability on 12 keywords.
+ example_output:
+ path: models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: fp32
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: fp32
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: false
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
new file mode 100644
index 0000000..1752993
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2a935408c16cb85e8d23f9d604ea41231df1f8005c067e0a692146e7b881481
+size 2088
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
new file mode 100644
index 0000000..c590a95
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62786f0bb0878883ab48d4a76086aff8cea161ac537ea41615901378926052a8
+size 176
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/README.md b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/README.md
new file mode 100644
index 0000000..6318de4
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8
+
+## Description
+This is a fully quantized int8 version of the CNN Medium model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | int8 |
+| SHA-1 Hash | 6bc68074d960bbb0c695e19fd96fd7903131ef60 |
+| Size (Bytes) | 186064 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 90.47% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_check_mark: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Deployable |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_check_mark: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 490) | int8 | models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input | fp32 | [1, 490] | The input is a set of processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | int8 | models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity | fp32 | [1, 12] | The probability on 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_medium/tflite_int8/cnn_m_quantized.tflite b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/cnn_m_quantized.tflite
similarity index 100%
rename from models/keyword_spotting/cnn_medium/tflite_int8/cnn_m_quantized.tflite
rename to models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/cnn_m_quantized.tflite
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
new file mode 100644
index 0000000..10f79a7
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
@@ -0,0 +1,64 @@
+benchmark:
+ benchmark_metrics:
+ Accuracy: 90.47%
+ benchmark_name: Google Speech Commands test set
+description: This is a fully quantized int8 version of the CNN Medium model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: int8
+ file_size_bytes: 186064
+ filename: cnn_m_quantized.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: 6bc68074d960bbb0c695e19fd96fd7903131ef60
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+ - description: The input is a set of processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input
+ shape:
+ - 1
+ - 490
+ type: int8
+ use_case: Random input for model regression.
+ input_datatype: int8
+ name: input
+ shape:
+ - 1
+ - 490
+ output_nodes:
+ - description: The probability on 12 keywords.
+ example_output:
+ path: models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: int8
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: int8
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: true
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_medium/tflite_int8/testing_input/input/0.npy b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
similarity index 100%
rename from models/keyword_spotting/cnn_medium/tflite_int8/testing_input/input/0.npy
rename to models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
diff --git a/models/keyword_spotting/cnn_medium/tflite_int8/testing_output/Identity/0.npy b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
similarity index 100%
rename from models/keyword_spotting/cnn_medium/tflite_int8/testing_output/Identity/0.npy
rename to models/keyword_spotting/cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/saved_model/cnn_medium/keras_metadata.pb b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/saved_model/cnn_medium/keras_metadata.pb
new file mode 100644
index 0000000..30ebf5e
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/saved_model/cnn_medium/keras_metadata.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae36d2d043a0d2b71e7f5fd8eef87f627324344451706fbfa6dcdcd9fd95bd6f
+size 28876
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/saved_model/cnn_medium/saved_model.pb b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/saved_model/cnn_medium/saved_model.pb
new file mode 100644
index 0000000..5d6fdbc
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/saved_model/cnn_medium/saved_model.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0228b4fa8fed68d9bfbaa60e6f7157f91c6b4e142d0278b4141006749fc1ccd8
+size 302218
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/saved_model/cnn_medium/variables/variables.data-00000-of-00001 b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/saved_model/cnn_medium/variables/variables.data-00000-of-00001
new file mode 100644
index 0000000..6a79c8b
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/saved_model/cnn_medium/variables/variables.data-00000-of-00001
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d54b7d5df343e2d5285d1d64a9bfb743ace65a402e87e9d963e69b0417a59e5d
+size 725888
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/saved_model/cnn_medium/variables/variables.index b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/saved_model/cnn_medium/variables/variables.index
new file mode 100644
index 0000000..99cba5f
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/saved_model/cnn_medium/variables/variables.index
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d12a6c029bb2ff6a692e3376a01e160f78461add8d82d1d6c53e7e65c0d5f278
+size 1476
diff --git a/models/keyword_spotting/cnn_medium/tflite_int8/ckpt/checkpoint b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/weights/checkpoint
similarity index 100%
rename from models/keyword_spotting/cnn_medium/tflite_int8/ckpt/checkpoint
rename to models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/weights/checkpoint
diff --git a/models/keyword_spotting/cnn_medium/tflite_int8/ckpt/cnn_0.93_ckpt.data-00000-of-00001 b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/weights/cnn_0.93_ckpt.data-00000-of-00001
similarity index 100%
rename from models/keyword_spotting/cnn_medium/tflite_int8/ckpt/cnn_0.93_ckpt.data-00000-of-00001
rename to models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/weights/cnn_0.93_ckpt.data-00000-of-00001
diff --git a/models/keyword_spotting/cnn_medium/tflite_int8/ckpt/cnn_0.93_ckpt.index b/models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/weights/cnn_0.93_ckpt.index
similarity index 100%
rename from models/keyword_spotting/cnn_medium/tflite_int8/ckpt/cnn_0.93_ckpt.index
rename to models/keyword_spotting/cnn_medium/model_package_tf/model_archive/model_source/weights/cnn_0.93_ckpt.index
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/model_core_utils/__init__.py b/models/keyword_spotting/cnn_medium/model_package_tf/model_core_utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/model_core_utils/models.py b/models/keyword_spotting/cnn_medium/model_package_tf/model_core_utils/models.py
new file mode 100644
index 0000000..1978136
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/model_core_utils/models.py
@@ -0,0 +1,327 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Model definitions for simple keyword spotting."""
+
+import math
+
+import tensorflow as tf
+
+
+def prepare_model_settings(label_count, sample_rate, clip_duration_ms,
+ window_size_ms, window_stride_ms,
+ dct_coefficient_count):
+ """Calculates common settings needed for all models.
+
+ Args:
+ label_count: How many classes are to be recognized.
+ sample_rate: Number of audio samples per second.
+ clip_duration_ms: Length of each audio clip to be analyzed.
+ window_size_ms: Duration of frequency analysis window.
+ window_stride_ms: How far to move in time between frequency windows.
+ dct_coefficient_count: Number of frequency bins to use for analysis.
+
+ Returns:
+ Dictionary containing common settings.
+ """
+ desired_samples = int(sample_rate * clip_duration_ms / 1000)
+ window_size_samples = int(sample_rate * window_size_ms / 1000)
+ window_stride_samples = int(sample_rate * window_stride_ms / 1000)
+ length_minus_window = (desired_samples - window_size_samples)
+ if length_minus_window < 0:
+ spectrogram_length = 0
+ else:
+ spectrogram_length = 1 + int(length_minus_window / window_stride_samples)
+ fingerprint_size = dct_coefficient_count * spectrogram_length
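+  # Worked example (illustrative): with the CNN settings used in this package's recreate_model.sh
+  # (16 kHz audio, 1000 ms clips, 40 ms windows, 20 ms stride, 10 DCT coefficients) this gives
+  # 1 + (16000 - 640) // 320 = 49 frames and fingerprint_size = 49 * 10 = 490.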
+
+ return {
+ 'desired_samples': desired_samples,
+ 'window_size_samples': window_size_samples,
+ 'window_stride_samples': window_stride_samples,
+ 'spectrogram_length': spectrogram_length,
+ 'dct_coefficient_count': dct_coefficient_count,
+ 'fingerprint_size': fingerprint_size,
+ 'label_count': label_count,
+ 'sample_rate': sample_rate,
+ }
+
+
+def create_model(model_settings, model_architecture, model_size_info, is_training):
+ """Builds a tf.keras model of the requested architecture compatible with the settings.
+
+ Args:
+ model_settings: Dictionary of information about the model.
+ model_architecture: String specifying which kind of model to create.
+    model_size_info: Array with specific information for the chosen architecture
+      (e.g. convolutional parameters, number of layers).
+    is_training: Whether the model is being built for training or inference (only
+      used by the 'basic_lstm' architecture to control unrolling).
+
+ Returns:
+ A tf.keras Model with the requested architecture.
+
+ Raises:
+ Exception: If the architecture type isn't recognized.
+ """
+
+ if model_architecture == 'dnn':
+ return create_dnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'cnn':
+ return create_cnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'ds_cnn':
+ return create_ds_cnn_model(model_settings, model_size_info)
+ elif model_architecture == 'single_fc':
+ return create_single_fc_model(model_settings)
+ elif model_architecture == 'basic_lstm':
+ return create_basic_lstm_model(model_settings, model_size_info, is_training)
+ else:
+    raise Exception(f'model_architecture argument {model_architecture} not recognized'
+                    f', should be one of "dnn", "cnn", "ds_cnn", "single_fc" or "basic_lstm"')
+
+
+def create_single_fc_model(model_settings):
+ """Builds a model with a single fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+
+ Returns:
+ tf.keras Model of the 'SINGLE_FC' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+ # Fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(inputs)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_basic_lstm_model(model_settings, model_size_info, is_training):
+ """Builds a model with a basic lstm layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+    model_size_info: Array whose first element gives the number of units in the LSTM layer.
+    is_training: Whether the model is being built for training; when False the LSTM is
+      unrolled for inference.
+
+ Returns:
+ tf.keras Model of the 'Basic_LSTM' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size))
+
+ # LSTM layer, and unrolling depending on whether you are training or not
+ if is_training:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=False)(x)
+ else:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=True)(x)
+
+ # Outputs a fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_dnn_model(model_settings, model_size_info):
+ """Builds a model with multiple hidden fully-connected layers.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+    model_size_info: Length of the array defines the number of hidden layers and
+      each element in the array represents the number of neurons in that layer.
+
+ Returns:
+ tf.keras Model of the 'DNN' architecture.
+ """
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ # First fully connected layer.
+ x = tf.keras.layers.Dense(units=model_size_info[0], activation='relu')(inputs)
+
+ # Hidden layers with ReLU activations.
+ for i in range(1, len(model_size_info)):
+ x = tf.keras.layers.Dense(units=model_size_info[i], activation='relu')(x)
+
+ # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_cnn_model(model_settings, model_size_info):
+ """Builds a model with 2 convolution layers followed by a linear layer and a hidden fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines the first and second convolution parameters in
+ {number of conv features, conv filter height, width, stride in y,x dir.},
+ followed by linear layer size and fully-connected layer size.
+
+ Returns:
+ tf.keras Model of the 'CNN' architecture.
+ """
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ first_filter_count = model_size_info[0]
+ first_filter_height = model_size_info[1] # Time axis.
+ first_filter_width = model_size_info[2] # Frequency axis.
+ first_filter_stride_y = model_size_info[3] # Time axis.
+  first_filter_stride_x = model_size_info[4]  # Frequency axis.
+
+ second_filter_count = model_size_info[5]
+ second_filter_height = model_size_info[6] # Time axis.
+ second_filter_width = model_size_info[7] # Frequency axis.
+ second_filter_stride_y = model_size_info[8] # Time axis.
+ second_filter_stride_x = model_size_info[9] # Frequency axis.
+
+ linear_layer_size = model_size_info[10]
+ fc_size = model_size_info[11]
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size']), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=first_filter_count,
+ kernel_size=(first_filter_height, first_filter_width),
+ strides=(first_filter_stride_y, first_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Second convolution.
+ x = tf.keras.layers.Conv2D(filters=second_filter_count,
+ kernel_size=(second_filter_height, second_filter_width),
+ strides=(second_filter_stride_y, second_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Flatten for fully connected layers.
+ x = tf.keras.layers.Flatten()(x)
+
+ # Fully connected layer with no activation.
+ x = tf.keras.layers.Dense(units=linear_layer_size)(x)
+
+  # Fully connected layer followed by batch norm and ReLU activation.
+ x = tf.keras.layers.Dense(units=fc_size)(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Output fully connected.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_ds_cnn_model(model_settings, model_size_info):
+ """Builds a model with convolutional & depthwise separable convolutional layers.
+
+ For more details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines number of layers, followed by the DS-Conv layer
+ parameters in the order {number of conv features, conv filter height,
+ width and stride in y,x dir.} for each of the layers.
+
+ Returns:
+ tf.keras Model of the 'DS-CNN' architecture.
+ """
+
+ label_count = model_settings['label_count']
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ t_dim = input_time_size
+ f_dim = input_frequency_size
+
+ # Extract model dimensions from model_size_info.
+ num_layers = model_size_info[0]
+ conv_feat = [None]*num_layers
+ conv_kt = [None]*num_layers
+ conv_kf = [None]*num_layers
+ conv_st = [None]*num_layers
+ conv_sf = [None]*num_layers
+
+ i = 1
+ for layer_no in range(0, num_layers):
+ conv_feat[layer_no] = model_size_info[i]
+ i += 1
+ conv_kt[layer_no] = model_size_info[i]
+ i += 1
+ conv_kf[layer_no] = model_size_info[i]
+ i += 1
+ conv_st[layer_no] = model_size_info[i]
+ i += 1
+ conv_sf[layer_no] = model_size_info[i]
+ i += 1
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size']), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # Depthwise separable convolutions.
+ for layer_no in range(0, num_layers):
+ if layer_no == 0:
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[0],
+ kernel_size=(conv_kt[0], conv_kf[0]),
+ strides=(conv_st[0], conv_sf[0]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ else:
+ # Depthwise convolution.
+ x = tf.keras.layers.DepthwiseConv2D(kernel_size=(conv_kt[layer_no], conv_kf[layer_no]),
+ strides=(conv_sf[layer_no], conv_st[layer_no]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ # Pointwise convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[layer_no], kernel_size=(1, 1))(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ t_dim = math.ceil(t_dim/float(conv_st[layer_no]))
+ f_dim = math.ceil(f_dim/float(conv_sf[layer_no]))
+
+ # Global average pool.
+ x = tf.keras.layers.AveragePooling2D(pool_size=(t_dim, f_dim), strides=1)(x)
+
+ # Squeeze before passing to output fully connected layer.
+ x = tf.reshape(x, shape=(-1, conv_feat[layer_no]))
+
+  # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=label_count, activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/optimisations.py b/models/keyword_spotting/cnn_medium/model_package_tf/optimisations.py
new file mode 100644
index 0000000..16b6f4c
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/optimisations.py
@@ -0,0 +1,259 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for optimizing simple keyword spotting models using clustering API."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+import tensorflow_model_optimization as tfmot
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def print_model_weight_clusters(model):
+
+ for layer in model.layers:
+ if isinstance(layer, tf.keras.layers.Wrapper):
+ weights = layer.trainable_weights
+ else:
+ weights = layer.weights
+ for weight in weights:
+ if "kernel" in weight.name:
+ unique_count = len(np.unique(weight))
+ print(
+ f"{layer.name}/{weight.name}: {unique_count} clusters "
+ )
+
+
+def optimize():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model to optimize from checkpoint.
+    model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, True)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ cluster_weights = tfmot.clustering.keras.cluster_weights
+ CentroidInitialization = tfmot.clustering.keras.CentroidInitialization
+
+ clustering_params = {
+ 'number_of_clusters': 32,
+ 'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS}
+
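+    # cluster_weights wraps each supported layer so that, during fine-tuning, its kernel values
+    # are constrained to the 32 learned centroids configured above.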
+ clustered_model = cluster_weights(model, **clustering_params)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
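+    # For example, with the default '3750,750' schedule and eval_step_interval=400 this is
+    # ceil(4500 / 400) = 12 fine-tuning 'epochs' of 400 steps each.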
+
+ # Train the model with clustering applied.
+ clustered_model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data)
+
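+    # strip_clustering removes the clustering wrappers, leaving ordinary layers whose kernels
+    # contain only the learned centroid values; this is the model whose weights are saved below.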
+ stripped_clustered_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+
+ print_model_weight_clusters(stripped_clustered_model)
+
+ # Save the clustered model weights
+ train_dir = Path(FLAGS.train_dir) / "optimized"
+ train_dir.mkdir(parents=True, exist_ok=True)
+
+ stripped_clustered_model.save_weights((train_dir /
+ (FLAGS.model_architecture +
+ "_clustered_ckpt")))
+
+ # Test the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ stripped_clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ test_loss, test_acc = stripped_clustered_model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='3750,750',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--save_step_interval',
+ type=int,
+ default=100,
+ help='Save model checkpoint every save_steps.')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from before fine-tuning.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ optimize()
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/recreate_model.sh b/models/keyword_spotting/cnn_medium/model_package_tf/recreate_model.sh
new file mode 100644
index 0000000..a295f58
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/recreate_model.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ckpt_path=model_archive/model_source/weights/cnn_0.93_ckpt
+train=false
+
+# Parse command line args
+while (( $# >= 1 )); do
+ case $1 in
+ --ckpt)
+ if [ "$2" ]; then
+ ckpt_path=$2
+ shift
+ else
+ printf 'ERROR: "--ckpt" requires a path to be supplied.\n'
+ exit 1
+ fi
+ ;;
+ --train)
+ train=true
+ break;;
+ *) shift;
+ esac;
+done
+
+
+# CNN Medium training
+if [ "$train" = true ]
+then
+python train.py --model_architecture cnn --model_size_info 64 10 4 1 1 48 10 4 2 1 16 128 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --learning_rate 0.0005,0.0001,0.00002 --how_many_training_steps 10000,10000,10000 --summaries_dir work/CNN/CNN_M/retrain_logs --train_dir work/CNN/CNN_M/training
+fi
+
+# Conversion to TFLite fp32
+python convert_to_tflite.py --model_architecture cnn --model_size_info 64 10 4 1 1 48 10 4 2 1 16 128 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --checkpoint $ckpt_path --no-quantize
+
+# Conversion to TFLite int8
+python convert_to_tflite.py --model_architecture cnn --model_size_info 64 10 4 1 1 48 10 4 2 1 16 128 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --checkpoint $ckpt_path --inference_type int8
+
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/requirements.txt b/models/keyword_spotting/cnn_medium/model_package_tf/requirements.txt
new file mode 100644
index 0000000..3448cff
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/requirements.txt
@@ -0,0 +1,3 @@
+numpy == 1.19.5
+tensorflow == 2.5.0
+tensorflow-model-optimization == 0.6.0
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/train.py b/models/keyword_spotting/cnn_medium/model_package_tf/train.py
new file mode 100644
index 0000000..8c488b3
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/train.py
@@ -0,0 +1,227 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for training simple keyword spotting models."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def train():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model.
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, True)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
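+    # For example, with the default '15000,3000' schedule and eval_step_interval=400 this is
+    # ceil(18000 / 400) = 45 training 'epochs' of 400 steps each.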
+
+ # Callbacks.
+ train_dir = Path(FLAGS.train_dir) / "best"
+ train_dir.mkdir(parents=True, exist_ok=True)
+ model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
+ filepath=(train_dir / (FLAGS.model_architecture + "_{val_accuracy:.3f}_ckpt")),
+ save_weights_only=True,
+ monitor='val_accuracy',
+ mode='max',
+ save_best_only=True)
+ tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.summaries_dir)
+
+ # Train the model.
+ model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data,
+ callbacks=[model_checkpoint_callback, tensorboard_callback])
+
+ # Test and save the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ test_loss, test_acc = model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+ model.save(f'saved_model/{FLAGS.model_architecture}')
+ model.save(f'keras/{FLAGS.model_architecture}.h5')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='15000,3000',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--summaries_dir',
+ type=str,
+ default='/tmp/retrain_logs',
+ help='Where to save summary logs for TensorBoard.')
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ train()
diff --git a/models/keyword_spotting/cnn_medium/model_package_tf/validation_utils/labels.txt b/models/keyword_spotting/cnn_medium/model_package_tf/validation_utils/labels.txt
new file mode 100644
index 0000000..ba41645
--- /dev/null
+++ b/models/keyword_spotting/cnn_medium/model_package_tf/validation_utils/labels.txt
@@ -0,0 +1,12 @@
+_silence_
+_unknown_
+yes
+no
+up
+down
+left
+right
+on
+off
+stop
+go
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_medium/tflite_int8/README.md b/models/keyword_spotting/cnn_medium/tflite_int8/README.md
deleted file mode 100644
index 5576d61..0000000
--- a/models/keyword_spotting/cnn_medium/tflite_int8/README.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# CNN Medium INT8
-
-## Description
-This is a fully quantized version (asymmetrical int8) of the CNN Medium model developed by Arm, with training checkpoints, from the Hello Edge paper. Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
-
-## License
-[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
-
-## Related Materials
-### Class Labels
-The class labels associated with this model can be downloaded by running the script `get_class_labels.sh`.
-
-### Model Recreation Code
-Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m.
-
-## Network Information
-| Network Information | Value |
-|---------------------|------------------|
-| Framework | TensorFlow Lite |
-| SHA-1 Hash | 6bc68074d960bbb0c695e19fd96fd7903131ef60 |
-| Size (Bytes) | 186064 |
-| Provenance | https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m |
-| Paper | https://arxiv.org/abs/1711.07128 |
-
-## Accuracy
-Dataset: Google Speech Commands Test Set
-
-| Metric | Value |
-|--------|-------|
-| Accuracy | 0.911 |
-
-## Performance
-| Platform | Optimized |
-|----------|:---------:|
-| Cortex-A |:heavy_check_mark: |
-| Cortex-M |:heavy_check_mark: |
-| Mali GPU |:heavy_check_mark: |
-| Ethos U |:heavy_check_mark: |
-
-### Key
-* :heavy_check_mark: - Will run on this platform.
-* :heavy_multiplication_x: - Will not run on this platform.
-
-
-
-## Optimizations
-| Optimization | Value |
-|-----------------|---------|
-| Quantization | INT8 |
-
-## Network Inputs
-| Input Node Name | Shape | Description |
-|-----------------|---------|-------------|
-| input | (1, 490) | The input is a processed MFCCs of shape (1, 490) |
-
-## Network Outputs
-| Output Node Name | Shape | Description |
-|------------------|---------|-------------|
-| Identity | (1, 12) | The probability on 12 keywords. |
diff --git a/models/keyword_spotting/cnn_medium/tflite_int8/definition.yaml b/models/keyword_spotting/cnn_medium/tflite_int8/definition.yaml
deleted file mode 100644
index a7851bb..0000000
--- a/models/keyword_spotting/cnn_medium/tflite_int8/definition.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-benchmark:
- Google Speech Commands test set:
- Accuracy: 91.08%
-description: 'This is a fully quantized version (asymmetrical int8) of the CNN Medium
- model developed by Arm, with training checkpoints, from the Hello Edge paper. Code
- to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m'
-license:
-- Apache-2.0
-network:
- file_size_bytes: 186064
- filename: cnn_m_quantized.tflite
- framework: TensorFlow Lite
- hash:
- algorithm: sha1
- value: 6bc68074d960bbb0c695e19fd96fd7903131ef60
- provenance: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
- quality_level: null
-network_parameters:
- input_nodes:
- - description: The input is a processed MFCCs of shape (1, 490)
- example_input:
- path: models/keyword_spotting/cnn_medium/tflite_int8/testing_input/input
- name: input
- shape:
- - 1
- - 490
- output_nodes:
- - description: The probability on 12 keywords.
- name: Identity
- shape:
- - 1
- - 12
- test_output_path: models/keyword_spotting/cnn_medium/tflite_int8/testing_output/Identity
-operators:
- TensorFlow Lite:
- - CONV_2D
- - DEQUANTIZE
- - FULLY_CONNECTED
- - QUANTIZE
- - RELU
- - RESHAPE
- - SOFTMAX
-paper: https://arxiv.org/abs/1711.07128
diff --git a/models/keyword_spotting/cnn_medium/tflite_int8/get_class_labels.sh b/models/keyword_spotting/cnn_medium/tflite_int8/get_class_labels.sh
deleted file mode 100755
index e59caf5..0000000
--- a/models/keyword_spotting/cnn_medium/tflite_int8/get_class_labels.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the License); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an AS IS BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/usr/bin/env bash
-
-wget https://raw.githubusercontent.com/ARM-software/ML-KWS-for-MCU/e9cf319e9aa2ff71d433e111477dd95329fb94cb/Pretrained_models/labels.txt
-mv labels.txt labelmappings.txt
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/README.md b/models/keyword_spotting/cnn_small/model_package_tf/README.md
new file mode 100644
index 0000000..b74f3ba
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/README.md
@@ -0,0 +1,115 @@
+# CNN Small model package
+
+This folder contains code that will allow you to recreate the CNN Small keyword spotting model from
+the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf).
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Model Package Overview
+| Model | CNN_Small |
+|:---------------: |:------------------------------------------:|
+| **Format**: | Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |
+| **Feature**: | Keyword spotting for Arm Cortex-M CPUs |
+| **Architectural Delta w.r.t. Vanilla**: | None |
+| **Domain**: | Keyword spotting |
+| **Package Quality**: | Optimised |
+
+## Model Recreation
+
+In order to recreate the model you will first need to be using ```Python3.7``` and to install the requirements in ```requirements.txt```.
+
+Once these requirements are satisfied, you can execute the recreation script contained in this folder by running:
+
+```bash
+bash ./recreate_model.sh
+```
+
+Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder
+to generate the TFLite files and perform evaluation on the test sets. Both an fp32 version and a quantized version will be produced.
+The quantized version is fully quantized using post-training quantization.
+
+If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:
+
+```bash
+bash ./recreate_model.sh --train
+```
+
+Training is then performed and should produce a model with the accuracy stated in this repository.
+Note that the export to TFLite will still use the pre-trained checkpoint files, so you will need to re-run the script
+and this time supply the path to the new checkpoint files you want to use, for example:
+
+```bash
+bash ./recreate_model.sh --ckpt <path to checkpoint files>
+```
+
+
+## Training
+
+To train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:
+
+```
+python train.py --model_architecture dnn --model_size_info 128 128 128
+```
+The command line argument *--model_size_info* is used to pass the neural network layer
+dimensions such as number of layers, convolution filter size/stride as a list to models.py,
+which builds the TensorFlow graph based on the provided model architecture
+and layer dimensions. For more info on *model_size_info* for each network architecture see
+[models.py](models.py).
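+
+As an illustration, for the `cnn` architecture used by this package the list is read as two convolution specifications followed by the linear and final fully-connected layer sizes (a sketch based on `create_cnn_model` in the model source; the braced names are placeholders, not tuned values):
+
+```
+--model_size_info {conv1 filters} {conv1 kernel height} {conv1 kernel width} {conv1 stride y} {conv1 stride x}
+                  {conv2 filters} {conv2 kernel height} {conv2 kernel width} {conv2 stride y} {conv2 stride x}
+                  {linear layer size} {fully-connected layer size}
+```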
+
+The training commands with all the hyperparameters to reproduce the models shown in the
+[paper](https://arxiv.org/pdf/1711.07128.pdf) are given [here](recreate_model.sh).
+
+## Testing
+To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:
+```
+python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint path>
+```
+The parameters used here should match those used in the Training step.
+
+## Optimization
+
+We introduce a new *optional* step to optimize the trained keyword spotting model for deployment.
+
+Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. 32 weight clusters and the kmeans++ cluster initialization method are used as the clustering hyperparameters.
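+
+These hyperparameters correspond to the following use of the clustering API (a simplified sketch mirroring `optimisations.py`; `model` here stands for your trained Keras model):
+
+```
+import tensorflow_model_optimization as tfmot
+
+clustering_params = {
+    'number_of_clusters': 32,
+    'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS}
+clustered_model = tfmot.clustering.keras.cluster_weights(model, **clustering_params)
+```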
+
+To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.
+You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.
+
+To apply the optimization and fine-tuning, run the following command:
+```
+python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint path>
+```
+The parameters used here should match those used in the Training step, except for the number of training steps.
+The number of training steps is reduced since the optimization step only requires fine-tuning.
+
+This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model.
+
+## Quantization and TFLite Conversion
+
+As part of the update we now use TensorFlow's
+[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to
+make quantization of the trained models straightforward.
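+
+Under the hood this is the standard TFLite converter flow, roughly as sketched below (a simplified view of what `convert_to_tflite.py` does; `model` is the restored Keras model and `rep_dataset` a generator yielding representative MFCC inputs):
+
+```
+import tensorflow as tf
+
+converter = tf.lite.TFLiteConverter.from_keras_model(model)
+converter.optimizations = [tf.lite.Optimize.DEFAULT]
+converter.representative_dataset = rep_dataset
+tflite_model = converter.convert()
+```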
+
+To quantize your trained model (e.g. a DNN) run:
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint path> [--inference_type int8|int16]
+```
+The parameters used here should match those used in the Training step.
+
+The *optional* inference_type parameter sets the input and output type of the fully quantized model to int8 or int16. It defaults to fp32, in which case the inputs and outputs stay in floating point.
+
+This step will produce a quantized TFLite file *dnn_quantized.tflite*.
+You can test the accuracy of this quantized model on the test set by running:
+```
+python evaluation.py --tflite_path dnn_quantized.tflite
+```
+The parameters used here should match those used in the Training step.
+
+`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:
+
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint path> --no-quantize
+```
+
+This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/cnn_s_inference_keras.py b/models/keyword_spotting/cnn_small/model_package_tf/cnn_s_inference_keras.py
new file mode 100644
index 0000000..db7694a
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/cnn_s_inference_keras.py
@@ -0,0 +1,76 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import argparse
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+    with open(filename, "r") as f:
+        return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+
+ model = tf.keras.models.load_model(FLAGS.keras_file_path)
+ predictions = model.predict(x)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--keras_file_path',
+ type=str,
+ default='',
+ help='Path to the .h5 Keras model file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/cnn_s_inference_tflite.py b/models/keyword_spotting/cnn_small/model_package_tf/cnn_s_inference_tflite.py
new file mode 100644
index 0000000..9f79d99
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/cnn_s_inference_tflite.py
@@ -0,0 +1,120 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import numpy as np
+import argparse
+
+
+def tflite_inference(input_data, tflite_path):
+ """Call forwards pass of TFLite file and returns the result.
+
+ Args:
+ input_data: Input data to use on forward pass.
+ tflite_path: Path to TFLite file to run.
+
+ Returns:
+ Output from inference.
+ """
+ supported_quant_dtypes = (np.int8, np.int16)
+ interpreter = tf.lite.Interpreter(model_path=tflite_path)
+ interpreter.allocate_tensors()
+
+ input_details = interpreter.get_input_details()
+ output_details = interpreter.get_output_details()
+
+ input_dtype = input_details[0]["dtype"]
+ output_dtype = output_details[0]["dtype"]
+
+ # Check if the input/output type is quantized,
+ # set scale and zero-point accordingly
+ if input_dtype in supported_quant_dtypes:
+ input_scale, input_zero_point = input_details[0]["quantization"]
+ else:
+ input_scale, input_zero_point = 1, 0
+
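+    # Quantize the fp32 features into the model's input type: q = round(x / scale + zero_point).
+    # For an fp32 model scale=1 and zero_point=0, so the data passes through unchanged.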
+ input_data = input_data / input_scale + input_zero_point
+ input_data = np.round(input_data) if input_dtype in supported_quant_dtypes else input_data
+
+ if output_dtype in supported_quant_dtypes:
+ output_scale, output_zero_point = output_details[0]["quantization"]
+ else:
+ output_scale, output_zero_point = 1, 0
+
+ interpreter.set_tensor(input_details[0]['index'], tf.cast(input_data, input_dtype))
+ interpreter.invoke()
+
+ output_data = interpreter.get_tensor(output_details[0]['index'])
+
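+    # Dequantize the output back to fp32: x = scale * (q - zero_point); identity for fp32 models.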
+ output_data = output_scale * (output_data.astype(np.float32) - output_zero_point)
+
+ return output_data
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+    with open(filename, "r") as f:
+        return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+ predictions = tflite_inference(x, FLAGS.tflite_path)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ default='',
+ help='Path to TFLite file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/convert_to_tflite.py b/models/keyword_spotting/cnn_small/model_package_tf/convert_to_tflite.py
new file mode 100644
index 0000000..64ab8df
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/convert_to_tflite.py
@@ -0,0 +1,234 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for converting and quantizing a trained keyword spotting
+ model and saving to TFLite."""
+
+import argparse
+
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from evaluation import tflite_test
+
+NUM_REP_DATA_SAMPLES = 100 # How many samples to use for post training quantization.
+
+
+def convert(model_settings, audio_processor, checkpoint, quantize, inference_type, tflite_path):
+ """Load our trained floating point model and convert it.
+
+ TFLite conversion or post training quantization is performed and the
+ resulting model is saved as a TFLite file.
+ We use samples from the validation set to do post training quantization.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ checkpoint: Path to training checkpoint to load.
+ quantize: Whether to quantize the model or convert to fp32 TFLite model.
+ inference_type: Input/output type of the quantized model.
+ tflite_path: Output TFLite file save path.
+ """
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(checkpoint).expect_partial()
+
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+
+ def _rep_dataset():
+ """Generator function to produce representative dataset."""
+ i = 0
+ for mfcc, label in val_data:
+            if i >= NUM_REP_DATA_SAMPLES:
+ break
+ i += 1
+ yield [mfcc]
+
+ if quantize:
+ # Quantize model and save to disk.
+ tflite_model = post_training_quantize(model, inference_type, _rep_dataset)
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Quantized model saved to {tflite_path}.')
+ else:
+ converter = tf.lite.TFLiteConverter.from_keras_model(model)
+ tflite_model = converter.convert()
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Converted model saved to {tflite_path}.')
+
+
+def post_training_quantize(keras_model, inference_type, rep_dataset):
+ """Perform post training quantization and returns the TFLite model ready for saving.
+
+ See https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization for
+ more details.
+
+ Args:
+ keras_model: The trained tf Keras model used for post training quantization.
+ inference_type: Input/output type of the quantized model.
+ rep_dataset: Function to use as a representative dataset, must be callable.
+
+ Returns:
+ Quantized TFLite model ready for saving to disk.
+ """
+ converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+ converter.optimizations = [tf.lite.Optimize.DEFAULT]
+
+    # Default op set so supported_ops is defined even when inference_type stays fp32.
+    supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS
+    if inference_type == 'int8':
+ converter.inference_input_type = tf.int8
+ converter.inference_output_type = tf.int8
+ supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS_INT8
+ if inference_type == 'int16':
+ converter.inference_input_type = tf.int16
+ converter.inference_output_type = tf.int16
+ supported_ops = tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
+
+ # Int8 post training quantization needs representative dataset.
+ converter.representative_dataset = rep_dataset
+ converter.target_spec.supported_ops = [supported_ops]
+
+ tflite_model = converter.convert()
+
+ return tflite_model
+
+
+def main():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.quantize:
+ tflite_path = f'{FLAGS.model_architecture}_quantized.tflite'
+ else:
+ tflite_path = f'{FLAGS.model_architecture}.tflite'
+
+ # Load floating point model from checkpoint and convert it.
+ convert(model_settings, audio_processor, FLAGS.checkpoint,
+ FLAGS.quantize, FLAGS.inference_type, tflite_path)
+
+ # Test the newly converted model on the test set.
+ tflite_test(model_settings, audio_processor, tflite_path)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from.')
+ parser.add_argument(
+ '--quantize',
+ dest='quantize',
+ action="store_true",
+ default=True,
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--no-quantize',
+ dest='quantize',
+ action="store_false",
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--inference_type',
+ type=str,
+ default='fp32',
+        help='If quantize is true, whether the model input and output type is fp32, int8 or int16')
+
+ FLAGS, _ = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/data_processing/__init__.py b/models/keyword_spotting/cnn_small/model_package_tf/data_processing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/data_processing/data_preprocessing.py b/models/keyword_spotting/cnn_small/model_package_tf/data_processing/data_preprocessing.py
new file mode 100644
index 0000000..05cf5ba
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/data_processing/data_preprocessing.py
@@ -0,0 +1,462 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Modifications Copyright 2023 Arm Inc. All Rights Reserved.
+# Modified to use TensorFlow 2.0 and data pipelines.
+#
+"""Functions for loading and preparing data for keyword spotting."""
+
+import os
+import re
+import sys
+import urllib
+from pathlib import Path
+import tarfile
+import hashlib
+import random
+import math
+from enum import Enum
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.ops import gen_audio_ops as audio_ops
+
+MAX_NUM_WAVS_PER_CLASS = 2**27 - 1 # ~134M
+RANDOM_SEED = 59185
+BACKGROUND_NOISE_DIR_NAME = '_background_noise_'
+SILENCE_LABEL = '_silence_'
+SILENCE_INDEX = 0
+UNKNOWN_WORD_INDEX = 1
+UNKNOWN_WORD_LABEL = '_unknown_'
+
+
+def load_wav_file(wav_filename, desired_samples):
+ """Loads and then decodes a given 16bit PCM wav file.
+
+ Decoded audio is scaled to the range [-1, 1] and padded or cropped to the desired number of samples.
+
+ Args:
+ wav_filename: 16bit PCM wav file to load.
+ desired_samples: Number of samples wanted from the audio file.
+
+ Returns:
+ Tuple consisting of the decoded audio and sample rate.
+ """
+ wav_file = tf.io.read_file(wav_filename)
+ decoded_wav = audio_ops.decode_wav(wav_file, desired_channels=1, desired_samples=desired_samples)
+
+ return decoded_wav.audio, decoded_wav.sample_rate
+
+
+def calculate_mfcc(audio_signal, audio_sample_rate, window_size, window_stride, num_mfcc):
+ """Returns Mel Frequency Cepstral Coefficients (MFCC) for a given audio signal.
+
+ Args:
+ audio_signal: Raw audio signal in range [-1, 1]
+ audio_sample_rate: Audio signal sample rate
+ window_size: Window size in samples for calculating spectrogram
+ window_stride: Window stride in samples for calculating spectrogram
+ num_mfcc: The number of MFCC features wanted.
+
+ Returns:
+      Calculated MFCC features.
+ """
+ spectrogram = audio_ops.audio_spectrogram(input=audio_signal, window_size=window_size, stride=window_stride,
+ magnitude_squared=True)
+
+ mfcc_features = audio_ops.mfcc(spectrogram, audio_sample_rate, dct_coefficient_count=num_mfcc)
+
+ return mfcc_features
+
+
+def which_set(filename, validation_percentage, testing_percentage):
+ """Determines which data partition the file should belong to.
+
+ We want to keep files in the same training, validation, or testing sets even
+ if new ones are added over time. This makes it less likely that testing
+ samples will accidentally be reused in training when long runs are restarted
+ for example. To keep this stability, a hash of the filename is taken and used
+ to determine which set it should belong to. This determination only depends on
+ the name and the set proportions, so it won't change as other files are added.
+ It's also useful to associate particular files as related (for example words
+ spoken by the same person), so anything after '_nohash_' in a filename is
+ ignored for set determination. This ensures that 'bobby_nohash_0.wav' and
+ 'bobby_nohash_1.wav' are always in the same set, for example.
+
+ Args:
+ filename: File path of the data sample.
+ validation_percentage: How much of the data set to use for validation.
+ testing_percentage: How much of the data set to use for testing.
+
+ Returns:
+ String, one of 'training', 'validation', or 'testing'.
+ """
+ base_name = os.path.basename(filename)
+ # We want to ignore anything after '_nohash_' in the file name when
+ # deciding which set to put a wav in, so the data set creator has a way of
+ # grouping wavs that are close variations of each other.
+ hash_name = re.sub(r'_nohash_.*$', '', base_name)
+ # This looks a bit magical, but we need to decide whether this file should
+ # go into the training, testing, or validation sets, and we want to keep
+ # existing files in the same set even if more files are subsequently
+ # added.
+ # To do that, we need a stable way of deciding based on just the file name
+ # itself, so we do a hash of that and then use that to generate a
+ # probability value that we use to assign it.
+ hash_name_hashed = hashlib.sha1(tf.compat.as_bytes(hash_name)).hexdigest()
+ percentage_hash = ((int(hash_name_hashed, 16) %
+ (MAX_NUM_WAVS_PER_CLASS + 1)) *
+ (100.0 / MAX_NUM_WAVS_PER_CLASS))
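+  # For example, with validation_percentage=10 and testing_percentage=10, a hash
+  # landing at 7% goes to 'validation', 15% to 'testing' and 20% or above to 'training'.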
+ if percentage_hash < validation_percentage:
+ result = 'validation'
+ elif percentage_hash < (testing_percentage + validation_percentage):
+ result = 'testing'
+ else:
+ result = 'training'
+ return result
+
+
+def prepare_words_list(wanted_words):
+ """Prepends common tokens to the custom word list.
+
+ Args:
+ wanted_words: List of strings containing custom words to spot.
+
+ Returns:
+ List of words with silence and unknown tokens added.
+ """
+ return [SILENCE_LABEL, UNKNOWN_WORD_LABEL] + wanted_words
+
+
+class AudioProcessor:
+ """Handles loading, partitioning, and preparing audio training data."""
+
+ class Modes(Enum):
+ TRAINING = 1
+ VALIDATION = 2
+ TESTING = 3
+
+ def __init__(self, data_url, data_dir, silence_percentage, unknown_percentage,
+ wanted_words, validation_percentage, testing_percentage, model_settings):
+ self.data_dir = Path(data_dir)
+ self.model_settings = model_settings
+ self.words_list = prepare_words_list(wanted_words)
+
+ self._tf_datasets = {}
+ self.background_data = None
+ self._set_size = {'training': 0, 'validation': 0, 'testing': 0}
+
+ self._download_and_extract_data(data_url, data_dir)
+ self._prepare_datasets(silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage)
+ self._prepare_background_data()
+
+ def get_data(self, mode, background_frequency=0, background_volume_range=0, time_shift=0):
+ """Returns the train, validation or test set for KWS as a TF Dataset.
+
+ Args:
+ mode: The set to return, see AudioProcessor.Modes enumeration.
+ background_frequency: How many of the samples have background noise mixed in.
+ background_volume_range: How loud the background noise should be, between 0 and 1.
+ time_shift: Range to randomly shift the training audio by in time.
+
+ Returns:
+ TF dataset that will generate tuples containing an mfcc and corresponding label.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ dataset = self._tf_datasets['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ dataset = self._tf_datasets['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ dataset = self._tf_datasets['testing']
+ else:
+            raise ValueError("Incorrect dataset type given")
+
+ use_background = (self.background_data is not None) and (mode == AudioProcessor.Modes.TRAINING)
+ dataset = dataset.map(lambda path, label: self._process_path(path, label, self.model_settings,
+ background_frequency, background_volume_range,
+ time_shift, use_background, self.background_data),
+ num_parallel_calls=tf.data.experimental.AUTOTUNE)
+
+ return dataset
+
+ def set_size(self, mode):
+ """Get the number of samples in the requested dataset partition.
+
+ Args:
+ mode: Which partition, see AudioProcessor.Modes enumeration.
+
+ Returns:
+ Number of samples in the partition.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ return self._set_size['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ return self._set_size['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ return self._set_size['testing']
+ else:
+            raise ValueError('Incorrect dataset type given')
+
+ @staticmethod
+ def _process_path(path, label, model_settings, background_frequency, background_volume_range, time_shift_samples,
+ use_background, background_data):
+ """Load wav files and calculate mfcc features.
+
+ Random shifting of samples and adding in background noise is done within this function as well.
+ This function is meant to be mapped onto a TF Dataset by using a lambda function.
+
+ Args:
+ path: Path to the wav file to load.
+ label: Integer label for classifying the audio clip.
+ model_settings: Dictionary of settings for model being trained.
+ background_frequency: How many clips will have background noise, 0.0 to 1.0.
+ background_volume_range: How loud the background noise will be.
+ time_shift_samples: How much to randomly shift the clips by.
+ use_background: Add in background noise to audio clips or not.
+ background_data: Ragged tensor of loaded background noise samples.
+
+ Returns:
+ Tuple of calculated flattened mfcc and its class label.
+ """
+
+ desired_samples = model_settings['desired_samples']
+ audio, sample_rate = load_wav_file(path, desired_samples=desired_samples)
+
+ # Make our own silence audio data.
+ if label == SILENCE_INDEX:
+ audio = tf.multiply(audio, 0)
+
+ # Shift samples start position and pad any gaps with zeros.
+ if time_shift_samples > 0:
+ time_shift_amount = tf.random.uniform(shape=(), minval=-time_shift_samples, maxval=time_shift_samples,
+ dtype=tf.int32)
+ else:
+ time_shift_amount = 0
+ if time_shift_amount > 0:
+ time_shift_padding = [[time_shift_amount, 0], [0, 0]]
+ time_shift_offset = [0, 0]
+ else:
+ time_shift_padding = [[0, -time_shift_amount], [0, 0]]
+ time_shift_offset = [-time_shift_amount, 0]
+
+ padded_foreground = tf.pad(audio, time_shift_padding, mode='CONSTANT')
+ sliced_foreground = tf.slice(padded_foreground, time_shift_offset, [desired_samples, -1])
+
+ # Get a random section of background noise.
+ if use_background:
+ background_index = tf.random.uniform(shape=(), maxval=background_data.shape[0], dtype=tf.int32)
+ background_sample = background_data[background_index]
+ background_offset = tf.random.uniform(shape=(), maxval=len(background_sample)-desired_samples,
+ dtype=tf.int32)
+ background_clipped = background_sample[background_offset:(background_offset + desired_samples)]
+ background_reshaped = tf.reshape(background_clipped, [desired_samples, 1])
+ if tf.random.uniform(shape=(), maxval=1) < background_frequency:
+ background_volume = tf.random.uniform(shape=(), maxval=background_volume_range)
+ else:
+ background_volume = tf.constant(0, dtype='float32')
+ else:
+ background_reshaped = np.zeros([desired_samples, 1], dtype=np.float32)
+ background_volume = tf.constant(0, dtype='float32')
+
+ # Mix in background noise.
+ background_mul = tf.multiply(background_reshaped, background_volume)
+ background_add = tf.add(background_mul, sliced_foreground)
+ background_clamp = tf.clip_by_value(background_add, -1.0, 1.0)
+
+ mfcc = calculate_mfcc(background_clamp, sample_rate, model_settings['window_size_samples'],
+ model_settings['window_stride_samples'],
+ model_settings['dct_coefficient_count'])
+ mfcc = tf.reshape(mfcc, [-1])
+
+ return mfcc, label
+
+ def _download_and_extract_data(self, data_url, target_directory):
+ """Downloads and extracts file to target directory.
+
+ If the file does not already exist download it and then untar into the target directory.
+
+ Args:
+ data_url: Web link to the tarred data to download.
+ target_directory: Directory to download and extract to.
+ """
+ target_directory = Path(target_directory)
+ target_directory.mkdir(exist_ok=True)
+
+ filename = data_url.split('/')[-1]
+ filepath = target_directory / filename
+
+ if not filepath.exists():
+ def _report_hook(block_num, block_size, total_size):
+ """Function to track download progress in urllib"""
+ read_so_far = block_num * block_size
+ percent = (read_so_far / total_size) * 100.0
+
+ s = f"\rDownloading {filename} {percent:.1f}%"
+
+ sys.stdout.write(s)
+ sys.stdout.flush()
+
+ filepath, _ = urllib.request.urlretrieve(data_url, filepath, _report_hook)
+ print()
+
+ print(f'Untarring {filename}...')
+ tarfile.open(filepath, 'r:gz').extractall(target_directory)
+
+ def _prepare_datasets(self, silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage):
+ """Split the data into train, validation and testing sets.
+
+ Silence and unknown data is added, then sets are converted to TF Datasets.
+
+ Args:
+            silence_percentage: Percent of words that should be silence.
+ unknown_percentage: Percent of words that should be unknown.
+ wanted_words: List of words wanted to classify.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ """
+ # Make sure the shuffling and picking of unknowns is deterministic.
+ random.seed(RANDOM_SEED)
+ wanted_words_index = {}
+
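+        # Label indices 0 and 1 are reserved for the silence and unknown classes,
+        # so wanted words start at index 2.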
+ for index, wanted_word in enumerate(wanted_words):
+ wanted_words_index[wanted_word] = index + 2
+
+ # Find all wav files in subfolders.
+ search_path = self.data_dir / '*' / '*.wav'
+ data_index, unknown_index, all_words = self._find_and_sort_wavs(search_path, validation_percentage,
+ testing_percentage, wanted_words_index)
+
+ for index, wanted_word in enumerate(wanted_words):
+ if wanted_word not in all_words:
+ raise Exception(f'Tried to find {wanted_word} in labels but only found: {", ".join(all_words.keys())}')
+
+ word_to_index = {}
+ for word in all_words:
+ if word in wanted_words_index:
+ word_to_index[word] = wanted_words_index[word]
+ else:
+ word_to_index[word] = UNKNOWN_WORD_INDEX
+ word_to_index[SILENCE_LABEL] = SILENCE_INDEX
+
+ # We need an arbitrary file to load as the input for the silence samples.
+ # It's multiplied by zero later, so the content doesn't matter.
+ silence_wav_path = data_index['training'][0]['file']
+ for set_index in ['validation', 'testing', 'training']:
+ set_size = len(data_index[set_index]) # Size before adding silence and unknown samples.
+ silence_size = int(math.ceil(set_size * silence_percentage / 100))
+ for _ in range(silence_size):
+ data_index[set_index].append({
+ 'label': SILENCE_LABEL,
+ 'file': silence_wav_path
+ })
+ # Pick some unknowns to add to each partition of the data set.
+ random.shuffle(unknown_index[set_index])
+ unknown_size = int(math.ceil(set_size * unknown_percentage / 100))
+ data_index[set_index].extend(unknown_index[set_index][:unknown_size])
+
+ self._set_size[set_index] = len(data_index[set_index]) # Size after adding silence and unknown samples.
+
+ # Make sure the ordering is random.
+ random.shuffle(data_index[set_index])
+
+ # Transform into TF Datasets ready for easier processing later.
+ labels, paths = list(zip(*[d.values() for d in data_index[set_index]]))
+ labels = [word_to_index[label] for label in labels]
+ self._tf_datasets[set_index] = tf.data.Dataset.from_tensor_slices((list(paths), labels))
+
+ def _find_and_sort_wavs(self, search_pattern, validation_percentage, testing_percentage, wanted_words_index):
+ """Find and sort wav files into known and unknown word sets.
+
+ Known words are files containing words in the list of wanted words.
+ Any other clip goes to the unknown label set. Labels come from the folder names.
+ All clips are also assigned to train, test and validation sets.
+
+ Args:
+ search_pattern: Path pattern used by glob to find wav files.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ wanted_words_index: Dict mapping wanted words to their label index.
+
+ Returns:
+ 3-tuple of known words, unknown words and mapping of all word labels.
+ """
+ data_index = {'validation': [], 'testing': [], 'training': []}
+ unknown_index = {'validation': [], 'testing': [], 'training': []}
+ all_words = {}
+
+ for wav_path in sorted(tf.io.gfile.glob(str(search_pattern))):
+ word = Path(wav_path).parent.name.lower()
+
+ # Treat the '_background_noise_' folder as a special case, since we expect
+ # it to contain long audio samples we mix in to improve training.
+ if word == BACKGROUND_NOISE_DIR_NAME:
+ continue
+
+ all_words[word] = True
+ set_index = which_set(wav_path, validation_percentage, testing_percentage)
+ # If it's a known class, store its detail, otherwise add it to the list
+ # we'll use to train the unknown label.
+ if word in wanted_words_index:
+ data_index[set_index].append({'label': word, 'file': wav_path})
+ else:
+ unknown_index[set_index].append({'label': word, 'file': wav_path})
+ if not all_words:
+ raise Exception('No .wavs found at ' + str(search_pattern))
+
+ return data_index, unknown_index, all_words
+
+ def _prepare_background_data(self):
+ """Searches a folder for background noise audio, and loads it into memory.
+
+ It's expected that the background audio samples will be in a subdirectory
+ named '_background_noise_' inside the 'data_dir' folder, as .wavs that match
+ the sample rate of the training data, but can be much longer in duration.
+
+ If the '_background_noise_' folder doesn't exist at all, this isn't an
+ error, it's just taken to mean that no background noise augmentation should
+ be used. If the folder does exist, but it's empty, that's treated as an
+ error.
+
+ Returns:
+ Ragged tensor of raw PCM-encoded audio samples of background noise.
+          None if the '_background_noise_' folder doesn't exist.
+
+ Raises:
+ Exception: If files aren't found in the folder.
+ """
+ background_data = []
+ background_dir = Path(self.data_dir / BACKGROUND_NOISE_DIR_NAME)
+ if not background_dir.exists():
+ self.background_data = None
+ return
+
+ search_path = Path(background_dir / '*.wav')
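+        # Load each background wav in full (desired_samples=-1 keeps the whole clip).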
+ for wav_path in tf.io.gfile.glob(str(search_path)):
+ wav_data, _ = load_wav_file(wav_path, desired_samples=-1)
+ background_data.append(tf.reshape(wav_data, [-1]))
+
+ if not background_data:
+ raise Exception('No background wav files were found in ' + str(search_path))
+
+        # Ragged tensor as we can't use lists in tf dataset map functions.
+ self.background_data = tf.ragged.stack(background_data)
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/evaluation.py b/models/keyword_spotting/cnn_small/model_package_tf/evaluation.py
new file mode 100644
index 0000000..026e8f8
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/evaluation.py
@@ -0,0 +1,250 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for testing trained keyword spotting models from checkpoint files and TFLite files."""
+
+import argparse
+
+import numpy as np
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from cnn_s_inference_tflite import tflite_inference
+
+
+def tflite_test(model_settings, audio_processor, tflite_path):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A TFLite model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ tflite_path: Path to TFLite file to use for inference.
+ """
+ # Evaluate on validation set.
+ print("Running TFLite evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+ expected_indices = np.concatenate([y for x, y in val_data])
+ predicted_indices = []
+
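+    # Run TFLite inference one clip at a time and keep the argmax class prediction.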
+ for mfcc, label in val_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TFLite evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(1)
+ expected_indices = np.concatenate([y for x, y in test_data])
+ predicted_indices = []
+
+ for mfcc, label in test_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def keras_test(model_settings, audio_processor, model):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A loaded keras model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ model: Loaded keras model.
+ """
+ # Evaluate on validation set.
+ print("Running TF evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in val_data])
+
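+    # Predict over the whole set in batches, then take the argmax class per clip.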
+ predictions = model.predict(val_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TF evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in test_data])
+
+ predictions = model.predict(test_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def calculate_accuracy(predicted_indices, expected_indices):
+ """Calculates and returns accuracy.
+
+ Args:
+ predicted_indices: List of predicted integer indices.
+ expected_indices: List of expected integer indices.
+
+ Returns:
+ Accuracy value between 0 and 1.
+ """
+ correct_prediction = tf.equal(predicted_indices, expected_indices)
+ accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+ return accuracy
+
+
+def evaluate():
+ """Calculate accuracy and confusion matrices on validation and test sets.
+
+ Model is created and weights loaded from supplied command line arguments.
+ """
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
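+    # Evaluate the TFLite file and/or the Keras checkpoint, depending on the arguments supplied.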
+ if FLAGS.tflite_path:
+ tflite_test(model_settings, audio_processor, FLAGS.tflite_path)
+
+ if FLAGS.checkpoint:
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+ keras_test(model_settings, audio_processor, model)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from')
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ help='Path to TFLite file to use for evaluation')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ evaluate()
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/how_to_guidance.ipynb b/models/keyword_spotting/cnn_small/model_package_tf/how_to_guidance.ipynb
new file mode 100644
index 0000000..8b19ae4
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/how_to_guidance.ipynb
@@ -0,0 +1,428 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.\n",
+ "#\n",
+ "# SPDX-License-Identifier: Apache-2.0\n",
+ "#\n",
+ "# Licensed under the Apache License, Version 2.0 (the License); you may\n",
+ "# not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an AS IS BASIS, WITHOUT\n",
+ "# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# CNN_Small - Optimised\n",
+ "\n",
+ "Here we reproduce the models with our established codebase and ModelPackage approach for your convenience.\n",
+ "\n",
+ "## Model-Package Overview:\n",
+ "\n",
+ "| Model \t| CNN_Small \t|\n",
+ "|:---------------:\t|:---------------------------------------------------------------:\t|\n",
+ "| **Format**: \t| Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |\n",
+ "| **Feature**: \t| Keyword spotting for Arm Cortex-M CPUs |\n",
+ "| **Architectural Delta w.r.t. Vanilla**: | None |\n",
+ "| **Domain**: \t| Keyword spotting |\n",
+ "| **Package Quality**: \t| Optimised |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Table of contents \n",
+ "\n",
+ "This how-to guidance presents the key steps to reproduce everything in this package. The contents are organised as below. We provided the internal navigation links for users to easy-jump among different sections. \n",
+ "\n",
+ " \n",
+ "* [1.0 Model recreation](#model_recreation)\n",
+ "\n",
+ "* [2.0 Training](#training)\n",
+ "\n",
+ "* [3.0 Testing](#testing)\n",
+ "\n",
+ "* [4.0 Optimization](#optimization)\n",
+ "\n",
+ "* [5.0 Quantization and TFLite conversion](#tflite_conversion)\n",
+ "\n",
+ "* [6.0 Inference the TFLite model files](#tflite_inference)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1.0 Model Recreation\n",
+ "\n",
+ "In order to recreate the model you will first need to be using ```Python3.7``` and install the requirements in ```requirements.txt```.\n",
+ "\n",
+ "Once you have these requirements satisfied you can execute the recreation script contained within this folder, just run:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 13:13:21.365383: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 13:14:12.415896: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 13:14:12.453662: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:14:12.453701: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:14:12.477025: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 13:14:12.477130: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 13:14:12.480970: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 13:14:12.481614: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 13:14:12.482232: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 13:14:12.483034: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 13:14:12.483190: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 13:14:12.483677: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:14:12.483964: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 13:14:12.484760: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:14:12.485262: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:14:12.485316: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:14:12.916344: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:14:12.916381: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:14:12.916389: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:14:12.916905: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10809 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 13:14:14.471348: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 13:14:15.329325: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 13:14:15.329556: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 13:14:15.329983: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:14:15.330272: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:14:15.330306: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:14:15.330322: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:14:15.330334: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:14:15.330642: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10809 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 13:14:15.347491: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 13:14:15.352470: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.021ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.002ms.\n",
+ "\n",
+ "2023-01-31 13:14:15.425956: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 13:14:15.425996: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 13:14:15.429502: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 13:14:15.431843: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:14:15.432118: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:14:15.432154: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:14:15.432167: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:14:15.432178: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:14:15.432489: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10809 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "Converted model saved to cnn.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "2023-01-31 13:14:15.484981: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 300 5 7 2 13 9 4 11 4 2 14]\n",
+ " [ 0 1 381 4 0 2 8 0 0 0 0 1]\n",
+ " [ 1 13 1 363 0 8 3 1 0 1 4 11]\n",
+ " [ 0 3 1 1 328 0 1 0 5 8 3 0]\n",
+ " [ 0 9 0 12 1 340 5 0 0 0 4 6]\n",
+ " [ 1 3 9 2 2 0 332 2 0 0 0 1]\n",
+ " [ 0 11 0 0 1 2 6 341 0 1 0 1]\n",
+ " [ 1 9 0 0 4 1 0 0 339 8 1 0]\n",
+ " [ 0 3 2 0 20 0 4 0 4 334 3 3]\n",
+ " [ 1 5 1 0 9 1 2 0 0 2 329 0]\n",
+ " [ 0 9 0 28 1 8 1 0 0 5 6 314]]\n",
+ "Validation accuracy = 91.61%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 338 5 9 3 1 6 10 17 4 6 9]\n",
+ " [ 0 10 395 2 0 2 9 0 0 0 0 1]\n",
+ " [ 0 7 3 374 0 10 5 0 0 0 0 6]\n",
+ " [ 0 8 0 0 395 2 0 0 5 7 6 2]\n",
+ " [ 0 9 2 14 1 369 0 1 3 0 2 5]\n",
+ " [ 0 6 7 0 1 0 394 2 0 1 1 0]\n",
+ " [ 0 4 0 0 0 2 8 378 1 1 0 2]\n",
+ " [ 1 13 0 0 5 3 1 0 356 14 1 2]\n",
+ " [ 0 2 0 1 11 0 1 0 7 372 0 8]\n",
+ " [ 0 1 0 0 5 4 2 0 0 0 394 5]\n",
+ " [ 0 15 0 28 4 10 2 2 1 2 2 336]]\n",
+ "Test accuracy = 92.21%(N=4890)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 13:14:39.184982: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 13:15:30.798819: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 13:15:30.834958: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:15:30.834997: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:15:30.856434: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 13:15:30.856508: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 13:15:30.860012: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 13:15:30.860406: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 13:15:30.861063: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 13:15:30.861848: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 13:15:30.862001: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 13:15:30.862359: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:15:30.862643: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 13:15:30.863248: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:15:30.863639: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:15:30.863701: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:15:31.316265: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:15:31.316302: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:15:31.316312: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:15:31.316827: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10809 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 13:15:32.911559: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 13:15:33.701396: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 13:15:33.701483: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 13:15:33.702020: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:15:33.702305: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:15:33.702342: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:15:33.702357: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:15:33.702364: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:15:33.702677: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10809 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 13:15:33.719401: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 13:15:33.721665: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.012ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.002ms.\n",
+ "\n",
+ "2023-01-31 13:15:33.790485: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 13:15:33.790521: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 13:15:33.793705: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 13:15:33.795921: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:15:33.796178: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:15:33.796208: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:15:33.796218: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:15:33.796225: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:15:33.796508: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10809 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 13:15:33.820120: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "fully_quantize: 0, inference_type: 6, input_inference_type: 9, output_inference_type: 9\n",
+ "Quantized model saved to cnn_quantized.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 301 5 8 2 12 9 4 11 3 3 13]\n",
+ " [ 0 2 376 3 0 2 11 2 0 0 0 1]\n",
+ " [ 1 13 1 350 4 8 5 1 1 0 4 18]\n",
+ " [ 0 4 1 1 321 0 2 0 5 7 9 0]\n",
+ " [ 0 10 0 9 3 337 4 1 0 0 7 6]\n",
+ " [ 1 6 9 1 4 1 327 2 0 0 0 1]\n",
+ " [ 0 14 0 0 2 2 6 337 1 1 0 0]\n",
+ " [ 1 9 1 0 4 2 1 0 339 5 1 0]\n",
+ " [ 0 4 1 0 25 0 5 0 6 322 5 5]\n",
+ " [ 1 6 1 0 13 1 1 0 1 3 323 0]\n",
+ " [ 0 11 1 26 3 7 1 1 1 3 4 314]]\n",
+ "Validation accuracy = 90.39%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 332 3 8 7 3 11 6 15 2 10 11]\n",
+ " [ 0 9 390 1 3 2 14 0 0 0 0 0]\n",
+ " [ 0 8 2 355 5 12 8 0 0 0 1 14]\n",
+ " [ 0 12 0 0 386 2 1 0 5 7 11 1]\n",
+ " [ 0 12 2 11 2 363 0 1 4 1 6 4]\n",
+ " [ 0 5 7 0 8 0 388 3 0 1 0 0]\n",
+ " [ 0 5 0 0 4 0 15 369 0 1 0 2]\n",
+ " [ 1 14 0 0 6 3 1 1 352 14 2 2]\n",
+ " [ 0 4 0 1 16 0 4 0 16 352 2 7]\n",
+ " [ 0 1 0 0 10 3 2 1 1 0 388 5]\n",
+ " [ 0 14 1 28 10 14 3 4 0 1 2 325]]\n",
+ "Test accuracy = 90.14%(N=4890)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!bash ./recreate_model.sh"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder to generate the TFLite files and perform evaluation on the test set. Both an fp32 version and a quantized version will be produced. The quantized version will use post-training quantization to fully quantize it.\n",
+ "\n",
+ "If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --train\n",
+ "```\n",
+ "\n",
+ "Training is then performed and should produce a model to the stated accuracy in this repository. Note that exporting to TFLite will still happen with the baseline pre-trained checkpoint files, so you will need to re-run the script and this time supply the path to the new checkpoint files you want to use, for example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --ckpt \n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2.0 Training\n",
+ "\n",
+ "The training scripts can be used to recreate any of the models from the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf) provided the right hyperparameters are used. The training commands with all the hyperparameters to reproduce the model in this repository are given [here](recreate_model.sh). The model in this part of the repository represents just one variation of the models from the paper, other varieties are covered in other parts of the repository.\n",
+ "\n",
+ "\n",
+ "As a general example of how to train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:\n",
+ "```\n",
+ "python train.py --model_architecture dnn --model_size_info 128 128 128\n",
+ "```\n",
+ "\n",
+ "The command line argument *--model_size_info* is used to pass the neural network layer\n",
+ "dimensions such as number of layers, convolution filter size/stride as a list to models.py,\n",
+ "which builds the TensorFlow graph based on the provided model architecture\n",
+ "and layer dimensions. For more info on *model_size_info* for each network architecture see\n",
+ "[models.py](model_core_utils/models.py).\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3.0 Testing\n",
+ "To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:\n",
+ "```\n",
+ "python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters passed to this script should match those used in the Training step.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4.0 Optimization\n",
+ "\n",
+ "We introduce an *optional* step to optimize the trained keyword spotting model for deployment.\n",
+ "\n",
+ "Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. 32 weight clusters and kmeans++ cluster intialization method are used as the clustering hyperparameters.\n",
+ "\n",
+ "To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.\n",
+ "You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.\n",
+ "\n",
+ "To apply the optimization and fine-tuning, run the following command:\n",
+ "```\n",
+ "python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step, except for the number of training steps.\n",
+ "The number of training steps is reduced since the optimization step only requires fine-tuning.**\n",
+ "\n",
+ "This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5.0 Quantization and TFLite Conversion\n",
+ "\n",
+ "You can now use TensorFlow's\n",
+ "[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to\n",
+ "make quantization of the trained models super simple.\n",
+ "\n",
+ "To quantize your trained model (e.g. a DNN) run:\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint [--inference_type int8|int16]\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "The ```inference_type``` parameter is *optional* and to be used if a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32.\n",
+ "\n",
+ "In this example, this step will produce a quantized TFLite file *dnn_quantized.tflite*."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can test the accuracy of this quantized model on the test set by running:\n",
+ "```\n",
+ "python evaluation.py --tflite_path dnn_quantized.tflite\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:\n",
+ "\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint --no-quantize\n",
+ "```\n",
+ "\n",
+ "This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 6.0 Single inference of the TFLite model files \n",
+ "\n",
+ "You can conduct TFLite inference for .fp32 and .int8 model files by using the following command: \n",
+ "\n",
+ "```python cnn_s_inference_tflite.py --labels validation_utils/labels.txt --wav --tflite_path ```\n",
+ "\n",
+ "**The feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/README.md b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
new file mode 100644
index 0000000..c964371
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32
+
+## Description
+This is a floating point fp32 version of the CNN Small model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | fp32 |
+| SHA-1 Hash | e9471348e6fb25191092236dac6af7c1fc84116b |
+| Size (Bytes) | 280444 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| accuracy | 92.21% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_multiplication_x: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Deployable |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_multiplication_x: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 490) | fp32 | models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input | fp32 | [1, 490] | The input is a set of processed MFCC features |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | fp32 | models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity | fp32 | [1, 12] | The probabilities of the 12 keywords |
\ No newline at end of file
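+
+## Example Usage
+A minimal sketch of running this fp32 TFLite file on the bundled example input is shown below. It assumes the TensorFlow Python package is installed and that paths are relative to this folder; the variable names are illustrative only.
+
+```python
+# Minimal sketch: run cnn_s.tflite on the bundled example MFCC features.
+import numpy as np
+import tensorflow as tf
+
+interpreter = tf.lite.Interpreter(model_path="cnn_s.tflite")
+interpreter.allocate_tensors()
+input_details = interpreter.get_input_details()[0]
+output_details = interpreter.get_output_details()[0]
+
+# Example input of shape [1, 490] stored alongside this README.
+mfcc = np.load("testing_input/input/0.npy").astype(np.float32)
+
+interpreter.set_tensor(input_details["index"], mfcc)
+interpreter.invoke()
+probabilities = interpreter.get_tensor(output_details["index"])  # shape [1, 12]
+print(probabilities.argmax(axis=-1))
+```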
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/cnn_s.tflite b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/cnn_s.tflite
new file mode 100644
index 0000000..11ed7c3
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/cnn_s.tflite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39d968b59dec6a543fba800718fd72c9009644b39bcfd1e08226e18b40b6d9b5
+size 280444
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
new file mode 100644
index 0000000..18e9f60
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
@@ -0,0 +1,64 @@
+benchmark:
+ benchmark_metrics:
+ accuracy: 92.21%
+ benchmark_name: Google Speech Commands test set
+description: This is a floating point fp32 version of the CNN Small model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: fp32
+ file_size_bytes: 280444
+ filename: cnn_s.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: e9471348e6fb25191092236dac6af7c1fc84116b
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+  - description: The input is a set of processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input
+ shape:
+ - 1
+ - 490
+ type: fp32
+ use_case: Random input for model regression.
+ input_datatype: fp32
+ name: input
+ shape:
+ - 1
+ - 490
+ output_nodes:
+  - description: The probability of each of the 12 keywords.
+ example_output:
+ path: models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: fp32
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: fp32
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: false
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
new file mode 100644
index 0000000..2759db6
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4e38dbf192916f7af5440e17d27eaf1a19e13054977fed1ec5e85322e3da897
+size 2088
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
new file mode 100644
index 0000000..b651412
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ad3e4972e18774433a093b7228742fe66dceece314ea2de02bc0ac29a632cf8
+size 176
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/README.md b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/README.md
new file mode 100644
index 0000000..30ae15d
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8
+
+## Description
+This is a fully quantized int8 version of the CNN Small model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | int8 |
+| SHA-1 Hash | 3415f88dfb8f78fe47d282d68ccbc3ce71a7510f |
+| Size (Bytes) | 75400 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 90.18% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_check_mark: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Deployable |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_check_mark: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 490) | int8 | models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input | int8 | [1, 490] | The input is a set of processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | int8 | models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity | int8 | [1, 12] | The probability of each of the 12 keywords |
\ No newline at end of file
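+
+## Example Usage
+For reference, a minimal sketch of running this fully quantized int8 model is shown below. Inputs must be quantized with the input tensor's scale and zero-point; the fp32 MFCC array here is a stand-in for real features produced by the preprocessing pipeline, and all names are illustrative only.
+
+```python
+# Minimal sketch: quantize fp32 MFCC features and run the int8 model.
+import numpy as np
+import tensorflow as tf
+
+interpreter = tf.lite.Interpreter(model_path="cnn_s_quantized.tflite")
+interpreter.allocate_tensors()
+inp = interpreter.get_input_details()[0]
+out = interpreter.get_output_details()[0]
+
+# Stand-in fp32 MFCC features of shape [1, 490]; replace with real features.
+mfcc_fp32 = np.zeros((1, 490), dtype=np.float32)
+
+scale, zero_point = inp["quantization"]
+mfcc_int8 = np.clip(np.round(mfcc_fp32 / scale + zero_point), -128, 127).astype(np.int8)
+
+interpreter.set_tensor(inp["index"], mfcc_int8)
+interpreter.invoke()
+scores_int8 = interpreter.get_tensor(out["index"])  # shape [1, 12], int8
+print(scores_int8.argmax(axis=-1))
+```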
diff --git a/models/keyword_spotting/cnn_small/tflite_int8/cnn_s_quantized.tflite b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/cnn_s_quantized.tflite
similarity index 100%
rename from models/keyword_spotting/cnn_small/tflite_int8/cnn_s_quantized.tflite
rename to models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/cnn_s_quantized.tflite
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
new file mode 100644
index 0000000..c836274
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
@@ -0,0 +1,64 @@
+benchmark:
+ benchmark_metrics:
+ Accuracy: 90.18%
+ benchmark_name: Google Speech Commands test set
+description: This is a fully quantized int8 version of the CNN Small model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: int8
+ file_size_bytes: 75400
+ filename: cnn_s_quantized.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: 3415f88dfb8f78fe47d282d68ccbc3ce71a7510f
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+ - description: The input is a processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input
+ shape:
+ - 1
+ - 490
+ type: int8
+ use_case: Random input for model regression.
+ input_datatype: int8
+ name: input
+ shape:
+ - 1
+ - 490
+ output_nodes:
+ - description: The probability on 12 keywords.
+ example_output:
+ path: models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: int8
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: int8
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: true
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_small/tflite_int8/testing_input/input/0.npy b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
similarity index 100%
rename from models/keyword_spotting/cnn_small/tflite_int8/testing_input/input/0.npy
rename to models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
diff --git a/models/keyword_spotting/cnn_small/tflite_int8/testing_output/Identity/0.npy b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
similarity index 100%
rename from models/keyword_spotting/cnn_small/tflite_int8/testing_output/Identity/0.npy
rename to models/keyword_spotting/cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/saved_model/cnn_small/keras_metadata.pb b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/saved_model/cnn_small/keras_metadata.pb
new file mode 100644
index 0000000..f463c39
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/saved_model/cnn_small/keras_metadata.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97d0b45b0027a13e5c2d0a0049775bfa1ac4661ee6e1e9c20690137ba0b91539
+size 28876
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/saved_model/cnn_small/saved_model.pb b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/saved_model/cnn_small/saved_model.pb
new file mode 100644
index 0000000..1904687
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/saved_model/cnn_small/saved_model.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1d3d2c96b473b7cd1b9ca9cd60695a3c6e27d6cc57469b79da75e709e869ff6
+size 302218
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/saved_model/cnn_small/variables/variables.data-00000-of-00001 b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/saved_model/cnn_small/variables/variables.data-00000-of-00001
new file mode 100644
index 0000000..ad5b44d
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/saved_model/cnn_small/variables/variables.data-00000-of-00001
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da9dce03052ef2895fbd3b41f28aade4d53d3ba38a706ded903c133b4c57a549
+size 288200
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/saved_model/cnn_small/variables/variables.index b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/saved_model/cnn_small/variables/variables.index
new file mode 100644
index 0000000..c4f021a
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/saved_model/cnn_small/variables/variables.index
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3021889ecbad08fd6d5edf947596f2fd9dee8a594a63a1f3d2f4bafee7271cce
+size 1466
diff --git a/models/keyword_spotting/cnn_small/tflite_int8/ckpt/checkpoint b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/weights/checkpoint
similarity index 100%
rename from models/keyword_spotting/cnn_small/tflite_int8/ckpt/checkpoint
rename to models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/weights/checkpoint
diff --git a/models/keyword_spotting/cnn_small/tflite_int8/ckpt/cnn_0.92_ckpt.data-00000-of-00001 b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/weights/cnn_0.92_ckpt.data-00000-of-00001
similarity index 100%
rename from models/keyword_spotting/cnn_small/tflite_int8/ckpt/cnn_0.92_ckpt.data-00000-of-00001
rename to models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/weights/cnn_0.92_ckpt.data-00000-of-00001
diff --git a/models/keyword_spotting/cnn_small/tflite_int8/ckpt/cnn_0.92_ckpt.index b/models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/weights/cnn_0.92_ckpt.index
similarity index 100%
rename from models/keyword_spotting/cnn_small/tflite_int8/ckpt/cnn_0.92_ckpt.index
rename to models/keyword_spotting/cnn_small/model_package_tf/model_archive/model_source/weights/cnn_0.92_ckpt.index
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/model_core_utils/__init__.py b/models/keyword_spotting/cnn_small/model_package_tf/model_core_utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/model_core_utils/models.py b/models/keyword_spotting/cnn_small/model_package_tf/model_core_utils/models.py
new file mode 100644
index 0000000..1978136
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/model_core_utils/models.py
@@ -0,0 +1,327 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Model definitions for simple keyword spotting."""
+
+import math
+
+import tensorflow as tf
+
+
+def prepare_model_settings(label_count, sample_rate, clip_duration_ms,
+ window_size_ms, window_stride_ms,
+ dct_coefficient_count):
+ """Calculates common settings needed for all models.
+
+ Args:
+ label_count: How many classes are to be recognized.
+ sample_rate: Number of audio samples per second.
+ clip_duration_ms: Length of each audio clip to be analyzed.
+ window_size_ms: Duration of frequency analysis window.
+ window_stride_ms: How far to move in time between frequency windows.
+ dct_coefficient_count: Number of frequency bins to use for analysis.
+
+ Returns:
+ Dictionary containing common settings.
+ """
+ desired_samples = int(sample_rate * clip_duration_ms / 1000)
+ window_size_samples = int(sample_rate * window_size_ms / 1000)
+ window_stride_samples = int(sample_rate * window_stride_ms / 1000)
+ length_minus_window = (desired_samples - window_size_samples)
+ if length_minus_window < 0:
+ spectrogram_length = 0
+ else:
+ spectrogram_length = 1 + int(length_minus_window / window_stride_samples)
+ fingerprint_size = dct_coefficient_count * spectrogram_length
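+    # For example, with the CNN Small settings used in this package (16 kHz audio, 1000 ms clips,
+    # 40 ms windows, 20 ms stride, 10 MFCCs): spectrogram_length = 1 + (16000 - 640) // 320 = 49,
+    # so fingerprint_size = 10 * 49 = 490, matching the model's (1, 490) input.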
+
+ return {
+ 'desired_samples': desired_samples,
+ 'window_size_samples': window_size_samples,
+ 'window_stride_samples': window_stride_samples,
+ 'spectrogram_length': spectrogram_length,
+ 'dct_coefficient_count': dct_coefficient_count,
+ 'fingerprint_size': fingerprint_size,
+ 'label_count': label_count,
+ 'sample_rate': sample_rate,
+ }
+
+
+def create_model(model_settings, model_architecture, model_size_info, is_training):
+ """Builds a tf.keras model of the requested architecture compatible with the settings.
+
+ Args:
+ model_settings: Dictionary of information about the model.
+ model_architecture: String specifying which kind of model to create.
+        model_size_info: Array with specific information for the chosen architecture
+            (e.g convolutional parameters, number of layers).
+        is_training: Whether the model is being built for training; currently this only
+            affects the basic_lstm architecture (LSTM unrolling).
+
+ Returns:
+ A tf.keras Model with the requested architecture.
+
+ Raises:
+ Exception: If the architecture type isn't recognized.
+ """
+
+ if model_architecture == 'dnn':
+ return create_dnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'cnn':
+ return create_cnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'ds_cnn':
+ return create_ds_cnn_model(model_settings, model_size_info)
+ elif model_architecture == 'single_fc':
+ return create_single_fc_model(model_settings)
+ elif model_architecture == 'basic_lstm':
+ return create_basic_lstm_model(model_settings, model_size_info, is_training)
+ else:
+        raise Exception(f'model_architecture argument {model_architecture} not recognized, should be '
+                        f'one of "dnn", "cnn", "ds_cnn", "single_fc" or "basic_lstm"')
+
+
+def create_single_fc_model(model_settings):
+ """Builds a model with a single fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+
+ Returns:
+ tf.keras Model of the 'SINGLE_FC' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+ # Fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(inputs)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_basic_lstm_model(model_settings, model_size_info, is_training):
+ """Builds a model with a basic lstm layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Length of the array defines the number of hidden-layers and
+ each element in the array represent the number of neurons in that layer.
+ is_training: Determining whether the use of the model is for training or for something else.
+
+ Returns:
+ tf.keras Model of the 'Basic_LSTM' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size))
+
+ # LSTM layer, and unrolling depending on whether you are training or not
+ if is_training:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=False)(x)
+ else:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=True)(x)
+
+ # Outputs a fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_dnn_model(model_settings, model_size_info):
+ """Builds a model with multiple hidden fully-connected layers.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Length of the array defines the number of hidden-layers and
+ each element in the array represent the number of neurons in that layer.
+
+ Returns:
+ tf.keras Model of the 'DNN' architecture.
+ """
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ # First fully connected layer.
+ x = tf.keras.layers.Dense(units=model_size_info[0], activation='relu')(inputs)
+
+ # Hidden layers with ReLU activations.
+ for i in range(1, len(model_size_info)):
+ x = tf.keras.layers.Dense(units=model_size_info[i], activation='relu')(x)
+
+ # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_cnn_model(model_settings, model_size_info):
+ """Builds a model with 2 convolution layers followed by a linear layer and a hidden fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines the first and second convolution parameters in
+ {number of conv features, conv filter height, width, stride in y,x dir.},
+ followed by linear layer size and fully-connected layer size.
+
+ Returns:
+ tf.keras Model of the 'CNN' architecture.
+ """
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ first_filter_count = model_size_info[0]
+ first_filter_height = model_size_info[1] # Time axis.
+ first_filter_width = model_size_info[2] # Frequency axis.
+ first_filter_stride_y = model_size_info[3] # Time axis.
+    first_filter_stride_x = model_size_info[4]  # Frequency axis.
+
+ second_filter_count = model_size_info[5]
+ second_filter_height = model_size_info[6] # Time axis.
+ second_filter_width = model_size_info[7] # Frequency axis.
+ second_filter_stride_y = model_size_info[8] # Time axis.
+ second_filter_stride_x = model_size_info[9] # Frequency axis.
+
+ linear_layer_size = model_size_info[10]
+ fc_size = model_size_info[11]
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size']), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=first_filter_count,
+ kernel_size=(first_filter_height, first_filter_width),
+ strides=(first_filter_stride_y, first_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Second convolution.
+ x = tf.keras.layers.Conv2D(filters=second_filter_count,
+ kernel_size=(second_filter_height, second_filter_width),
+ strides=(second_filter_stride_y, second_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Flatten for fully connected layers.
+ x = tf.keras.layers.Flatten()(x)
+
+ # Fully connected layer with no activation.
+ x = tf.keras.layers.Dense(units=linear_layer_size)(x)
+
+ # Fully connected layer with ReLU activation.
+ x = tf.keras.layers.Dense(units=fc_size)(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Output fully connected.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_ds_cnn_model(model_settings, model_size_info):
+ """Builds a model with convolutional & depthwise separable convolutional layers.
+
+ For more details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines number of layers, followed by the DS-Conv layer
+ parameters in the order {number of conv features, conv filter height,
+ width and stride in y,x dir.} for each of the layers.
+
+ Returns:
+ tf.keras Model of the 'DS-CNN' architecture.
+ """
+
+ label_count = model_settings['label_count']
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ t_dim = input_time_size
+ f_dim = input_frequency_size
+
+ # Extract model dimensions from model_size_info.
+ num_layers = model_size_info[0]
+ conv_feat = [None]*num_layers
+ conv_kt = [None]*num_layers
+ conv_kf = [None]*num_layers
+ conv_st = [None]*num_layers
+ conv_sf = [None]*num_layers
+
+ i = 1
+ for layer_no in range(0, num_layers):
+ conv_feat[layer_no] = model_size_info[i]
+ i += 1
+ conv_kt[layer_no] = model_size_info[i]
+ i += 1
+ conv_kf[layer_no] = model_size_info[i]
+ i += 1
+ conv_st[layer_no] = model_size_info[i]
+ i += 1
+ conv_sf[layer_no] = model_size_info[i]
+ i += 1
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size']), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # Depthwise separable convolutions.
+ for layer_no in range(0, num_layers):
+ if layer_no == 0:
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[0],
+ kernel_size=(conv_kt[0], conv_kf[0]),
+ strides=(conv_st[0], conv_sf[0]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ else:
+ # Depthwise convolution.
+ x = tf.keras.layers.DepthwiseConv2D(kernel_size=(conv_kt[layer_no], conv_kf[layer_no]),
+ strides=(conv_sf[layer_no], conv_st[layer_no]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ # Pointwise convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[layer_no], kernel_size=(1, 1))(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ t_dim = math.ceil(t_dim/float(conv_st[layer_no]))
+ f_dim = math.ceil(f_dim/float(conv_sf[layer_no]))
+
+ # Global average pool.
+ x = tf.keras.layers.AveragePooling2D(pool_size=(t_dim, f_dim), strides=1)(x)
+
+ # Squeeze before passing to output fully connected layer.
+ x = tf.reshape(x, shape=(-1, conv_feat[layer_no]))
+
+    # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=label_count, activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/optimisations.py b/models/keyword_spotting/cnn_small/model_package_tf/optimisations.py
new file mode 100644
index 0000000..16b6f4c
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/optimisations.py
@@ -0,0 +1,259 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for optimizing simple keyword spotting models using clustering API."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+import tensorflow_model_optimization as tfmot
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def print_model_weight_clusters(model):
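+    """Print the number of unique kernel weight values (clusters) for each layer."""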
+
+ for layer in model.layers:
+ if isinstance(layer, tf.keras.layers.Wrapper):
+ weights = layer.trainable_weights
+ else:
+ weights = layer.weights
+ for weight in weights:
+ if "kernel" in weight.name:
+ unique_count = len(np.unique(weight))
+ print(
+ f"{layer.name}/{weight.name}: {unique_count} clusters "
+ )
+
+
+def optimize():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model to optimize from checkpoint.
+    model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, is_training=True)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ cluster_weights = tfmot.clustering.keras.cluster_weights
+ CentroidInitialization = tfmot.clustering.keras.CentroidInitialization
+
+ clustering_params = {
+ 'number_of_clusters': 32,
+ 'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS}
+
+ clustered_model = cluster_weights(model, **clustering_params)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Train the model with clustering applied.
+ clustered_model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data)
+
+ stripped_clustered_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+
+ print_model_weight_clusters(stripped_clustered_model)
+
+ # Save the clustered model weights
+ train_dir = Path(FLAGS.train_dir) / "optimized"
+ train_dir.mkdir(parents=True, exist_ok=True)
+
+ stripped_clustered_model.save_weights((train_dir /
+ (FLAGS.model_architecture +
+ "_clustered_ckpt")))
+
+ # Test the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ stripped_clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ test_loss, test_acc = stripped_clustered_model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='3750,750',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--save_step_interval',
+ type=int,
+ default=100,
+ help='Save model checkpoint every save_steps.')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from before fine-tuning.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ optimize()
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/recreate_model.sh b/models/keyword_spotting/cnn_small/model_package_tf/recreate_model.sh
new file mode 100644
index 0000000..1f0289a
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/recreate_model.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ckpt_path=model_archive/model_source/weights/cnn_0.92_ckpt
+train=false
+
+# Parse command line args
+while (( $# >= 1 )); do
+ case $1 in
+ --ckpt)
+ if [ "$2" ]; then
+ ckpt_path=$2
+ shift
+ else
+ printf 'ERROR: "--ckpt" requires a path to be supplied.\n'
+ exit 1
+ fi
+ ;;
+ --train)
+ train=true
+ break;;
+ *) shift;
+ esac;
+done
+
+
+# CNN Small training
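+# model_size_info for the 'cnn' architecture (see model_core_utils/models.py) is two conv layers described
+# as {num filters, kernel height, kernel width, stride y, stride x}, followed by the linear layer size and
+# the fully-connected layer size. Here: conv1 = 28 filters, 10x4 kernel, stride 1x1; conv2 = 30 filters,
+# 10x4 kernel, stride 2x1; linear = 16; fc = 128.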
+if [ "$train" = true ]
+then
+python train.py --model_architecture cnn --model_size_info 28 10 4 1 1 30 10 4 2 1 16 128 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --learning_rate 0.0005,0.0001,0.00002 --how_many_training_steps 10000,10000,10000 --summaries_dir work/CNN/CNN_S/retrain_logs --train_dir work/CNN/CNN_S/training
+fi
+
+# Conversion to TFLite fp32
+python convert_to_tflite.py --model_architecture cnn --model_size_info 28 10 4 1 1 30 10 4 2 1 16 128 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --checkpoint $ckpt_path --no-quantize
+
+# Conversion to TFLite int8
+python convert_to_tflite.py --model_architecture cnn --model_size_info 28 10 4 1 1 30 10 4 2 1 16 128 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --checkpoint $ckpt_path --inference_type int8
+
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/requirements.txt b/models/keyword_spotting/cnn_small/model_package_tf/requirements.txt
new file mode 100644
index 0000000..3448cff
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/requirements.txt
@@ -0,0 +1,3 @@
+numpy == 1.19.5
+tensorflow == 2.5.0
+tensorflow-model-optimization == 0.6.0
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/train.py b/models/keyword_spotting/cnn_small/model_package_tf/train.py
new file mode 100644
index 0000000..8c488b3
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/train.py
@@ -0,0 +1,227 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for training simple keyword spotting models."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def train():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model.
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, True)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Callbacks.
+ train_dir = Path(FLAGS.train_dir) / "best"
+ train_dir.mkdir(parents=True, exist_ok=True)
+ model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
+ filepath=(train_dir / (FLAGS.model_architecture + "_{val_accuracy:.3f}_ckpt")),
+ save_weights_only=True,
+ monitor='val_accuracy',
+ mode='max',
+ save_best_only=True)
+ tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.summaries_dir)
+
+ # Train the model.
+ model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data,
+ callbacks=[model_checkpoint_callback, tensorboard_callback])
+
+ # Test and save the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ test_loss, test_acc = model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+ model.save(f'saved_model/{FLAGS.model_architecture}')
+ model.save(f'keras/{FLAGS.model_architecture}.h5')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='15000,3000',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--summaries_dir',
+ type=str,
+ default='/tmp/retrain_logs',
+ help='Where to save summary logs for TensorBoard.')
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ train()
diff --git a/models/keyword_spotting/cnn_small/model_package_tf/validation_utils/labels.txt b/models/keyword_spotting/cnn_small/model_package_tf/validation_utils/labels.txt
new file mode 100644
index 0000000..ba41645
--- /dev/null
+++ b/models/keyword_spotting/cnn_small/model_package_tf/validation_utils/labels.txt
@@ -0,0 +1,12 @@
+_silence_
+_unknown_
+yes
+no
+up
+down
+left
+right
+on
+off
+stop
+go
\ No newline at end of file
diff --git a/models/keyword_spotting/cnn_small/tflite_int8/README.md b/models/keyword_spotting/cnn_small/tflite_int8/README.md
deleted file mode 100644
index 54e42bd..0000000
--- a/models/keyword_spotting/cnn_small/tflite_int8/README.md
+++ /dev/null
@@ -1,57 +0,0 @@
-# CNN Small INT8
-
-## Description
-This is a fully quantized version (asymmetrical int8) of the CNN Small model developed by Arm, with training checkpoints, from the Hello Edge paper. Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
-
-## License
-[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
-
-## Related Materials
-### Class Labels
-The class labels associated with this model can be downloaded by running the script `get_class_labels.sh`.
-
-### Model Recreation Code
-Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m.
-
-## Network Information
-| Network Information | Value |
-|---------------------|------------------|
-| Framework | TensorFlow Lite |
-| SHA-1 Hash | 3415f88dfb8f78fe47d282d68ccbc3ce71a7510f |
-| Size (Bytes) | 75400 |
-| Provenance | https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m |
-| Paper | https://arxiv.org/abs/1711.07128 |
-
-## Accuracy
-Dataset: Google Speech Commands Test Set
-
-| Metric | Value |
-|--------|-------|
-| Accuracy | 0.912 |
-
-## Performance
-| Platform | Optimized |
-|----------|:---------:|
-| Cortex-A |:heavy_check_mark: |
-| Cortex-M |:heavy_check_mark: |
-| Mali GPU |:heavy_check_mark: |
-| Ethos U |:heavy_check_mark: |
-
-### Key
-* :heavy_check_mark: - Will run on this platform.
-* :heavy_multiplication_x: - Will not run on this platform.
-
-## Optimizations
-| Optimization | Value |
-|-----------------|---------|
-| Quantization | INT8 |
-
-## Network Inputs
-| Input Node Name | Shape | Description |
-|-----------------|---------|-------------|
-| input | (1, 490) | The input is a processed MFCCs of shape (1, 490) |
-
-## Network Outputs
-| Output Node Name | Shape | Description |
-|------------------|---------|-------------|
-| Identity | (1, 12) | The probability on 12 keywords. |
diff --git a/models/keyword_spotting/cnn_small/tflite_int8/definition.yaml b/models/keyword_spotting/cnn_small/tflite_int8/definition.yaml
deleted file mode 100644
index e5cd3c4..0000000
--- a/models/keyword_spotting/cnn_small/tflite_int8/definition.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-benchmark:
- Google Speech Commands test set:
- Accuracy: 91.23%
-description: 'This is a fully quantized version (asymmetrical int8) of the CNN Small
- model developed by Arm, with training checkpoints, from the Hello Edge paper. Code
- to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m'
-license:
-- Apache-2.0
-network:
- file_size_bytes: 75400
- filename: cnn_s_quantized.tflite
- framework: TensorFlow Lite
- hash:
- algorithm: sha1
- value: 3415f88dfb8f78fe47d282d68ccbc3ce71a7510f
- provenance: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
- quality_level: null
-network_parameters:
- input_nodes:
- - description: The input is a processed MFCCs of shape (1, 490)
- example_input:
- path: models/keyword_spotting/cnn_small/tflite_int8/testing_input/input
- name: input
- shape:
- - 1
- - 490
- output_nodes:
- - description: The probability on 12 keywords.
- name: Identity
- shape:
- - 1
- - 12
- test_output_path: models/keyword_spotting/cnn_small/tflite_int8/testing_output/Identity
-operators:
- TensorFlow Lite:
- - CONV_2D
- - DEQUANTIZE
- - FULLY_CONNECTED
- - QUANTIZE
- - RELU
- - RESHAPE
- - SOFTMAX
-paper: https://arxiv.org/abs/1711.07128
diff --git a/models/keyword_spotting/cnn_small/tflite_int8/get_class_labels.sh b/models/keyword_spotting/cnn_small/tflite_int8/get_class_labels.sh
deleted file mode 100755
index e59caf5..0000000
--- a/models/keyword_spotting/cnn_small/tflite_int8/get_class_labels.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the License); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an AS IS BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/usr/bin/env bash
-
-wget https://raw.githubusercontent.com/ARM-software/ML-KWS-for-MCU/e9cf319e9aa2ff71d433e111477dd95329fb94cb/Pretrained_models/labels.txt
-mv labels.txt labelmappings.txt
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/README.md b/models/keyword_spotting/dnn_large/model_package_tf/README.md
new file mode 100644
index 0000000..75d5348
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/README.md
@@ -0,0 +1,115 @@
+# DNN Large model package
+
+This folder contains code that will allow you to recreate the DNN Large keyword spotting model from
+the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf).
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Model Package Overview
+| Model | DNN_Large |
+|:---------------: |:------------------------------------------:|
+| **Format**: | Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |
+| **Feature**: | Keyword spotting for Arm Cortex-M CPUs |
+| **Architectural Delta w.r.t. Vanilla**: | None |
+| **Domain**: | Keyword spotting |
+| **Package Quality**: | Optimised |
+
+## Model Recreation
+
+To recreate the model you will first need to be using ```Python3.7``` and to have installed the requirements in ```requirements.txt```.
+
+Once these requirements are satisfied, you can execute the recreation script contained in this folder by running:
+
+```bash
+bash ./recreate_model.sh
+```
+
+Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder
+to generate the TFLite files and perform evaluation on the test sets. Both an fp32 version and a quantized version will be produced.
+The quantized version is fully quantized using post-training quantization.
+
+If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:
+
+```bash
+bash ./recreate_model.sh --train
+```
+
+Training is then performed and should produce a model that reaches the accuracy stated in this repository.
+Note that the TFLite export will still use the pre-trained checkpoint files, so you will need to re-run the script
+and this time supply the path to the new checkpoint files you want to use, for example:
+
+```bash
+bash ./recreate_model.sh --ckpt
+```
+
+
+## Training
+
+To train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:
+
+```
+python train.py --model_architecture dnn --model_size_info 128 128 128
+```
+The command line argument *--model_size_info* passes the neural network layer dimensions, such as the
+number of layers and the convolution filter sizes/strides, as a list to models.py, which builds the
+TensorFlow graph based on the provided model architecture and layer dimensions. For more information on
+*model_size_info* for each network architecture see [models.py](models.py).
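+
+For the 'dnn' architecture, each entry of *--model_size_info* simply becomes one fully-connected hidden layer. A minimal sketch of how this maps to a Keras model (mirroring `create_dnn_model` in models.py; the helper name and shapes below are illustrative only) is:
+
+```python
+# Sketch: each model_size_info entry becomes one ReLU fully-connected layer.
+import tensorflow as tf
+
+def build_dnn(fingerprint_size, label_count, model_size_info=(128, 128, 128)):
+    inputs = tf.keras.Input(shape=(fingerprint_size,), name='input')
+    x = inputs
+    for units in model_size_info:
+        x = tf.keras.layers.Dense(units=units, activation='relu')(x)
+    outputs = tf.keras.layers.Dense(units=label_count, activation='softmax')(x)
+    return tf.keras.Model(inputs, outputs)
+```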
+
+The training commands with all the hyperparameters to reproduce the models shown in the
+[paper](https://arxiv.org/pdf/1711.07128.pdf) are given [here](recreate_model.sh).
+
+## Testing
+To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:
+```
+python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint
+```
+The parameters used here should match those used in the Training step.
+
+## Optimization
+
+We introduce a new *optional* step to optimize the trained keyword spotting model for deployment.
+
+Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. The clustering hyperparameters are 32 weight clusters and the k-means++ centroid initialization method.
+
+To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.
+You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.
+
+To apply the optimization and fine-tuning, run the following command:
+```
+python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint
+```
+The parameters used here should match those used in the Training step, except for the number of training steps.
+The number of training steps is reduced since the optimization step only requires fine-tuning.
+
+This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model.
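+
+For reference, a minimal sketch of the clustering configuration set up by `optimisations.py` (using the TensorFlow Model Optimization toolkit, with a stand-in model for illustration only) is:
+
+```python
+# Sketch of the weight-clustering setup: 32 clusters, k-means++ centroid initialization.
+import tensorflow as tf
+import tensorflow_model_optimization as tfmot
+
+# Stand-in for the trained model restored from a checkpoint.
+model = tf.keras.Sequential([tf.keras.layers.Dense(12, activation='softmax', input_shape=(490,))])
+
+clustering_params = {
+    'number_of_clusters': 32,
+    'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS,
+}
+clustered_model = tfmot.clustering.keras.cluster_weights(model, **clustering_params)
+
+# After fine-tuning, strip the clustering wrappers before saving or converting.
+final_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+```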
+
+## Quantization and TFLite Conversion
+
+As part of this update we now use TensorFlow's
+[post-training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to
+make quantization of the trained models straightforward.
+
+To quantize your trained model (e.g. a DNN) run:
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint [--inference_type int8|int16]
+```
+The parameters used here should match those used in the Training step.
+
+The *inference_type* parameter is optional and should be used when a fully quantized model with int8 or int16 inputs and outputs is needed. It defaults to fp32.
+
+This step will produce a quantized TFLite file *dnn_quantized.tflite*.
+You can test the accuracy of this quantized model on the test set by running:
+```
+python evaluation.py --tflite_path dnn_quantized.tflite
+```
+The parameters used here should match those used in the Training step.
+
+`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:
+
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint --no-quantize
+```
+
+This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.
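+
+Under the hood, both paths use the standard TFLite converter flow. A minimal sketch of the full-integer int8 path (with a stand-in model and representative dataset for illustration only) is:
+
+```python
+# Sketch of int8 post-training quantization with a representative dataset.
+import numpy as np
+import tensorflow as tf
+
+# Stand-in trained model; the real flow restores the DNN from its checkpoint.
+model = tf.keras.Sequential([tf.keras.layers.Dense(12, activation='softmax', input_shape=(490,))])
+
+def representative_dataset():
+    for _ in range(100):
+        yield [np.random.rand(1, 490).astype(np.float32)]
+
+converter = tf.lite.TFLiteConverter.from_keras_model(model)
+converter.optimizations = [tf.lite.Optimize.DEFAULT]
+converter.representative_dataset = representative_dataset
+converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+converter.inference_input_type = tf.int8
+converter.inference_output_type = tf.int8
+
+tflite_model = converter.convert()
+with open('dnn_quantized.tflite', 'wb') as f:
+    f.write(tflite_model)
+```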
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/convert_to_tflite.py b/models/keyword_spotting/dnn_large/model_package_tf/convert_to_tflite.py
new file mode 100644
index 0000000..64ab8df
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/convert_to_tflite.py
@@ -0,0 +1,234 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for converting and quantizing a trained keyword spotting
+ model and saving to TFLite."""
+
+import argparse
+
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from evaluation import tflite_test
+
+NUM_REP_DATA_SAMPLES = 100 # How many samples to use for post training quantization.
+
+
+def convert(model_settings, audio_processor, checkpoint, quantize, inference_type, tflite_path):
+ """Load our trained floating point model and convert it.
+
+ TFLite conversion or post training quantization is performed and the
+ resulting model is saved as a TFLite file.
+ We use samples from the validation set to do post training quantization.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ checkpoint: Path to training checkpoint to load.
+ quantize: Whether to quantize the model or convert to fp32 TFLite model.
+ inference_type: Input/output type of the quantized model.
+ tflite_path: Output TFLite file save path.
+ """
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(checkpoint).expect_partial()
+
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+
+ def _rep_dataset():
+ """Generator function to produce representative dataset."""
+ i = 0
+ for mfcc, label in val_data:
+            if i >= NUM_REP_DATA_SAMPLES:
+ break
+ i += 1
+ yield [mfcc]
+
+ if quantize:
+ # Quantize model and save to disk.
+ tflite_model = post_training_quantize(model, inference_type, _rep_dataset)
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Quantized model saved to {tflite_path}.')
+ else:
+ converter = tf.lite.TFLiteConverter.from_keras_model(model)
+ tflite_model = converter.convert()
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Converted model saved to {tflite_path}.')
+
+
+def post_training_quantize(keras_model, inference_type, rep_dataset):
+ """Perform post training quantization and returns the TFLite model ready for saving.
+
+ See https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization for
+ more details.
+
+ Args:
+ keras_model: The trained tf Keras model used for post training quantization.
+ inference_type: Input/output type of the quantized model.
+ rep_dataset: Function to use as a representative dataset, must be callable.
+
+ Returns:
+ Quantized TFLite model ready for saving to disk.
+ """
+ converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+ converter.optimizations = [tf.lite.Optimize.DEFAULT]
+
+    # Default (fp32 input/output): int8 weights/activations with float32 interfaces.
+    supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS_INT8
+    if inference_type == 'int8':
+        converter.inference_input_type = tf.int8
+        converter.inference_output_type = tf.int8
+        supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS_INT8
+    elif inference_type == 'int16':
+        converter.inference_input_type = tf.int16
+        converter.inference_output_type = tf.int16
+        supported_ops = tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
+
+ # Int8 post training quantization needs representative dataset.
+ converter.representative_dataset = rep_dataset
+ converter.target_spec.supported_ops = [supported_ops]
+
+ tflite_model = converter.convert()
+
+ return tflite_model
+
+
+def main():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.quantize:
+ tflite_path = f'{FLAGS.model_architecture}_quantized.tflite'
+ else:
+ tflite_path = f'{FLAGS.model_architecture}.tflite'
+
+ # Load floating point model from checkpoint and convert it.
+ convert(model_settings, audio_processor, FLAGS.checkpoint,
+ FLAGS.quantize, FLAGS.inference_type, tflite_path)
+
+ # Test the newly converted model on the test set.
+ tflite_test(model_settings, audio_processor, tflite_path)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from.')
+ parser.add_argument(
+ '--quantize',
+ dest='quantize',
+ action="store_true",
+ default=True,
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--no-quantize',
+ dest='quantize',
+ action="store_false",
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--inference_type',
+ type=str,
+ default='fp32',
+        help='If quantize is true, whether the model input and output are float32, int8 or int16')
+
+ FLAGS, _ = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/data_processing/__init__.py b/models/keyword_spotting/dnn_large/model_package_tf/data_processing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/data_processing/data_preprocessing.py b/models/keyword_spotting/dnn_large/model_package_tf/data_processing/data_preprocessing.py
new file mode 100644
index 0000000..05cf5ba
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/data_processing/data_preprocessing.py
@@ -0,0 +1,462 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Modifications Copyright 2023 Arm Inc. All Rights Reserved.
+# Modified to use TensorFlow 2.0 and data pipelines.
+#
+"""Functions for loading and preparing data for keyword spotting."""
+
+import os
+import re
+import sys
+import urllib
+from pathlib import Path
+import tarfile
+import hashlib
+import random
+import math
+from enum import Enum
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.ops import gen_audio_ops as audio_ops
+
+MAX_NUM_WAVS_PER_CLASS = 2**27 - 1 # ~134M
+RANDOM_SEED = 59185
+BACKGROUND_NOISE_DIR_NAME = '_background_noise_'
+SILENCE_LABEL = '_silence_'
+SILENCE_INDEX = 0
+UNKNOWN_WORD_INDEX = 1
+UNKNOWN_WORD_LABEL = '_unknown_'
+
+
+def load_wav_file(wav_filename, desired_samples):
+ """Loads and then decodes a given 16bit PCM wav file.
+
+ Decoded audio is scaled to the range [-1, 1] and padded or cropped to the desired number of samples.
+
+ Args:
+ wav_filename: 16bit PCM wav file to load.
+ desired_samples: Number of samples wanted from the audio file.
+
+ Returns:
+ Tuple consisting of the decoded audio and sample rate.
+ """
+ wav_file = tf.io.read_file(wav_filename)
+ decoded_wav = audio_ops.decode_wav(wav_file, desired_channels=1, desired_samples=desired_samples)
+
+ return decoded_wav.audio, decoded_wav.sample_rate
+
+
+def calculate_mfcc(audio_signal, audio_sample_rate, window_size, window_stride, num_mfcc):
+ """Returns Mel Frequency Cepstral Coefficients (MFCC) for a given audio signal.
+
+ Args:
+ audio_signal: Raw audio signal in range [-1, 1]
+ audio_sample_rate: Audio signal sample rate
+ window_size: Window size in samples for calculating spectrogram
+ window_stride: Window stride in samples for calculating spectrogram
+ num_mfcc: The number of MFCC features wanted.
+
+ Returns:
+        Calculated mfcc features.
+ """
+ spectrogram = audio_ops.audio_spectrogram(input=audio_signal, window_size=window_size, stride=window_stride,
+ magnitude_squared=True)
+
+ mfcc_features = audio_ops.mfcc(spectrogram, audio_sample_rate, dct_coefficient_count=num_mfcc)
+
+ return mfcc_features
+
+
+def which_set(filename, validation_percentage, testing_percentage):
+ """Determines which data partition the file should belong to.
+
+ We want to keep files in the same training, validation, or testing sets even
+ if new ones are added over time. This makes it less likely that testing
+ samples will accidentally be reused in training when long runs are restarted
+ for example. To keep this stability, a hash of the filename is taken and used
+ to determine which set it should belong to. This determination only depends on
+ the name and the set proportions, so it won't change as other files are added.
+ It's also useful to associate particular files as related (for example words
+ spoken by the same person), so anything after '_nohash_' in a filename is
+ ignored for set determination. This ensures that 'bobby_nohash_0.wav' and
+ 'bobby_nohash_1.wav' are always in the same set, for example.
+
+ Args:
+ filename: File path of the data sample.
+ validation_percentage: How much of the data set to use for validation.
+ testing_percentage: How much of the data set to use for testing.
+
+ Returns:
+ String, one of 'training', 'validation', or 'testing'.
+ """
+ base_name = os.path.basename(filename)
+ # We want to ignore anything after '_nohash_' in the file name when
+ # deciding which set to put a wav in, so the data set creator has a way of
+ # grouping wavs that are close variations of each other.
+ hash_name = re.sub(r'_nohash_.*$', '', base_name)
+ # This looks a bit magical, but we need to decide whether this file should
+ # go into the training, testing, or validation sets, and we want to keep
+ # existing files in the same set even if more files are subsequently
+ # added.
+ # To do that, we need a stable way of deciding based on just the file name
+ # itself, so we do a hash of that and then use that to generate a
+ # probability value that we use to assign it.
+ hash_name_hashed = hashlib.sha1(tf.compat.as_bytes(hash_name)).hexdigest()
+ percentage_hash = ((int(hash_name_hashed, 16) %
+ (MAX_NUM_WAVS_PER_CLASS + 1)) *
+ (100.0 / MAX_NUM_WAVS_PER_CLASS))
+ if percentage_hash < validation_percentage:
+ result = 'validation'
+ elif percentage_hash < (testing_percentage + validation_percentage):
+ result = 'testing'
+ else:
+ result = 'training'
+ return result
+
+
+def prepare_words_list(wanted_words):
+ """Prepends common tokens to the custom word list.
+
+ Args:
+ wanted_words: List of strings containing custom words to spot.
+
+ Returns:
+ List of words with silence and unknown tokens added.
+ """
+ return [SILENCE_LABEL, UNKNOWN_WORD_LABEL] + wanted_words
+
+
+class AudioProcessor:
+ """Handles loading, partitioning, and preparing audio training data."""
+
+ class Modes(Enum):
+ TRAINING = 1
+ VALIDATION = 2
+ TESTING = 3
+
+ def __init__(self, data_url, data_dir, silence_percentage, unknown_percentage,
+ wanted_words, validation_percentage, testing_percentage, model_settings):
+ self.data_dir = Path(data_dir)
+ self.model_settings = model_settings
+ self.words_list = prepare_words_list(wanted_words)
+
+ self._tf_datasets = {}
+ self.background_data = None
+ self._set_size = {'training': 0, 'validation': 0, 'testing': 0}
+
+ self._download_and_extract_data(data_url, data_dir)
+ self._prepare_datasets(silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage)
+ self._prepare_background_data()
+
+ def get_data(self, mode, background_frequency=0, background_volume_range=0, time_shift=0):
+ """Returns the train, validation or test set for KWS as a TF Dataset.
+
+ Args:
+ mode: The set to return, see AudioProcessor.Modes enumeration.
+ background_frequency: How many of the samples have background noise mixed in.
+ background_volume_range: How loud the background noise should be, between 0 and 1.
+ time_shift: Range to randomly shift the training audio by in time.
+
+ Returns:
+ TF dataset that will generate tuples containing an mfcc and corresponding label.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ dataset = self._tf_datasets['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ dataset = self._tf_datasets['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ dataset = self._tf_datasets['testing']
+ else:
+            raise ValueError("Incorrect dataset type given")
+
+ use_background = (self.background_data is not None) and (mode == AudioProcessor.Modes.TRAINING)
+ dataset = dataset.map(lambda path, label: self._process_path(path, label, self.model_settings,
+ background_frequency, background_volume_range,
+ time_shift, use_background, self.background_data),
+ num_parallel_calls=tf.data.experimental.AUTOTUNE)
+
+ return dataset
+
+ def set_size(self, mode):
+ """Get the number of samples in the requested dataset partition.
+
+ Args:
+ mode: Which partition, see AudioProcessor.Modes enumeration.
+
+ Returns:
+ Number of samples in the partition.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ return self._set_size['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ return self._set_size['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ return self._set_size['testing']
+ else:
+            raise ValueError('Incorrect dataset type given')
+
+ @staticmethod
+ def _process_path(path, label, model_settings, background_frequency, background_volume_range, time_shift_samples,
+ use_background, background_data):
+ """Load wav files and calculate mfcc features.
+
+ Random shifting of samples and adding in background noise is done within this function as well.
+ This function is meant to be mapped onto a TF Dataset by using a lambda function.
+
+ Args:
+ path: Path to the wav file to load.
+ label: Integer label for classifying the audio clip.
+ model_settings: Dictionary of settings for model being trained.
+ background_frequency: How many clips will have background noise, 0.0 to 1.0.
+ background_volume_range: How loud the background noise will be.
+ time_shift_samples: How much to randomly shift the clips by.
+ use_background: Add in background noise to audio clips or not.
+ background_data: Ragged tensor of loaded background noise samples.
+
+ Returns:
+ Tuple of calculated flattened mfcc and its class label.
+ """
+
+ desired_samples = model_settings['desired_samples']
+ audio, sample_rate = load_wav_file(path, desired_samples=desired_samples)
+
+ # Make our own silence audio data.
+ if label == SILENCE_INDEX:
+ audio = tf.multiply(audio, 0)
+
+ # Shift samples start position and pad any gaps with zeros.
+ if time_shift_samples > 0:
+ time_shift_amount = tf.random.uniform(shape=(), minval=-time_shift_samples, maxval=time_shift_samples,
+ dtype=tf.int32)
+ else:
+ time_shift_amount = 0
+ if time_shift_amount > 0:
+ time_shift_padding = [[time_shift_amount, 0], [0, 0]]
+ time_shift_offset = [0, 0]
+ else:
+ time_shift_padding = [[0, -time_shift_amount], [0, 0]]
+ time_shift_offset = [-time_shift_amount, 0]
+
+ padded_foreground = tf.pad(audio, time_shift_padding, mode='CONSTANT')
+ sliced_foreground = tf.slice(padded_foreground, time_shift_offset, [desired_samples, -1])
+
+ # Get a random section of background noise.
+ if use_background:
+ background_index = tf.random.uniform(shape=(), maxval=background_data.shape[0], dtype=tf.int32)
+ background_sample = background_data[background_index]
+ background_offset = tf.random.uniform(shape=(), maxval=len(background_sample)-desired_samples,
+ dtype=tf.int32)
+ background_clipped = background_sample[background_offset:(background_offset + desired_samples)]
+ background_reshaped = tf.reshape(background_clipped, [desired_samples, 1])
+ if tf.random.uniform(shape=(), maxval=1) < background_frequency:
+ background_volume = tf.random.uniform(shape=(), maxval=background_volume_range)
+ else:
+ background_volume = tf.constant(0, dtype='float32')
+ else:
+ background_reshaped = np.zeros([desired_samples, 1], dtype=np.float32)
+ background_volume = tf.constant(0, dtype='float32')
+
+ # Mix in background noise.
+ background_mul = tf.multiply(background_reshaped, background_volume)
+ background_add = tf.add(background_mul, sliced_foreground)
+ background_clamp = tf.clip_by_value(background_add, -1.0, 1.0)
+
+ mfcc = calculate_mfcc(background_clamp, sample_rate, model_settings['window_size_samples'],
+ model_settings['window_stride_samples'],
+ model_settings['dct_coefficient_count'])
+ mfcc = tf.reshape(mfcc, [-1])
+
+ return mfcc, label
+
+ def _download_and_extract_data(self, data_url, target_directory):
+ """Downloads and extracts file to target directory.
+
+ If the file does not already exist download it and then untar into the target directory.
+
+ Args:
+ data_url: Web link to the tarred data to download.
+ target_directory: Directory to download and extract to.
+ """
+ target_directory = Path(target_directory)
+ target_directory.mkdir(exist_ok=True)
+
+ filename = data_url.split('/')[-1]
+ filepath = target_directory / filename
+
+ if not filepath.exists():
+ def _report_hook(block_num, block_size, total_size):
+ """Function to track download progress in urllib"""
+ read_so_far = block_num * block_size
+ percent = (read_so_far / total_size) * 100.0
+
+ s = f"\rDownloading {filename} {percent:.1f}%"
+
+ sys.stdout.write(s)
+ sys.stdout.flush()
+
+ filepath, _ = urllib.request.urlretrieve(data_url, filepath, _report_hook)
+ print()
+
+ print(f'Untarring {filename}...')
+ tarfile.open(filepath, 'r:gz').extractall(target_directory)
+
+ def _prepare_datasets(self, silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage):
+ """Split the data into train, validation and testing sets.
+
+ Silence and unknown data is added, then sets are converted to TF Datasets.
+
+ Args:
+ silence_percentage: Percent of words should be silence.
+ unknown_percentage: Percent of words that should be unknown.
+ wanted_words: List of words wanted to classify.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ """
+ # Make sure the shuffling and picking of unknowns is deterministic.
+ random.seed(RANDOM_SEED)
+ wanted_words_index = {}
+
+ for index, wanted_word in enumerate(wanted_words):
+ wanted_words_index[wanted_word] = index + 2
+
+ # Find all wav files in subfolders.
+ search_path = self.data_dir / '*' / '*.wav'
+ data_index, unknown_index, all_words = self._find_and_sort_wavs(search_path, validation_percentage,
+ testing_percentage, wanted_words_index)
+
+ for index, wanted_word in enumerate(wanted_words):
+ if wanted_word not in all_words:
+ raise Exception(f'Tried to find {wanted_word} in labels but only found: {", ".join(all_words.keys())}')
+
+ word_to_index = {}
+ for word in all_words:
+ if word in wanted_words_index:
+ word_to_index[word] = wanted_words_index[word]
+ else:
+ word_to_index[word] = UNKNOWN_WORD_INDEX
+ word_to_index[SILENCE_LABEL] = SILENCE_INDEX
+
+ # We need an arbitrary file to load as the input for the silence samples.
+ # It's multiplied by zero later, so the content doesn't matter.
+ silence_wav_path = data_index['training'][0]['file']
+ for set_index in ['validation', 'testing', 'training']:
+ set_size = len(data_index[set_index]) # Size before adding silence and unknown samples.
+ silence_size = int(math.ceil(set_size * silence_percentage / 100))
+ for _ in range(silence_size):
+ data_index[set_index].append({
+ 'label': SILENCE_LABEL,
+ 'file': silence_wav_path
+ })
+ # Pick some unknowns to add to each partition of the data set.
+ random.shuffle(unknown_index[set_index])
+ unknown_size = int(math.ceil(set_size * unknown_percentage / 100))
+ data_index[set_index].extend(unknown_index[set_index][:unknown_size])
+
+ self._set_size[set_index] = len(data_index[set_index]) # Size after adding silence and unknown samples.
+
+ # Make sure the ordering is random.
+ random.shuffle(data_index[set_index])
+
+ # Transform into TF Datasets ready for easier processing later.
+ labels, paths = list(zip(*[d.values() for d in data_index[set_index]]))
+ labels = [word_to_index[label] for label in labels]
+ self._tf_datasets[set_index] = tf.data.Dataset.from_tensor_slices((list(paths), labels))
+
+ def _find_and_sort_wavs(self, search_pattern, validation_percentage, testing_percentage, wanted_words_index):
+ """Find and sort wav files into known and unknown word sets.
+
+ Known words are files containing words in the list of wanted words.
+ Any other clip goes to the unknown label set. Labels come from the folder names.
+ All clips are also assigned to train, test and validation sets.
+
+ Args:
+ search_pattern: Path pattern used by glob to find wav files.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ wanted_words_index: Dict mapping wanted words to their label index.
+
+ Returns:
+ 3-tuple of known words, unknown words and mapping of all word labels.
+ """
+ data_index = {'validation': [], 'testing': [], 'training': []}
+ unknown_index = {'validation': [], 'testing': [], 'training': []}
+ all_words = {}
+
+ for wav_path in sorted(tf.io.gfile.glob(str(search_pattern))):
+ word = Path(wav_path).parent.name.lower()
+
+ # Treat the '_background_noise_' folder as a special case, since we expect
+ # it to contain long audio samples we mix in to improve training.
+ if word == BACKGROUND_NOISE_DIR_NAME:
+ continue
+
+ all_words[word] = True
+ set_index = which_set(wav_path, validation_percentage, testing_percentage)
+ # If it's a known class, store its detail, otherwise add it to the list
+ # we'll use to train the unknown label.
+ if word in wanted_words_index:
+ data_index[set_index].append({'label': word, 'file': wav_path})
+ else:
+ unknown_index[set_index].append({'label': word, 'file': wav_path})
+ if not all_words:
+ raise Exception('No .wavs found at ' + str(search_pattern))
+
+ return data_index, unknown_index, all_words
+
+ def _prepare_background_data(self):
+ """Searches a folder for background noise audio, and loads it into memory.
+
+ It's expected that the background audio samples will be in a subdirectory
+ named '_background_noise_' inside the 'data_dir' folder, as .wavs that match
+ the sample rate of the training data, but can be much longer in duration.
+
+ If the '_background_noise_' folder doesn't exist at all, this isn't an
+ error, it's just taken to mean that no background noise augmentation should
+ be used. If the folder does exist, but it's empty, that's treated as an
+ error.
+
+ Returns:
+ Ragged tensor of raw PCM-encoded audio samples of background noise.
+            None if '_background_noise_' folder doesn't exist.
+
+ Raises:
+ Exception: If files aren't found in the folder.
+ """
+ background_data = []
+ background_dir = Path(self.data_dir / BACKGROUND_NOISE_DIR_NAME)
+ if not background_dir.exists():
+ self.background_data = None
+ return
+
+ search_path = Path(background_dir / '*.wav')
+ for wav_path in tf.io.gfile.glob(str(search_path)):
+ wav_data, _ = load_wav_file(wav_path, desired_samples=-1)
+ background_data.append(tf.reshape(wav_data, [-1]))
+
+ if not background_data:
+ raise Exception('No background wav files were found in ' + str(search_path))
+
+        # Ragged tensor as we can't use lists in tf dataset map functions.
+ self.background_data = tf.ragged.stack(background_data)
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/dnn_l_inference_keras.py b/models/keyword_spotting/dnn_large/model_package_tf/dnn_l_inference_keras.py
new file mode 100644
index 0000000..db7694a
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/dnn_l_inference_keras.py
@@ -0,0 +1,76 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import argparse
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+    with open(filename, "r") as f:
+        return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+
+ model = tf.keras.models.load_model(FLAGS.keras_file_path)
+ predictions = model.predict(x)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--keras_file_path',
+ type=str,
+ default='',
+ help='Path to the .h5 Keras model file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/dnn_l_inference_tflite.py b/models/keyword_spotting/dnn_large/model_package_tf/dnn_l_inference_tflite.py
new file mode 100644
index 0000000..9f79d99
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/dnn_l_inference_tflite.py
@@ -0,0 +1,120 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import numpy as np
+import argparse
+
+
+def tflite_inference(input_data, tflite_path):
+ """Call forwards pass of TFLite file and returns the result.
+
+ Args:
+ input_data: Input data to use on forward pass.
+ tflite_path: Path to TFLite file to run.
+
+ Returns:
+ Output from inference.
+ """
+ supported_quant_dtypes = (np.int8, np.int16)
+ interpreter = tf.lite.Interpreter(model_path=tflite_path)
+ interpreter.allocate_tensors()
+
+ input_details = interpreter.get_input_details()
+ output_details = interpreter.get_output_details()
+
+ input_dtype = input_details[0]["dtype"]
+ output_dtype = output_details[0]["dtype"]
+
+ # Check if the input/output type is quantized,
+ # set scale and zero-point accordingly
+ if input_dtype in supported_quant_dtypes:
+ input_scale, input_zero_point = input_details[0]["quantization"]
+ else:
+ input_scale, input_zero_point = 1, 0
+
+ input_data = input_data / input_scale + input_zero_point
+ input_data = np.round(input_data) if input_dtype in supported_quant_dtypes else input_data
+
+ if output_dtype in supported_quant_dtypes:
+ output_scale, output_zero_point = output_details[0]["quantization"]
+ else:
+ output_scale, output_zero_point = 1, 0
+
+ interpreter.set_tensor(input_details[0]['index'], tf.cast(input_data, input_dtype))
+ interpreter.invoke()
+
+ output_data = interpreter.get_tensor(output_details[0]['index'])
+
+ output_data = output_scale * (output_data.astype(np.float32) - output_zero_point)
+
+ return output_data
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+    with open(filename, "r") as f:
+        return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+ predictions = tflite_inference(x, FLAGS.tflite_path)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ default='',
+ help='Path to TFLite file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/evaluation.py b/models/keyword_spotting/dnn_large/model_package_tf/evaluation.py
new file mode 100644
index 0000000..5e60134
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/evaluation.py
@@ -0,0 +1,250 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for testing trained keyword spotting models from checkpoint files and TFLite files."""
+
+import argparse
+
+import numpy as np
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from dnn_l_inference_tflite import tflite_inference
+
+
+def tflite_test(model_settings, audio_processor, tflite_path):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A TFLite model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ tflite_path: Path to TFLite file to use for inference.
+ """
+ # Evaluate on validation set.
+ print("Running TFLite evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+ expected_indices = np.concatenate([y for x, y in val_data])
+ predicted_indices = []
+
+ for mfcc, label in val_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TFLite evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(1)
+ expected_indices = np.concatenate([y for x, y in test_data])
+ predicted_indices = []
+
+ for mfcc, label in test_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def keras_test(model_settings, audio_processor, model):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A loaded keras model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ model: Loaded keras model.
+ """
+ # Evaluate on validation set.
+ print("Running TF evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in val_data])
+
+ predictions = model.predict(val_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TF evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in test_data])
+
+ predictions = model.predict(test_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def calculate_accuracy(predicted_indices, expected_indices):
+ """Calculates and returns accuracy.
+
+ Args:
+ predicted_indices: List of predicted integer indices.
+ expected_indices: List of expected integer indices.
+
+ Returns:
+ Accuracy value between 0 and 1.
+ """
+ correct_prediction = tf.equal(predicted_indices, expected_indices)
+ accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+ return accuracy
+
+
+def evaluate():
+ """Calculate accuracy and confusion matrices on validation and test sets.
+
+ Model is created and weights loaded from supplied command line arguments.
+ """
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.tflite_path:
+ tflite_test(model_settings, audio_processor, FLAGS.tflite_path)
+
+ if FLAGS.checkpoint:
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+ keras_test(model_settings, audio_processor, model)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from')
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ help='Path to TFLite file to use for evaluation')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ evaluate()
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/how_to_guidance.ipynb b/models/keyword_spotting/dnn_large/model_package_tf/how_to_guidance.ipynb
new file mode 100644
index 0000000..67b2031
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/how_to_guidance.ipynb
@@ -0,0 +1,428 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.\n",
+ "#\n",
+ "# SPDX-License-Identifier: Apache-2.0\n",
+ "#\n",
+ "# Licensed under the Apache License, Version 2.0 (the License); you may\n",
+ "# not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an AS IS BASIS, WITHOUT\n",
+ "# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# DNN_Large - Optimised\n",
+ "\n",
+ "Here we reproduce the models with our established codebase and ModelPackage approach for your convenience.\n",
+ "\n",
+ "## Model-Package Overview:\n",
+ "\n",
+ "| Model \t| DNN_Large \t|\n",
+ "|:---------------:\t|:---------------------------------------------------------------:\t|\n",
+ "| **Format**: \t| Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |\n",
+ "| **Feature**: \t| Keyword spotting for Arm Cortex-M CPUs |\n",
+ "| **Architectural Delta w.r.t. Vanilla**: | None |\n",
+ "| **Domain**: \t| Keyword spotting |\n",
+ "| **Package Quality**: \t| Optimised |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Table of contents \n",
+ "\n",
+ "This how-to guidance presents the key steps to reproduce everything in this package. The contents are organised as below. We provided the internal navigation links for users to easy-jump among different sections. \n",
+ "\n",
+ " \n",
+ "* [1.0 Model recreation](#model_recreation)\n",
+ "\n",
+ "* [2.0 Training](#training)\n",
+ "\n",
+ "* [3.0 Testing](#testing)\n",
+ "\n",
+ "* [4.0 Optimization](#optimization)\n",
+ "\n",
+ "* [5.0 Quantization and TFLite conversion](#tflite_conversion)\n",
+ "\n",
+ "* [6.0 Inference the TFLite model files](#tflite_inference)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1.0 Model Recreation\n",
+ "\n",
+ "In order to recreate the model you will first need to be using ```Python3.7``` and install the requirements in ```requirements.txt```.\n",
+ "\n",
+ "Once you have these requirements satisfied you can execute the recreation script contained within this folder, just run:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 13:18:57.429502: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 13:19:44.590405: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 13:19:44.627169: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:19:44.627205: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:19:44.650614: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 13:19:44.650690: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 13:19:44.653550: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 13:19:44.653884: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 13:19:44.654515: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 13:19:44.655280: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 13:19:44.655466: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 13:19:44.655866: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:19:44.656166: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 13:19:44.657031: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:19:44.657463: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:19:44.657531: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:19:45.095453: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:19:45.095490: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:19:45.095499: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:19:45.096006: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10942 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 13:19:46.231729: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 13:19:46.494512: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 13:19:46.494713: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 13:19:46.495116: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:19:46.495381: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:19:46.495413: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:19:46.495422: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:19:46.495429: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:19:46.495705: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10942 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 13:19:46.519581: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 13:19:46.520288: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.007ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.001ms.\n",
+ "\n",
+ "2023-01-31 13:19:46.560745: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 13:19:46.560780: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 13:19:46.564917: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 13:19:46.566851: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:19:46.567112: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:19:46.567143: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:19:46.567154: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:19:46.567161: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:19:46.567471: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10942 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "Converted model saved to dnn.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "2023-01-31 13:19:46.612300: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 265 7 9 5 21 10 10 15 6 5 18]\n",
+ " [ 0 6 353 5 1 2 19 6 0 0 0 5]\n",
+ " [ 0 11 7 340 5 14 3 1 3 1 5 16]\n",
+ " [ 0 4 0 3 296 1 8 1 5 20 8 4]\n",
+ " [ 0 3 0 17 0 334 0 1 6 1 1 14]\n",
+ " [ 0 5 23 1 3 1 307 8 0 2 1 1]\n",
+ " [ 0 10 1 2 2 2 3 339 1 2 0 1]\n",
+ " [ 1 9 1 2 7 7 1 0 323 9 0 3]\n",
+ " [ 0 3 0 1 28 2 3 1 9 323 3 0]\n",
+ " [ 1 4 0 0 10 2 1 0 4 3 324 1]\n",
+ " [ 0 11 1 34 5 17 1 1 3 3 1 295]]\n",
+ "Validation accuracy = 87.06%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 308 9 12 7 10 9 6 19 4 9 15]\n",
+ " [ 0 9 382 2 0 7 14 2 0 0 1 2]\n",
+ " [ 0 9 3 332 0 25 2 0 0 0 2 32]\n",
+ " [ 0 11 1 2 366 4 3 0 11 9 13 5]\n",
+ " [ 0 13 1 27 2 337 8 1 1 0 1 15]\n",
+ " [ 0 9 13 5 4 2 365 9 1 1 2 1]\n",
+ " [ 0 16 0 1 3 2 5 362 2 4 0 1]\n",
+ " [ 0 9 1 0 2 9 1 0 351 21 1 1]\n",
+ " [ 0 10 0 0 17 1 5 2 11 350 1 5]\n",
+ " [ 0 3 1 4 15 4 0 1 0 2 377 4]\n",
+ " [ 0 12 3 55 6 9 4 2 3 5 4 299]]\n",
+ "Test accuracy = 86.65%(N=4890)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 13:19:59.827495: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 13:20:49.624250: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 13:20:49.663343: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:20:49.663382: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:20:49.683862: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 13:20:49.683941: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 13:20:49.686764: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 13:20:49.687075: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 13:20:49.687678: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 13:20:49.688414: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 13:20:49.688571: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 13:20:49.688929: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:20:49.689226: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 13:20:49.689923: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:20:49.690297: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:20:49.690365: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:20:50.138334: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:20:50.138374: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:20:50.138386: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:20:50.138892: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10942 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 13:20:51.250414: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 13:20:51.521477: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 13:20:51.521575: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 13:20:51.522122: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:20:51.522382: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:20:51.522413: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:20:51.522424: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:20:51.522432: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:20:51.522720: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10942 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 13:20:51.539458: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 13:20:51.540454: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.01ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.001ms.\n",
+ "\n",
+ "2023-01-31 13:20:51.584213: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 13:20:51.584254: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 13:20:51.588197: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 13:20:51.590131: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:20:51.590402: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:20:51.590432: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:20:51.590442: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:20:51.590450: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:20:51.590759: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10942 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 13:20:51.621299: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "fully_quantize: 0, inference_type: 6, input_inference_type: 9, output_inference_type: 9\n",
+ "Quantized model saved to dnn_quantized.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 271 7 7 5 19 9 10 16 6 5 16]\n",
+ " [ 0 8 354 6 2 2 17 4 0 0 1 3]\n",
+ " [ 0 16 7 333 9 14 5 2 2 1 3 14]\n",
+ " [ 0 6 1 3 293 2 8 1 4 19 8 5]\n",
+ " [ 0 9 2 19 9 320 0 1 3 1 3 10]\n",
+ " [ 0 3 29 1 5 1 297 11 0 2 1 2]\n",
+ " [ 0 14 1 4 8 1 4 325 1 2 2 1]\n",
+ " [ 1 10 2 1 10 4 1 1 323 7 0 3]\n",
+ " [ 0 4 0 0 32 2 3 0 6 320 4 2]\n",
+ " [ 1 7 0 1 16 3 0 3 3 2 314 0]\n",
+ " [ 0 11 1 47 9 18 1 1 3 3 1 277]]\n",
+ "Validation accuracy = 85.44%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 319 8 13 7 8 9 5 17 4 7 11]\n",
+ " [ 0 10 379 2 5 7 10 2 0 0 1 3]\n",
+ " [ 0 13 3 332 4 20 2 2 0 2 1 26]\n",
+ " [ 0 15 0 3 363 3 3 3 10 11 10 4]\n",
+ " [ 0 17 6 26 5 327 6 2 1 0 2 14]\n",
+ " [ 0 13 15 3 11 2 352 10 2 3 1 0]\n",
+ " [ 0 18 2 0 7 2 9 350 3 2 1 2]\n",
+ " [ 0 13 1 0 9 7 0 1 342 17 4 2]\n",
+ " [ 0 14 1 0 27 0 6 3 8 334 2 7]\n",
+ " [ 0 5 1 2 23 3 4 1 0 1 366 5]\n",
+ " [ 0 13 2 62 13 13 4 2 1 3 6 283]]\n",
+ "Test accuracy = 84.97%(N=4890)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!bash ./recreate_model.sh"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder to generate the TFLite files and evaluate them on the validation and test sets. Both an fp32 version and a quantized version will be produced; the quantized version is fully quantized using post-training quantization.\n",
+ "\n",
+ "If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --train\n",
+ "```\n",
+ "\n",
+    "Training is then performed and should produce a model that reaches the accuracy stated in this repository. Note that exporting to TFLite will still use the baseline pre-trained checkpoint files, so to export your newly trained model you will need to re-run the script and supply the path to the new checkpoint files, for example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --ckpt \n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2.0 Training\n",
+ "\n",
+    "The training scripts can be used to recreate any of the models from the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf), provided the right hyperparameters are used. The training commands with all the hyperparameters needed to reproduce the model in this repository are given [here](recreate_model.sh). The model in this part of the repository represents just one variant of the models from the paper; other variants are covered in other parts of the repository.\n",
+ "\n",
+ "\n",
+ "As a general example of how to train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:\n",
+ "```\n",
+ "python train.py --model_architecture dnn --model_size_info 128 128 128\n",
+ "```\n",
+ "\n",
+    "The command line argument *--model_size_info* passes the neural network layer\n",
+    "dimensions (such as the number of layers and the convolution filter sizes/strides) as a list to models.py,\n",
+    "which builds the TensorFlow graph based on the provided model architecture\n",
+    "and layer dimensions. For more information on *model_size_info* for each network architecture, see\n",
+    "[models.py](model_core_utils/models.py). An illustrative sketch of how these arguments map onto the Keras model is shown in the next cell.\n"
+ ]
+ },
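+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As an illustrative sketch (assuming the ```model_core_utils``` package from this model package is importable), the DNN Large configuration used in [recreate_model.sh](recreate_model.sh) can be built directly in Python:\n",
+    "\n",
+    "```python\n",
+    "from model_core_utils import models\n",
+    "\n",
+    "# Feature extraction settings for DNN Large: 12 labels, 16 kHz audio, 1 s clips,\n",
+    "# 40 ms windows with a 40 ms stride and 10 MFCC coefficients per frame.\n",
+    "settings = models.prepare_model_settings(\n",
+    "    label_count=12, sample_rate=16000, clip_duration_ms=1000,\n",
+    "    window_size_ms=40, window_stride_ms=40, dct_coefficient_count=10)\n",
+    "\n",
+    "# model_size_info [436, 436, 436] gives three hidden Dense layers of 436 units each.\n",
+    "model = models.create_model(settings, 'dnn', [436, 436, 436], True)\n",
+    "model.summary()  # input shape (None, 250), softmax output over 12 labels\n",
+    "```"
+   ]
+  },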
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3.0 Testing\n",
+ "To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:\n",
+ "```\n",
+ "python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters passed to this script should match those used in the Training step.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4.0 Optimization\n",
+ "\n",
+ "We introduce an *optional* step to optimize the trained keyword spotting model for deployment.\n",
+ "\n",
+    "Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. 32 weight clusters and the kmeans++ centroid initialization method are used as the clustering hyperparameters.\n",
+ "\n",
+ "To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.\n",
+ "You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.\n",
+ "\n",
+ "To apply the optimization and fine-tuning, run the following command:\n",
+ "```\n",
+ "python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step, except for the number of training steps.\n",
+ "The number of training steps is reduced since the optimization step only requires fine-tuning.**\n",
+ "\n",
+ "This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model."
+ ]
+ },
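+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For reference, the core of the clustering step in [optimisations.py](optimisations.py) is roughly the following, where ```model``` is a trained Keras model restored from a checkpoint:\n",
+    "\n",
+    "```python\n",
+    "import tensorflow_model_optimization as tfmot\n",
+    "\n",
+    "# Cluster all kernel weights into 32 clusters, initialised with kmeans++.\n",
+    "clustering_params = {\n",
+    "    'number_of_clusters': 32,\n",
+    "    'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS}\n",
+    "clustered_model = tfmot.clustering.keras.cluster_weights(model, **clustering_params)\n",
+    "\n",
+    "# ...compile and fine-tune clustered_model as usual, then strip the clustering\n",
+    "# wrappers before saving the checkpoint used for TFLite conversion.\n",
+    "final_model = tfmot.clustering.keras.strip_clustering(clustered_model)\n",
+    "```"
+   ]
+  },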
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5.0 Quantization and TFLite Conversion\n",
+ "\n",
+ "You can now use TensorFlow's\n",
+    "[post-training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to\n",
+    "quantize the trained models.\n",
+ "\n",
+ "To quantize your trained model (e.g. a DNN) run:\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint [--inference_type int8|int16]\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+    "The ```inference_type``` parameter is *optional* and should be used if a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32.\n",
+ "\n",
+ "In this example, this step will produce a quantized TFLite file *dnn_quantized.tflite*."
+ ]
+ },
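+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a generic illustration of post-training integer quantization (not the exact code in ```convert_to_tflite.py```), the conversion follows the standard TensorFlow Lite flow; here ```representative_data``` is assumed to be a generator yielding calibration feature vectors:\n",
+    "\n",
+    "```python\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n",
+    "converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
+    "converter.representative_dataset = representative_data\n",
+    "converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]\n",
+    "converter.inference_input_type = tf.int8\n",
+    "converter.inference_output_type = tf.int8\n",
+    "\n",
+    "with open('dnn_quantized.tflite', 'wb') as f:\n",
+    "    f.write(converter.convert())\n",
+    "```"
+   ]
+  },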
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can test the accuracy of this quantized model on the test set by running:\n",
+ "```\n",
+ "python evaluation.py --tflite_path dnn_quantized.tflite\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:\n",
+ "\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint --no-quantize\n",
+ "```\n",
+ "\n",
+ "This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 6.0 Single inference of the TFLite model files \n",
+ "\n",
+    "You can run TFLite inference on the fp32 and int8 model files using the following command:\n",
+ "\n",
+ "```python dnn_l_inference_tflite.py --labels validation_utils/labels.txt --wav --tflite_path ```\n",
+ "\n",
+ "**The feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "\n"
+ ]
+ },
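+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A minimal sketch of single inference with the TFLite interpreter is shown below; it assumes ```features``` is a (1, 250) int8 NumPy array produced by the same MFCC feature extraction used during training:\n",
+    "\n",
+    "```python\n",
+    "import numpy as np\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "interpreter = tf.lite.Interpreter(model_path='dnn_quantized.tflite')\n",
+    "interpreter.allocate_tensors()\n",
+    "input_details = interpreter.get_input_details()[0]\n",
+    "output_details = interpreter.get_output_details()[0]\n",
+    "\n",
+    "interpreter.set_tensor(input_details['index'], features)\n",
+    "interpreter.invoke()\n",
+    "probabilities = interpreter.get_tensor(output_details['index'])[0]\n",
+    "print('Predicted label index:', int(np.argmax(probabilities)))\n",
+    "```"
+   ]
+  },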
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/README.md b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
new file mode 100644
index 0000000..6d3f666
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32
+
+## Description
+This is a floating point fp32 version of the DNN Large model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | fp32 |
+| SHA-1 Hash | 1ce04d01ed7decc016076a868f22858d8f092942 |
+| Size (Bytes) | 1985048 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| accuracy | 86.65% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_multiplication_x: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Deployable |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_multiplication_x: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 250) | fp32 | models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input | fp32 | [1, 250] | The input is the processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | fp32 | models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity | fp32 | [1, 12] | The probabilities of the 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
new file mode 100644
index 0000000..38082c2
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
@@ -0,0 +1,62 @@
+benchmark:
+ benchmark_metrics:
+ accuracy: 86.65%
+ benchmark_name: Google Speech Commands test set
+description: This is a floating point fp32 version of the DNN Large model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: fp32
+ file_size_bytes: 1985048
+ filename: dnn_l.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: 1ce04d01ed7decc016076a868f22858d8f092942
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+  - description: The input is the processed MFCCs of shape (1, 250)
+ example_input:
+ path: models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input
+ shape:
+ - 1
+ - 250
+ type: fp32
+ use_case: Random input for model regression.
+ input_datatype: fp32
+ name: input
+ shape:
+ - 1
+ - 250
+ output_nodes:
+  - description: The probabilities of the 12 keywords.
+ example_output:
+ path: models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: fp32
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: fp32
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: false
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - FULLY_CONNECTED
+ - RELU
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/dnn_l.tflite b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/dnn_l.tflite
new file mode 100644
index 0000000..e5cbfe0
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/dnn_l.tflite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dffdcf515fce70988132d98f8007564e0b303d0b463c422f039e2074cb29fc51
+size 1985048
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
new file mode 100644
index 0000000..5c996be
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77bdd1510d16c990db4276179453648d51e6526f4fbbe29091c183316184c827
+size 1128
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
new file mode 100644
index 0000000..98bc3fd
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5bc29017fbb5d27101b9b96399c1fbc857a07871d759ca39a20de0b39ecc0396
+size 176
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/README.md b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/README.md
new file mode 100644
index 0000000..db3aa64
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8
+
+## Description
+This is a fully quantized int8 version of the DNN Large model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | int8 |
+| SHA-1 Hash | 2b1ee34e4c87ba6f24092c7457593227099efaf1 |
+| Size (Bytes) | 502272 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 86.01% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_check_mark: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Deployable |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_check_mark: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 250) | int8 | models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input | int8 | [1, 250] | The input is the processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | int8 | models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity | int8 | [1, 12] | The probabilities of the 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
new file mode 100644
index 0000000..7040a89
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
@@ -0,0 +1,62 @@
+benchmark:
+ benchmark_metrics:
+ accuracy: 86.01%
+ benchmark_name: Google Speech Commands test set
+description: This is a fully quantized int8 version of the DNN Large model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: int8
+ file_size_bytes: 502272
+ filename: dnn_l_quantized.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: 2b1ee34e4c87ba6f24092c7457593227099efaf1
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+  - description: The input is the processed MFCCs of shape (1, 250)
+ example_input:
+ path: models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input
+ shape:
+ - 1
+ - 250
+ type: int8
+ use_case: Random input for model regression.
+ input_datatype: int8
+ name: input
+ shape:
+ - 1
+ - 250
+ output_nodes:
+  - description: The probabilities of the 12 keywords.
+ example_output:
+ path: models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: int8
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: int8
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: true
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - FULLY_CONNECTED
+ - RELU
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_large/tflite_int8/dnn_l_quantized.tflite b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/dnn_l_quantized.tflite
similarity index 100%
rename from models/keyword_spotting/dnn_large/tflite_int8/dnn_l_quantized.tflite
rename to models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/dnn_l_quantized.tflite
diff --git a/models/keyword_spotting/dnn_large/tflite_int8/testing_input/input/0.npy b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
similarity index 100%
rename from models/keyword_spotting/dnn_large/tflite_int8/testing_input/input/0.npy
rename to models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
diff --git a/models/keyword_spotting/dnn_large/tflite_int8/testing_output/Identity/0.npy b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
similarity index 100%
rename from models/keyword_spotting/dnn_large/tflite_int8/testing_output/Identity/0.npy
rename to models/keyword_spotting/dnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/saved_model/dnn_large/keras_metadata.pb b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/saved_model/dnn_large/keras_metadata.pb
new file mode 100644
index 0000000..364939d
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/saved_model/dnn_large/keras_metadata.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06b16edf52376997d110f024184261ef588cd3309d8175c8769aa45482cd0164
+size 10087
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/saved_model/dnn_large/saved_model.pb b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/saved_model/dnn_large/saved_model.pb
new file mode 100644
index 0000000..59d2022
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/saved_model/dnn_large/saved_model.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acb29f21ca89c9369eca08f583daaf3c7e64cd26ab5fec4cb0b95cf9d04435ef
+size 85126
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/saved_model/dnn_large/variables/variables.data-00000-of-00001 b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/saved_model/dnn_large/variables/variables.data-00000-of-00001
new file mode 100644
index 0000000..4d554fc
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/saved_model/dnn_large/variables/variables.data-00000-of-00001
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11f3d672a01c44c0e86a5f485ddfe4b2e5c8c6770563a6e0520297ed1e029579
+size 1985615
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/saved_model/dnn_large/variables/variables.index b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/saved_model/dnn_large/variables/variables.index
new file mode 100644
index 0000000..fc9e90c
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/saved_model/dnn_large/variables/variables.index
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1ae6b295e9da819138459f3684755e71c2fac683da141510581996541e509e6
+size 642
diff --git a/models/keyword_spotting/dnn_large/tflite_int8/ckpt/checkpoint b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/weights/checkpoint
similarity index 100%
rename from models/keyword_spotting/dnn_large/tflite_int8/ckpt/checkpoint
rename to models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/weights/checkpoint
diff --git a/models/keyword_spotting/dnn_large/tflite_int8/ckpt/dnn_0.87_ckpt.data-00000-of-00001 b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/weights/dnn_0.87_ckpt.data-00000-of-00001
similarity index 100%
rename from models/keyword_spotting/dnn_large/tflite_int8/ckpt/dnn_0.87_ckpt.data-00000-of-00001
rename to models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/weights/dnn_0.87_ckpt.data-00000-of-00001
diff --git a/models/keyword_spotting/dnn_large/tflite_int8/ckpt/dnn_0.87_ckpt.index b/models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/weights/dnn_0.87_ckpt.index
similarity index 100%
rename from models/keyword_spotting/dnn_large/tflite_int8/ckpt/dnn_0.87_ckpt.index
rename to models/keyword_spotting/dnn_large/model_package_tf/model_archive/model_source/weights/dnn_0.87_ckpt.index
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/model_core_utils/__init__.py b/models/keyword_spotting/dnn_large/model_package_tf/model_core_utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/model_core_utils/models.py b/models/keyword_spotting/dnn_large/model_package_tf/model_core_utils/models.py
new file mode 100644
index 0000000..1978136
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/model_core_utils/models.py
@@ -0,0 +1,327 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Model definitions for simple keyword spotting."""
+
+import math
+
+import tensorflow as tf
+
+
+def prepare_model_settings(label_count, sample_rate, clip_duration_ms,
+ window_size_ms, window_stride_ms,
+ dct_coefficient_count):
+ """Calculates common settings needed for all models.
+
+ Args:
+ label_count: How many classes are to be recognized.
+ sample_rate: Number of audio samples per second.
+ clip_duration_ms: Length of each audio clip to be analyzed.
+ window_size_ms: Duration of frequency analysis window.
+ window_stride_ms: How far to move in time between frequency windows.
+ dct_coefficient_count: Number of frequency bins to use for analysis.
+
+ Returns:
+ Dictionary containing common settings.
+ """
+ desired_samples = int(sample_rate * clip_duration_ms / 1000)
+ window_size_samples = int(sample_rate * window_size_ms / 1000)
+ window_stride_samples = int(sample_rate * window_stride_ms / 1000)
+ length_minus_window = (desired_samples - window_size_samples)
+ if length_minus_window < 0:
+ spectrogram_length = 0
+ else:
+ spectrogram_length = 1 + int(length_minus_window / window_stride_samples)
+ fingerprint_size = dct_coefficient_count * spectrogram_length
+
+ return {
+ 'desired_samples': desired_samples,
+ 'window_size_samples': window_size_samples,
+ 'window_stride_samples': window_stride_samples,
+ 'spectrogram_length': spectrogram_length,
+ 'dct_coefficient_count': dct_coefficient_count,
+ 'fingerprint_size': fingerprint_size,
+ 'label_count': label_count,
+ 'sample_rate': sample_rate,
+ }
+
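+# Illustrative note: with the DNN Large recipe in recreate_model.sh
+# (sample_rate=16000, clip_duration_ms=1000, window_size_ms=40,
+# window_stride_ms=40, dct_coefficient_count=10 and 12 labels),
+# window_size_samples = window_stride_samples = 640, so
+# spectrogram_length = 1 + (16000 - 640) // 640 = 25 and
+# fingerprint_size = 10 * 25 = 250, matching the model's (1, 250) input.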
+
+def create_model(model_settings, model_architecture, model_size_info, is_training):
+ """Builds a tf.keras model of the requested architecture compatible with the settings.
+
+ Args:
+ model_settings: Dictionary of information about the model.
+ model_architecture: String specifying which kind of model to create.
+      model_size_info: Array with specific information for the chosen architecture
+        (e.g. convolutional parameters, number of layers).
+      is_training: Whether the model is being created for training; used by the
+        Basic LSTM architecture to decide whether to unroll the LSTM.
+
+ Returns:
+ A tf.keras Model with the requested architecture.
+
+ Raises:
+ Exception: If the architecture type isn't recognized.
+ """
+
+ if model_architecture == 'dnn':
+ return create_dnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'cnn':
+ return create_cnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'ds_cnn':
+ return create_ds_cnn_model(model_settings, model_size_info)
+ elif model_architecture == 'single_fc':
+ return create_single_fc_model(model_settings)
+ elif model_architecture == 'basic_lstm':
+ return create_basic_lstm_model(model_settings, model_size_info, is_training)
+ else:
+        raise Exception(f'model_architecture argument {model_architecture} not recognized, '
+                        f'should be one of "dnn", "cnn", "ds_cnn", "single_fc" or "basic_lstm"')
+
+
+def create_single_fc_model(model_settings):
+ """Builds a model with a single fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+
+ Returns:
+ tf.keras Model of the 'SINGLE_FC' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+ # Fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(inputs)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_basic_lstm_model(model_settings, model_size_info, is_training):
+ """Builds a model with a basic lstm layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Length of the array defines the number of hidden-layers and
+ each element in the array represent the number of neurons in that layer.
+ is_training: Determining whether the use of the model is for training or for something else.
+
+ Returns:
+ tf.keras Model of the 'Basic_LSTM' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size))
+
+ # LSTM layer, and unrolling depending on whether you are training or not
+ if is_training:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=False)(x)
+ else:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=True)(x)
+
+ # Outputs a fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_dnn_model(model_settings, model_size_info):
+ """Builds a model with multiple hidden fully-connected layers.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Length of the array defines the number of hidden-layers and
+ each element in the array represent the number of neurons in that layer.
+
+ Returns:
+ tf.keras Model of the 'DNN' architecture.
+ """
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ # First fully connected layer.
+ x = tf.keras.layers.Dense(units=model_size_info[0], activation='relu')(inputs)
+
+ # Hidden layers with ReLU activations.
+ for i in range(1, len(model_size_info)):
+ x = tf.keras.layers.Dense(units=model_size_info[i], activation='relu')(x)
+
+ # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_cnn_model(model_settings, model_size_info):
+ """Builds a model with 2 convolution layers followed by a linear layer and a hidden fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines the first and second convolution parameters in
+ {number of conv features, conv filter height, width, stride in y,x dir.},
+ followed by linear layer size and fully-connected layer size.
+
+ Returns:
+ tf.keras Model of the 'CNN' architecture.
+ """
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ first_filter_count = model_size_info[0]
+ first_filter_height = model_size_info[1] # Time axis.
+ first_filter_width = model_size_info[2] # Frequency axis.
+ first_filter_stride_y = model_size_info[3] # Time axis.
+ first_filter_stride_x = model_size_info[4] # Frequency_axis.
+
+ second_filter_count = model_size_info[5]
+ second_filter_height = model_size_info[6] # Time axis.
+ second_filter_width = model_size_info[7] # Frequency axis.
+ second_filter_stride_y = model_size_info[8] # Time axis.
+ second_filter_stride_x = model_size_info[9] # Frequency axis.
+
+ linear_layer_size = model_size_info[10]
+ fc_size = model_size_info[11]
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size']), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=first_filter_count,
+ kernel_size=(first_filter_height, first_filter_width),
+ strides=(first_filter_stride_y, first_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Second convolution.
+ x = tf.keras.layers.Conv2D(filters=second_filter_count,
+ kernel_size=(second_filter_height, second_filter_width),
+ strides=(second_filter_stride_y, second_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Flatten for fully connected layers.
+ x = tf.keras.layers.Flatten()(x)
+
+ # Fully connected layer with no activation.
+ x = tf.keras.layers.Dense(units=linear_layer_size)(x)
+
+    # Fully connected layer followed by batch norm and ReLU activation.
+ x = tf.keras.layers.Dense(units=fc_size)(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Output fully connected.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_ds_cnn_model(model_settings, model_size_info):
+ """Builds a model with convolutional & depthwise separable convolutional layers.
+
+ For more details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines number of layers, followed by the DS-Conv layer
+ parameters in the order {number of conv features, conv filter height,
+ width and stride in y,x dir.} for each of the layers.
+
+ Returns:
+ tf.keras Model of the 'DS-CNN' architecture.
+ """
+
+ label_count = model_settings['label_count']
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ t_dim = input_time_size
+ f_dim = input_frequency_size
+
+ # Extract model dimensions from model_size_info.
+ num_layers = model_size_info[0]
+ conv_feat = [None]*num_layers
+ conv_kt = [None]*num_layers
+ conv_kf = [None]*num_layers
+ conv_st = [None]*num_layers
+ conv_sf = [None]*num_layers
+
+ i = 1
+ for layer_no in range(0, num_layers):
+ conv_feat[layer_no] = model_size_info[i]
+ i += 1
+ conv_kt[layer_no] = model_size_info[i]
+ i += 1
+ conv_kf[layer_no] = model_size_info[i]
+ i += 1
+ conv_st[layer_no] = model_size_info[i]
+ i += 1
+ conv_sf[layer_no] = model_size_info[i]
+ i += 1
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size']), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # Depthwise separable convolutions.
+ for layer_no in range(0, num_layers):
+ if layer_no == 0:
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[0],
+ kernel_size=(conv_kt[0], conv_kf[0]),
+ strides=(conv_st[0], conv_sf[0]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ else:
+ # Depthwise convolution.
+ x = tf.keras.layers.DepthwiseConv2D(kernel_size=(conv_kt[layer_no], conv_kf[layer_no]),
+ strides=(conv_sf[layer_no], conv_st[layer_no]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ # Pointwise convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[layer_no], kernel_size=(1, 1))(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ t_dim = math.ceil(t_dim/float(conv_st[layer_no]))
+ f_dim = math.ceil(f_dim/float(conv_sf[layer_no]))
+
+ # Global average pool.
+ x = tf.keras.layers.AveragePooling2D(pool_size=(t_dim, f_dim), strides=1)(x)
+
+ # Squeeze before passing to output fully connected layer.
+ x = tf.reshape(x, shape=(-1, conv_feat[layer_no]))
+
+ # Output connected layer.
+ output = tf.keras.layers.Dense(units=label_count, activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/optimisations.py b/models/keyword_spotting/dnn_large/model_package_tf/optimisations.py
new file mode 100644
index 0000000..16b6f4c
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/optimisations.py
@@ -0,0 +1,259 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for optimizing simple keyword spotting models using clustering API."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+import tensorflow_model_optimization as tfmot
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def print_model_weight_clusters(model):
+    """Prints the number of unique kernel weight values (clusters) for each layer of the model."""
+ for layer in model.layers:
+ if isinstance(layer, tf.keras.layers.Wrapper):
+ weights = layer.trainable_weights
+ else:
+ weights = layer.weights
+ for weight in weights:
+ if "kernel" in weight.name:
+ unique_count = len(np.unique(weight))
+ print(
+ f"{layer.name}/{weight.name}: {unique_count} clusters "
+ )
+
+
+def optimize():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model to optimize from checkpoint.
+    model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, True)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ cluster_weights = tfmot.clustering.keras.cluster_weights
+ CentroidInitialization = tfmot.clustering.keras.CentroidInitialization
+
+ clustering_params = {
+ 'number_of_clusters': 32,
+ 'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS}
+
+ clustered_model = cluster_weights(model, **clustering_params)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Train the model with clustering applied.
+ clustered_model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data)
+
+ stripped_clustered_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+
+ print_model_weight_clusters(stripped_clustered_model)
+
+ # Save the clustered model weights
+ train_dir = Path(FLAGS.train_dir) / "optimized"
+ train_dir.mkdir(parents=True, exist_ok=True)
+
+ stripped_clustered_model.save_weights((train_dir /
+ (FLAGS.model_architecture +
+ "_clustered_ckpt")))
+
+ # Test the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ stripped_clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ test_loss, test_acc = stripped_clustered_model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='3750,750',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--save_step_interval',
+ type=int,
+ default=100,
+ help='Save model checkpoint every save_steps.')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from before fine-tuning.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ optimize()
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/recreate_model.sh b/models/keyword_spotting/dnn_large/model_package_tf/recreate_model.sh
new file mode 100644
index 0000000..cb54318
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/recreate_model.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ckpt_path=model_archive/model_source/weights/dnn_0.87_ckpt
+train=false
+
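+# Usage:
+#   bash ./recreate_model.sh                 # convert the supplied pre-trained checkpoint to TFLite (fp32 and int8)
+#   bash ./recreate_model.sh --train         # train DNN Large from scratch, then convert the baseline checkpoint
+#   bash ./recreate_model.sh --ckpt <path>   # convert a specific checkpoint (path is user-supplied)
+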
+# Parse command line args
+while (( $# >= 1 )); do
+ case $1 in
+ --ckpt)
+ if [ "$2" ]; then
+ ckpt_path=$2
+ shift
+ else
+ printf 'ERROR: "--ckpt" requires a path to be supplied.\n'
+ exit 1
+ fi
+ ;;
+ --train)
+ train=true
+ break;;
+ *) shift;
+ esac;
+done
+
+
+# DNN Large training
+if [ "$train" = true ]
+then
+python train.py --model_architecture dnn --model_size_info 436 436 436 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 40 --learning_rate 0.0005,0.0001,0.00002 --how_many_training_steps 10000,10000,10000 --summaries_dir work/DNN/DNN_L/retrain_logs --train_dir work/DNN/DNN_L/training
+fi
+
+# Conversion to TFLite fp32
+python convert_to_tflite.py --model_architecture dnn --model_size_info 436 436 436 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 40 --checkpoint $ckpt_path --no-quantize
+
+# Conversion to TFLite int8
+python convert_to_tflite.py --model_architecture dnn --model_size_info 436 436 436 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 40 --checkpoint $ckpt_path --inference_type int8
+
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/requirements.txt b/models/keyword_spotting/dnn_large/model_package_tf/requirements.txt
new file mode 100644
index 0000000..3448cff
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/requirements.txt
@@ -0,0 +1,3 @@
+numpy == 1.19.5
+tensorflow == 2.5.0
+tensorflow-model-optimization == 0.6.0
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/train.py b/models/keyword_spotting/dnn_large/model_package_tf/train.py
new file mode 100644
index 0000000..8c488b3
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/train.py
@@ -0,0 +1,227 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for training simple keyword spotting models."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def train():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model.
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, True)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Callbacks.
+ train_dir = Path(FLAGS.train_dir) / "best"
+ train_dir.mkdir(parents=True, exist_ok=True)
+ model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
+ filepath=(train_dir / (FLAGS.model_architecture + "_{val_accuracy:.3f}_ckpt")),
+ save_weights_only=True,
+ monitor='val_accuracy',
+ mode='max',
+ save_best_only=True)
+ tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.summaries_dir)
+
+ # Train the model.
+ model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data,
+ callbacks=[model_checkpoint_callback, tensorboard_callback])
+
+ # Test and save the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ test_loss, test_acc = model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+ model.save(f'saved_model/{FLAGS.model_architecture}')
+ model.save(f'keras/{FLAGS.model_architecture}.h5')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='15000,3000',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--summaries_dir',
+ type=str,
+ default='/tmp/retrain_logs',
+ help='Where to save summary logs for TensorBoard.')
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ train()
diff --git a/models/keyword_spotting/dnn_large/model_package_tf/validation_utils/labels.txt b/models/keyword_spotting/dnn_large/model_package_tf/validation_utils/labels.txt
new file mode 100644
index 0000000..ba41645
--- /dev/null
+++ b/models/keyword_spotting/dnn_large/model_package_tf/validation_utils/labels.txt
@@ -0,0 +1,12 @@
+_silence_
+_unknown_
+yes
+no
+up
+down
+left
+right
+on
+off
+stop
+go
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_large/tflite_int8/README.md b/models/keyword_spotting/dnn_large/tflite_int8/README.md
deleted file mode 100644
index 40a0507..0000000
--- a/models/keyword_spotting/dnn_large/tflite_int8/README.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# DNN Large INT8
-
-## Description
-This is a fully quantized version (asymmetrical int8) of the DNN Large model developed by Arm, with training checkpoints, from the Hello Edge paper. Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
-
-## License
-[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
-
-## Related Materials
-### Class Labels
-The class labels associated with this model can be downloaded by running the script `get_class_labels.sh`.
-
-### Model Recreation Code
-Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m.
-
-## Network Information
-| Network Information | Value |
-|---------------------|------------------|
-| Framework | TensorFlow Lite |
-| SHA-1 Hash | 2b1ee34e4c87ba6f24092c7457593227099efaf1 |
-| Size (Bytes) | 502272 |
-| Provenance | https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m |
-| Paper | https://arxiv.org/abs/1711.07128 |
-
-## Accuracy
-Dataset: Google Speech Commands Test Set
-
-| Metric | Value |
-|--------|-------|
-| Accuracy | 0.863 |
-
-## Performance
-| Platform | Optimized |
-|----------|:---------:|
-| Cortex-A |:heavy_check_mark: |
-| Cortex-M |:heavy_check_mark: |
-| Mali GPU |:heavy_check_mark: |
-| Ethos U |:heavy_check_mark: |
-
-### Key
-* :heavy_check_mark: - Will run on this platform.
-* :heavy_multiplication_x: - Will not run on this platform.
-
-
-
-## Optimizations
-| Optimization | Value |
-|-----------------|---------|
-| Quantization | INT8 |
-
-## Network Inputs
-| Input Node Name | Shape | Description |
-|-----------------|---------|-------------|
-| input | (1, 250) | The input is a processed MFCCs of shape (1, 250) |
-
-## Network Outputs
-| Output Node Name | Shape | Description |
-|------------------|---------|-------------|
-| Identity | (1, 12) | The probability on 12 keywords. |
diff --git a/models/keyword_spotting/dnn_large/tflite_int8/definition.yaml b/models/keyword_spotting/dnn_large/tflite_int8/definition.yaml
deleted file mode 100644
index 68c8968..0000000
--- a/models/keyword_spotting/dnn_large/tflite_int8/definition.yaml
+++ /dev/null
@@ -1,41 +0,0 @@
-benchmark:
- Google Speech Commands test set:
- Accuracy: 86.26%
-description: 'This is a fully quantized version (asymmetrical int8) of the DNN Large
- model developed by Arm, with training checkpoints, from the Hello Edge paper. Code
- to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m'
-license:
-- Apache-2.0
-network:
- file_size_bytes: 502272
- filename: dnn_l_quantized.tflite
- framework: TensorFlow Lite
- hash:
- algorithm: sha1
- value: 2b1ee34e4c87ba6f24092c7457593227099efaf1
- provenance: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
- quality_level: null
-network_parameters:
- input_nodes:
- - description: The input is a processed MFCCs of shape (1, 250)
- example_input:
- path: models/keyword_spotting/dnn_large/tflite_int8/testing_input/input
- name: input
- shape:
- - 1
- - 250
- output_nodes:
- - description: The probability on 12 keywords.
- name: Identity
- shape:
- - 1
- - 12
- test_output_path: models/keyword_spotting/dnn_large/tflite_int8/testing_output/Identity
-operators:
- TensorFlow Lite:
- - DEQUANTIZE
- - FULLY_CONNECTED
- - QUANTIZE
- - RELU
- - SOFTMAX
-paper: https://arxiv.org/abs/1711.07128
diff --git a/models/keyword_spotting/dnn_large/tflite_int8/get_class_labels.sh b/models/keyword_spotting/dnn_large/tflite_int8/get_class_labels.sh
deleted file mode 100755
index e59caf5..0000000
--- a/models/keyword_spotting/dnn_large/tflite_int8/get_class_labels.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the License); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an AS IS BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/usr/bin/env bash
-
-wget https://raw.githubusercontent.com/ARM-software/ML-KWS-for-MCU/e9cf319e9aa2ff71d433e111477dd95329fb94cb/Pretrained_models/labels.txt
-mv labels.txt labelmappings.txt
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/README.md b/models/keyword_spotting/dnn_medium/model_package_tf/README.md
new file mode 100644
index 0000000..8005a3c
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/README.md
@@ -0,0 +1,115 @@
+# DNN Medium model package
+
+This folder contains code that will allow you to recreate the DNN Medium keyword spotting model from
+the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf).
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Model Package Overview
+| Model | DNN_Medium |
+|:---------------: |:------------------------------------------:|
+| **Format**: | Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |
+| **Feature**: | Keyword spotting for Arm Cortex-M CPUs |
+| **Architectural Delta w.r.t. Vanilla**: | None |
+| **Domain**: | Keyword spotting |
+| **Package Quality**: | Optimised |
+
+## Model Recreation
+
+In order to recreate the model you will first need to be running ```Python3.7``` and to install the requirements listed in ```requirements.txt```.
+
+Once these requirements are satisfied, you can execute the recreation script contained in this folder by running:
+
+```bash
+bash ./recreate_model.sh
+```
+
+Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder
+to generate the TFLite files and perform evaluation on the test sets. Both an fp32 version and a quantized version will be produced.
+The quantized version is produced using post-training quantization to fully quantize the model.
+
+If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:
+
+```bash
+bash ./recreate_model.sh --train
+```
+
+Training will then be performed and should produce a model that reaches the accuracy stated in this repository.
+Note that the TFLite export will still use the pre-trained checkpoint files, so to export your newly trained model you will need to re-run the script
+and this time supply the path to the new checkpoint files you want to use, for example:
+
+```bash
+bash ./recreate_model.sh --ckpt <checkpoint path>
+```
+
+
+## Training
+
+To train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:
+
+```
+python train.py --model_architecture dnn --model_size_info 128 128 128
+```
+The command line argument *--model_size_info* is used to pass the neural network layer
+dimensions, such as the number of layers and the convolution filter size/stride, as a list to models.py,
+which builds the TensorFlow graph based on the provided model architecture
+and layer dimensions. For more info on *model_size_info* for each network architecture see
+[models.py](model_core_utils/models.py).
+
+The training commands with all the hyperparameters to reproduce the models shown in the
+[paper](https://arxiv.org/pdf/1711.07128.pdf) are given [here](recreate_model.sh).
+
+## Testing
+To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:
+```
+python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint path>
+```
+The parameters used here should match those used in the Training step.
+
+## Optimization
+
+We introduce a new *optional* step to optimize the trained keyword spotting model for deployment.
+
+Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. We use 32 weight clusters and the kmeans++ cluster initialization method as the clustering hyperparameters.
+
+To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.
+You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.
+
+To apply the optimization and fine-tuning, run the following command:
+```
+python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint path>
+```
+The parameters used here should match those used in the Training step, except for the number of training steps.
+The number of training steps is reduced since the optimization step only requires fine-tuning.
+
+This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model.
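+
+For reference, applying this kind of weight clustering with the TensorFlow Model Optimization toolkit typically looks like the sketch below. The helper name and fine-tuning settings are illustrative placeholders; `optimisations.py` in this folder remains the authoritative implementation.
+
+```python
+import tensorflow as tf
+import tensorflow_model_optimization as tfmot
+
+
+def cluster_and_finetune(model, train_data, fine_tune_steps=1000):
+    """Sketch: apply 32-cluster weight clustering with kmeans++ init, then fine-tune."""
+    clustering_params = {
+        'number_of_clusters': 32,
+        'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS,
+    }
+    clustered_model = tfmot.clustering.keras.cluster_weights(model, **clustering_params)
+    clustered_model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
+                            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+                            metrics=['accuracy'])
+    clustered_model.fit(train_data, steps_per_epoch=fine_tune_steps, epochs=1)
+    # Strip the clustering wrappers before saving weights for quantization and TFLite conversion.
+    return tfmot.clustering.keras.strip_clustering(clustered_model)
+```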
+
+## Quantization and TFLite Conversion
+
+As part of the update we now use TensorFlow's
+[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to
+make quantization of the trained models super simple.
+
+To quantize your trained model (e.g. a DNN) run:
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint path> [--inference_type int8|int16]
+```
+The parameters used here should match those used in the Training step.
+
+The inference_type parameter is *optional*: use it if a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32, which keeps the model inputs and outputs in floating point.
+
+This step will produce a quantized TFLite file *dnn_quantized.tflite*.
+You can test the accuracy of this quantized model on the test set by running:
+```
+python evaluation.py --tflite_path dnn_quantized.tflite
+```
+The parameters used here should match those used in the Training step.
+
+`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:
+
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint path> --no-quantize
+```
+
+This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.
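+
+For reference, the int8 conversion performed by `convert_to_tflite.py` boils down to the standard TFLite post-training quantization flow sketched below. This is a minimal sketch: the function name is illustrative, and the script itself additionally supports int16 and fp32 conversion and loads the model from a checkpoint.
+
+```python
+import tensorflow as tf
+
+
+def quantize_to_int8(keras_model, rep_dataset, tflite_path='dnn_quantized.tflite'):
+    """Sketch: full-integer post-training quantization with int8 inputs and outputs."""
+    converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+    converter.optimizations = [tf.lite.Optimize.DEFAULT]
+    # rep_dataset is a callable yielding [mfcc] samples, e.g. drawn from the validation set.
+    converter.representative_dataset = rep_dataset
+    converter.inference_input_type = tf.int8
+    converter.inference_output_type = tf.int8
+    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+    with open(tflite_path, 'wb') as f:
+        f.write(converter.convert())
+```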
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/convert_to_tflite.py b/models/keyword_spotting/dnn_medium/model_package_tf/convert_to_tflite.py
new file mode 100644
index 0000000..64ab8df
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/convert_to_tflite.py
@@ -0,0 +1,234 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for converting and quantizing a trained keyword spotting
+ model and saving to TFLite."""
+
+import argparse
+
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from evaluation import tflite_test
+
+NUM_REP_DATA_SAMPLES = 100 # How many samples to use for post training quantization.
+
+
+def convert(model_settings, audio_processor, checkpoint, quantize, inference_type, tflite_path):
+ """Load our trained floating point model and convert it.
+
+ TFLite conversion or post training quantization is performed and the
+ resulting model is saved as a TFLite file.
+ We use samples from the validation set to do post training quantization.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ checkpoint: Path to training checkpoint to load.
+ quantize: Whether to quantize the model or convert to fp32 TFLite model.
+ inference_type: Input/output type of the quantized model.
+ tflite_path: Output TFLite file save path.
+ """
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(checkpoint).expect_partial()
+
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+
+ def _rep_dataset():
+ """Generator function to produce representative dataset."""
+ i = 0
+ for mfcc, label in val_data:
+            if i >= NUM_REP_DATA_SAMPLES:
+ break
+ i += 1
+ yield [mfcc]
+
+ if quantize:
+ # Quantize model and save to disk.
+ tflite_model = post_training_quantize(model, inference_type, _rep_dataset)
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Quantized model saved to {tflite_path}.')
+ else:
+ converter = tf.lite.TFLiteConverter.from_keras_model(model)
+ tflite_model = converter.convert()
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Converted model saved to {tflite_path}.')
+
+
+def post_training_quantize(keras_model, inference_type, rep_dataset):
+ """Perform post training quantization and returns the TFLite model ready for saving.
+
+ See https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization for
+ more details.
+
+ Args:
+ keras_model: The trained tf Keras model used for post training quantization.
+ inference_type: Input/output type of the quantized model.
+ rep_dataset: Function to use as a representative dataset, must be callable.
+
+ Returns:
+ Quantized TFLite model ready for saving to disk.
+ """
+ converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+ converter.optimizations = [tf.lite.Optimize.DEFAULT]
+
+    if inference_type == 'int8':
+        converter.inference_input_type = tf.int8
+        converter.inference_output_type = tf.int8
+        supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS_INT8
+    elif inference_type == 'int16':
+        converter.inference_input_type = tf.int16
+        converter.inference_output_type = tf.int16
+        supported_ops = tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
+    else:
+        # Default ('fp32'): quantize internals but keep float32 model inputs and outputs.
+        supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS
+
+    # Full integer post training quantization needs a representative dataset.
+    converter.representative_dataset = rep_dataset
+    converter.target_spec.supported_ops = [supported_ops]
+
+ tflite_model = converter.convert()
+
+ return tflite_model
+
+
+def main():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.quantize:
+ tflite_path = f'{FLAGS.model_architecture}_quantized.tflite'
+ else:
+ tflite_path = f'{FLAGS.model_architecture}.tflite'
+
+ # Load floating point model from checkpoint and convert it.
+ convert(model_settings, audio_processor, FLAGS.checkpoint,
+ FLAGS.quantize, FLAGS.inference_type, tflite_path)
+
+ # Test the newly converted model on the test set.
+ tflite_test(model_settings, audio_processor, tflite_path)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from.')
+ parser.add_argument(
+ '--quantize',
+ dest='quantize',
+ action="store_true",
+ default=True,
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--no-quantize',
+ dest='quantize',
+ action="store_false",
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--inference_type',
+ type=str,
+ default='fp32',
+        help='If quantize is true, whether the model input and output should be fp32, int8 or int16')
+
+ FLAGS, _ = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/data_processing/__init__.py b/models/keyword_spotting/dnn_medium/model_package_tf/data_processing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/data_processing/data_preprocessing.py b/models/keyword_spotting/dnn_medium/model_package_tf/data_processing/data_preprocessing.py
new file mode 100644
index 0000000..05cf5ba
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/data_processing/data_preprocessing.py
@@ -0,0 +1,462 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Modifications Copyright 2023 Arm Inc. All Rights Reserved.
+# Modified to use TensorFlow 2.0 and data pipelines.
+#
+"""Functions for loading and preparing data for keyword spotting."""
+
+import os
+import re
+import sys
+import urllib
+from pathlib import Path
+import tarfile
+import hashlib
+import random
+import math
+from enum import Enum
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.ops import gen_audio_ops as audio_ops
+
+MAX_NUM_WAVS_PER_CLASS = 2**27 - 1 # ~134M
+RANDOM_SEED = 59185
+BACKGROUND_NOISE_DIR_NAME = '_background_noise_'
+SILENCE_LABEL = '_silence_'
+SILENCE_INDEX = 0
+UNKNOWN_WORD_INDEX = 1
+UNKNOWN_WORD_LABEL = '_unknown_'
+
+
+def load_wav_file(wav_filename, desired_samples):
+ """Loads and then decodes a given 16bit PCM wav file.
+
+ Decoded audio is scaled to the range [-1, 1] and padded or cropped to the desired number of samples.
+
+ Args:
+ wav_filename: 16bit PCM wav file to load.
+ desired_samples: Number of samples wanted from the audio file.
+
+ Returns:
+ Tuple consisting of the decoded audio and sample rate.
+ """
+ wav_file = tf.io.read_file(wav_filename)
+ decoded_wav = audio_ops.decode_wav(wav_file, desired_channels=1, desired_samples=desired_samples)
+
+ return decoded_wav.audio, decoded_wav.sample_rate
+
+
+def calculate_mfcc(audio_signal, audio_sample_rate, window_size, window_stride, num_mfcc):
+ """Returns Mel Frequency Cepstral Coefficients (MFCC) for a given audio signal.
+
+ Args:
+ audio_signal: Raw audio signal in range [-1, 1]
+ audio_sample_rate: Audio signal sample rate
+ window_size: Window size in samples for calculating spectrogram
+ window_stride: Window stride in samples for calculating spectrogram
+ num_mfcc: The number of MFCC features wanted.
+
+ Returns:
+        Calculated MFCC features.
+ """
+ spectrogram = audio_ops.audio_spectrogram(input=audio_signal, window_size=window_size, stride=window_stride,
+ magnitude_squared=True)
+
+ mfcc_features = audio_ops.mfcc(spectrogram, audio_sample_rate, dct_coefficient_count=num_mfcc)
+
+ return mfcc_features
+
+
+def which_set(filename, validation_percentage, testing_percentage):
+ """Determines which data partition the file should belong to.
+
+ We want to keep files in the same training, validation, or testing sets even
+ if new ones are added over time. This makes it less likely that testing
+ samples will accidentally be reused in training when long runs are restarted
+ for example. To keep this stability, a hash of the filename is taken and used
+ to determine which set it should belong to. This determination only depends on
+ the name and the set proportions, so it won't change as other files are added.
+ It's also useful to associate particular files as related (for example words
+ spoken by the same person), so anything after '_nohash_' in a filename is
+ ignored for set determination. This ensures that 'bobby_nohash_0.wav' and
+ 'bobby_nohash_1.wav' are always in the same set, for example.
+
+ Args:
+ filename: File path of the data sample.
+ validation_percentage: How much of the data set to use for validation.
+ testing_percentage: How much of the data set to use for testing.
+
+ Returns:
+ String, one of 'training', 'validation', or 'testing'.
+ """
+ base_name = os.path.basename(filename)
+ # We want to ignore anything after '_nohash_' in the file name when
+ # deciding which set to put a wav in, so the data set creator has a way of
+ # grouping wavs that are close variations of each other.
+ hash_name = re.sub(r'_nohash_.*$', '', base_name)
+ # This looks a bit magical, but we need to decide whether this file should
+ # go into the training, testing, or validation sets, and we want to keep
+ # existing files in the same set even if more files are subsequently
+ # added.
+ # To do that, we need a stable way of deciding based on just the file name
+ # itself, so we do a hash of that and then use that to generate a
+ # probability value that we use to assign it.
+ hash_name_hashed = hashlib.sha1(tf.compat.as_bytes(hash_name)).hexdigest()
+ percentage_hash = ((int(hash_name_hashed, 16) %
+ (MAX_NUM_WAVS_PER_CLASS + 1)) *
+ (100.0 / MAX_NUM_WAVS_PER_CLASS))
+ if percentage_hash < validation_percentage:
+ result = 'validation'
+ elif percentage_hash < (testing_percentage + validation_percentage):
+ result = 'testing'
+ else:
+ result = 'training'
+ return result
+
+
+def prepare_words_list(wanted_words):
+ """Prepends common tokens to the custom word list.
+
+ Args:
+ wanted_words: List of strings containing custom words to spot.
+
+ Returns:
+ List of words with silence and unknown tokens added.
+ """
+ return [SILENCE_LABEL, UNKNOWN_WORD_LABEL] + wanted_words
+
+
+class AudioProcessor:
+ """Handles loading, partitioning, and preparing audio training data."""
+
+ class Modes(Enum):
+ TRAINING = 1
+ VALIDATION = 2
+ TESTING = 3
+
+ def __init__(self, data_url, data_dir, silence_percentage, unknown_percentage,
+ wanted_words, validation_percentage, testing_percentage, model_settings):
+ self.data_dir = Path(data_dir)
+ self.model_settings = model_settings
+ self.words_list = prepare_words_list(wanted_words)
+
+ self._tf_datasets = {}
+ self.background_data = None
+ self._set_size = {'training': 0, 'validation': 0, 'testing': 0}
+
+ self._download_and_extract_data(data_url, data_dir)
+ self._prepare_datasets(silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage)
+ self._prepare_background_data()
+
+ def get_data(self, mode, background_frequency=0, background_volume_range=0, time_shift=0):
+ """Returns the train, validation or test set for KWS as a TF Dataset.
+
+ Args:
+ mode: The set to return, see AudioProcessor.Modes enumeration.
+ background_frequency: How many of the samples have background noise mixed in.
+ background_volume_range: How loud the background noise should be, between 0 and 1.
+ time_shift: Range to randomly shift the training audio by in time.
+
+ Returns:
+ TF dataset that will generate tuples containing an mfcc and corresponding label.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ dataset = self._tf_datasets['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ dataset = self._tf_datasets['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ dataset = self._tf_datasets['testing']
+ else:
+            raise ValueError("Incorrect dataset type given")
+
+ use_background = (self.background_data is not None) and (mode == AudioProcessor.Modes.TRAINING)
+ dataset = dataset.map(lambda path, label: self._process_path(path, label, self.model_settings,
+ background_frequency, background_volume_range,
+ time_shift, use_background, self.background_data),
+ num_parallel_calls=tf.data.experimental.AUTOTUNE)
+
+ return dataset
+
+ def set_size(self, mode):
+ """Get the number of samples in the requested dataset partition.
+
+ Args:
+ mode: Which partition, see AudioProcessor.Modes enumeration.
+
+ Returns:
+ Number of samples in the partition.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ return self._set_size['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ return self._set_size['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ return self._set_size['testing']
+ else:
+            raise ValueError('Incorrect dataset type given')
+
+ @staticmethod
+ def _process_path(path, label, model_settings, background_frequency, background_volume_range, time_shift_samples,
+ use_background, background_data):
+ """Load wav files and calculate mfcc features.
+
+ Random shifting of samples and adding in background noise is done within this function as well.
+ This function is meant to be mapped onto a TF Dataset by using a lambda function.
+
+ Args:
+ path: Path to the wav file to load.
+ label: Integer label for classifying the audio clip.
+ model_settings: Dictionary of settings for model being trained.
+ background_frequency: How many clips will have background noise, 0.0 to 1.0.
+ background_volume_range: How loud the background noise will be.
+ time_shift_samples: How much to randomly shift the clips by.
+ use_background: Add in background noise to audio clips or not.
+ background_data: Ragged tensor of loaded background noise samples.
+
+ Returns:
+ Tuple of calculated flattened mfcc and its class label.
+ """
+
+ desired_samples = model_settings['desired_samples']
+ audio, sample_rate = load_wav_file(path, desired_samples=desired_samples)
+
+ # Make our own silence audio data.
+ if label == SILENCE_INDEX:
+ audio = tf.multiply(audio, 0)
+
+ # Shift samples start position and pad any gaps with zeros.
+ if time_shift_samples > 0:
+ time_shift_amount = tf.random.uniform(shape=(), minval=-time_shift_samples, maxval=time_shift_samples,
+ dtype=tf.int32)
+ else:
+ time_shift_amount = 0
+ if time_shift_amount > 0:
+ time_shift_padding = [[time_shift_amount, 0], [0, 0]]
+ time_shift_offset = [0, 0]
+ else:
+ time_shift_padding = [[0, -time_shift_amount], [0, 0]]
+ time_shift_offset = [-time_shift_amount, 0]
+
+ padded_foreground = tf.pad(audio, time_shift_padding, mode='CONSTANT')
+ sliced_foreground = tf.slice(padded_foreground, time_shift_offset, [desired_samples, -1])
+
+ # Get a random section of background noise.
+ if use_background:
+ background_index = tf.random.uniform(shape=(), maxval=background_data.shape[0], dtype=tf.int32)
+ background_sample = background_data[background_index]
+ background_offset = tf.random.uniform(shape=(), maxval=len(background_sample)-desired_samples,
+ dtype=tf.int32)
+ background_clipped = background_sample[background_offset:(background_offset + desired_samples)]
+ background_reshaped = tf.reshape(background_clipped, [desired_samples, 1])
+ if tf.random.uniform(shape=(), maxval=1) < background_frequency:
+ background_volume = tf.random.uniform(shape=(), maxval=background_volume_range)
+ else:
+ background_volume = tf.constant(0, dtype='float32')
+ else:
+ background_reshaped = np.zeros([desired_samples, 1], dtype=np.float32)
+ background_volume = tf.constant(0, dtype='float32')
+
+ # Mix in background noise.
+ background_mul = tf.multiply(background_reshaped, background_volume)
+ background_add = tf.add(background_mul, sliced_foreground)
+ background_clamp = tf.clip_by_value(background_add, -1.0, 1.0)
+
+ mfcc = calculate_mfcc(background_clamp, sample_rate, model_settings['window_size_samples'],
+ model_settings['window_stride_samples'],
+ model_settings['dct_coefficient_count'])
+ mfcc = tf.reshape(mfcc, [-1])
+
+ return mfcc, label
+
+ def _download_and_extract_data(self, data_url, target_directory):
+ """Downloads and extracts file to target directory.
+
+ If the file does not already exist download it and then untar into the target directory.
+
+ Args:
+ data_url: Web link to the tarred data to download.
+ target_directory: Directory to download and extract to.
+ """
+ target_directory = Path(target_directory)
+ target_directory.mkdir(exist_ok=True)
+
+ filename = data_url.split('/')[-1]
+ filepath = target_directory / filename
+
+ if not filepath.exists():
+ def _report_hook(block_num, block_size, total_size):
+ """Function to track download progress in urllib"""
+ read_so_far = block_num * block_size
+ percent = (read_so_far / total_size) * 100.0
+
+ s = f"\rDownloading {filename} {percent:.1f}%"
+
+ sys.stdout.write(s)
+ sys.stdout.flush()
+
+ filepath, _ = urllib.request.urlretrieve(data_url, filepath, _report_hook)
+ print()
+
+ print(f'Untarring {filename}...')
+ tarfile.open(filepath, 'r:gz').extractall(target_directory)
+
+ def _prepare_datasets(self, silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage):
+ """Split the data into train, validation and testing sets.
+
+ Silence and unknown data is added, then sets are converted to TF Datasets.
+
+ Args:
+ silence_percentage: Percent of words should be silence.
+ unknown_percentage: Percent of words that should be unknown.
+ wanted_words: List of words wanted to classify.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ """
+ # Make sure the shuffling and picking of unknowns is deterministic.
+ random.seed(RANDOM_SEED)
+ wanted_words_index = {}
+
+ for index, wanted_word in enumerate(wanted_words):
+ wanted_words_index[wanted_word] = index + 2
+
+ # Find all wav files in subfolders.
+ search_path = self.data_dir / '*' / '*.wav'
+ data_index, unknown_index, all_words = self._find_and_sort_wavs(search_path, validation_percentage,
+ testing_percentage, wanted_words_index)
+
+ for index, wanted_word in enumerate(wanted_words):
+ if wanted_word not in all_words:
+ raise Exception(f'Tried to find {wanted_word} in labels but only found: {", ".join(all_words.keys())}')
+
+ word_to_index = {}
+ for word in all_words:
+ if word in wanted_words_index:
+ word_to_index[word] = wanted_words_index[word]
+ else:
+ word_to_index[word] = UNKNOWN_WORD_INDEX
+ word_to_index[SILENCE_LABEL] = SILENCE_INDEX
+
+ # We need an arbitrary file to load as the input for the silence samples.
+ # It's multiplied by zero later, so the content doesn't matter.
+ silence_wav_path = data_index['training'][0]['file']
+ for set_index in ['validation', 'testing', 'training']:
+ set_size = len(data_index[set_index]) # Size before adding silence and unknown samples.
+ silence_size = int(math.ceil(set_size * silence_percentage / 100))
+ for _ in range(silence_size):
+ data_index[set_index].append({
+ 'label': SILENCE_LABEL,
+ 'file': silence_wav_path
+ })
+ # Pick some unknowns to add to each partition of the data set.
+ random.shuffle(unknown_index[set_index])
+ unknown_size = int(math.ceil(set_size * unknown_percentage / 100))
+ data_index[set_index].extend(unknown_index[set_index][:unknown_size])
+
+ self._set_size[set_index] = len(data_index[set_index]) # Size after adding silence and unknown samples.
+
+ # Make sure the ordering is random.
+ random.shuffle(data_index[set_index])
+
+ # Transform into TF Datasets ready for easier processing later.
+ labels, paths = list(zip(*[d.values() for d in data_index[set_index]]))
+ labels = [word_to_index[label] for label in labels]
+ self._tf_datasets[set_index] = tf.data.Dataset.from_tensor_slices((list(paths), labels))
+
+ def _find_and_sort_wavs(self, search_pattern, validation_percentage, testing_percentage, wanted_words_index):
+ """Find and sort wav files into known and unknown word sets.
+
+ Known words are files containing words in the list of wanted words.
+ Any other clip goes to the unknown label set. Labels come from the folder names.
+ All clips are also assigned to train, test and validation sets.
+
+ Args:
+ search_pattern: Path pattern used by glob to find wav files.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ wanted_words_index: Dict mapping wanted words to their label index.
+
+ Returns:
+ 3-tuple of known words, unknown words and mapping of all word labels.
+ """
+ data_index = {'validation': [], 'testing': [], 'training': []}
+ unknown_index = {'validation': [], 'testing': [], 'training': []}
+ all_words = {}
+
+ for wav_path in sorted(tf.io.gfile.glob(str(search_pattern))):
+ word = Path(wav_path).parent.name.lower()
+
+ # Treat the '_background_noise_' folder as a special case, since we expect
+ # it to contain long audio samples we mix in to improve training.
+ if word == BACKGROUND_NOISE_DIR_NAME:
+ continue
+
+ all_words[word] = True
+ set_index = which_set(wav_path, validation_percentage, testing_percentage)
+ # If it's a known class, store its detail, otherwise add it to the list
+ # we'll use to train the unknown label.
+ if word in wanted_words_index:
+ data_index[set_index].append({'label': word, 'file': wav_path})
+ else:
+ unknown_index[set_index].append({'label': word, 'file': wav_path})
+ if not all_words:
+ raise Exception('No .wavs found at ' + str(search_pattern))
+
+ return data_index, unknown_index, all_words
+
+ def _prepare_background_data(self):
+ """Searches a folder for background noise audio, and loads it into memory.
+
+ It's expected that the background audio samples will be in a subdirectory
+ named '_background_noise_' inside the 'data_dir' folder, as .wavs that match
+ the sample rate of the training data, but can be much longer in duration.
+
+ If the '_background_noise_' folder doesn't exist at all, this isn't an
+ error, it's just taken to mean that no background noise augmentation should
+ be used. If the folder does exist, but it's empty, that's treated as an
+ error.
+
+ Returns:
+ Ragged tensor of raw PCM-encoded audio samples of background noise.
+            None if '_background_noise_' folder doesn't exist.
+
+ Raises:
+ Exception: If files aren't found in the folder.
+ """
+ background_data = []
+ background_dir = Path(self.data_dir / BACKGROUND_NOISE_DIR_NAME)
+ if not background_dir.exists():
+ self.background_data = None
+ return
+
+ search_path = Path(background_dir / '*.wav')
+ for wav_path in tf.io.gfile.glob(str(search_path)):
+ wav_data, _ = load_wav_file(wav_path, desired_samples=-1)
+ background_data.append(tf.reshape(wav_data, [-1]))
+
+ if not background_data:
+ raise Exception('No background wav files were found in ' + str(search_path))
+
+        # Ragged tensor as we can't use lists in tf dataset map functions.
+ self.background_data = tf.ragged.stack(background_data)
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/dnn_m_inference_keras.py b/models/keyword_spotting/dnn_medium/model_package_tf/dnn_m_inference_keras.py
new file mode 100644
index 0000000..db7694a
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/dnn_m_inference_keras.py
@@ -0,0 +1,76 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import argparse
+
+
+def load_labels(filename):
+    """Read in labels, one label per line."""
+    with open(filename, "r") as f:
+        return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+
+ model = tf.keras.models.load_model(FLAGS.keras_file_path)
+ predictions = model.predict(x)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--keras_file_path',
+ type=str,
+ default='',
+ help='Path to the .h5 Keras model file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/dnn_m_inference_tflite.py b/models/keyword_spotting/dnn_medium/model_package_tf/dnn_m_inference_tflite.py
new file mode 100644
index 0000000..9f79d99
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/dnn_m_inference_tflite.py
@@ -0,0 +1,120 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import numpy as np
+import argparse
+
+
+def tflite_inference(input_data, tflite_path):
+ """Call forwards pass of TFLite file and returns the result.
+
+ Args:
+ input_data: Input data to use on forward pass.
+ tflite_path: Path to TFLite file to run.
+
+ Returns:
+ Output from inference.
+ """
+ supported_quant_dtypes = (np.int8, np.int16)
+ interpreter = tf.lite.Interpreter(model_path=tflite_path)
+ interpreter.allocate_tensors()
+
+ input_details = interpreter.get_input_details()
+ output_details = interpreter.get_output_details()
+
+ input_dtype = input_details[0]["dtype"]
+ output_dtype = output_details[0]["dtype"]
+
+ # Check if the input/output type is quantized,
+ # set scale and zero-point accordingly
+ if input_dtype in supported_quant_dtypes:
+ input_scale, input_zero_point = input_details[0]["quantization"]
+ else:
+ input_scale, input_zero_point = 1, 0
+
+ input_data = input_data / input_scale + input_zero_point
+ input_data = np.round(input_data) if input_dtype in supported_quant_dtypes else input_data
+
+ if output_dtype in supported_quant_dtypes:
+ output_scale, output_zero_point = output_details[0]["quantization"]
+ else:
+ output_scale, output_zero_point = 1, 0
+
+ interpreter.set_tensor(input_details[0]['index'], tf.cast(input_data, input_dtype))
+ interpreter.invoke()
+
+ output_data = interpreter.get_tensor(output_details[0]['index'])
+
+ output_data = output_scale * (output_data.astype(np.float32) - output_zero_point)
+
+ return output_data
+
+
+def load_labels(filename):
+    """Read in labels, one label per line."""
+    with open(filename, "r") as f:
+        return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+ predictions = tflite_inference(x, FLAGS.tflite_path)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ default='',
+ help='Path to TFLite file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/evaluation.py b/models/keyword_spotting/dnn_medium/model_package_tf/evaluation.py
new file mode 100644
index 0000000..4481dcd
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/evaluation.py
@@ -0,0 +1,250 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for testing trained keyword spotting models from checkpoint files and TFLite files."""
+
+import argparse
+
+import numpy as np
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from dnn_m_inference_tflite import tflite_inference
+
+
+def tflite_test(model_settings, audio_processor, tflite_path):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A TFLite model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ tflite_path: Path to TFLite file to use for inference.
+ """
+ # Evaluate on validation set.
+ print("Running TFLite evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+ expected_indices = np.concatenate([y for x, y in val_data])
+ predicted_indices = []
+
+ for mfcc, label in val_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+    print(f'Validation accuracy = {val_accuracy * 100:.2f}% '
+ f'(N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TFLite evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(1)
+ expected_indices = np.concatenate([y for x, y in test_data])
+ predicted_indices = []
+
+ for mfcc, label in test_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+    print(f'Test accuracy = {test_accuracy * 100:.2f}% '
+ f'(N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def keras_test(model_settings, audio_processor, model):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A loaded keras model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ model: Loaded keras model.
+ """
+ # Evaluate on validation set.
+ print("Running TF evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in val_data])
+
+ predictions = model.predict(val_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+    print(f'Validation accuracy = {val_accuracy * 100:.2f}% '
+ f'(N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TF evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in test_data])
+
+ predictions = model.predict(test_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+    print(f'Test accuracy = {test_accuracy * 100:.2f}% '
+ f'(N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def calculate_accuracy(predicted_indices, expected_indices):
+ """Calculates and returns accuracy.
+
+ Args:
+ predicted_indices: List of predicted integer indices.
+ expected_indices: List of expected integer indices.
+
+ Returns:
+ Accuracy value between 0 and 1.
+ """
+ correct_prediction = tf.equal(predicted_indices, expected_indices)
+ accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+ return accuracy
+
+
+def evaluate():
+ """Calculate accuracy and confusion matrices on validation and test sets.
+
+ Model is created and weights loaded from supplied command line arguments.
+ """
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.tflite_path:
+ tflite_test(model_settings, audio_processor, FLAGS.tflite_path)
+
+ if FLAGS.checkpoint:
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+ keras_test(model_settings, audio_processor, model)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from')
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ help='Path to TFLite file to use for evaluation')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ evaluate()
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/how_to_guidance.ipynb b/models/keyword_spotting/dnn_medium/model_package_tf/how_to_guidance.ipynb
new file mode 100644
index 0000000..ac8b78c
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/how_to_guidance.ipynb
@@ -0,0 +1,428 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.\n",
+ "#\n",
+ "# SPDX-License-Identifier: Apache-2.0\n",
+ "#\n",
+ "# Licensed under the Apache License, Version 2.0 (the License); you may\n",
+ "# not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an AS IS BASIS, WITHOUT\n",
+ "# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# DNN_Medium - Optimised\n",
+ "\n",
+ "Here we reproduce the models with our established codebase and ModelPackage approach for your convenience.\n",
+ "\n",
+ "## Model-Package Overview:\n",
+ "\n",
+ "| Model \t| DNN_Medium \t|\n",
+ "|:---------------:\t|:---------------------------------------------------------------:\t|\n",
+ "| **Format**: \t| Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |\n",
+ "| **Feature**: \t| Keyword spotting for Arm Cortex-M CPUs |\n",
+ "| **Architectural Delta w.r.t. Vanilla**: | None |\n",
+ "| **Domain**: \t| Keyword spotting |\n",
+ "| **Package Quality**: \t| Optimised |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Table of contents \n",
+ "\n",
+ "This how-to guidance presents the key steps to reproduce everything in this package. The contents are organised as below. We provided the internal navigation links for users to easy-jump among different sections. \n",
+ "\n",
+ " \n",
+ "* [1.0 Model recreation](#model_recreation)\n",
+ "\n",
+ "* [2.0 Training](#training)\n",
+ "\n",
+ "* [3.0 Testing](#testing)\n",
+ "\n",
+ "* [4.0 Optimization](#optimization)\n",
+ "\n",
+ "* [5.0 Quantization and TFLite conversion](#tflite_conversion)\n",
+ "\n",
+ "* [6.0 Inference the TFLite model files](#tflite_inference)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1.0 Model Recreation\n",
+ "\n",
+ "In order to recreate the model you will first need to be using ```Python3.7``` and install the requirements in ```requirements.txt```.\n",
+ "\n",
+ "Once you have these requirements satisfied you can execute the recreation script contained within this folder, just run:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 13:21:58.189962: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 13:22:48.489206: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 13:22:48.528844: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:22:48.528880: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:22:48.548795: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 13:22:48.548866: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 13:22:48.551645: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 13:22:48.551935: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 13:22:48.552501: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 13:22:48.553238: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 13:22:48.553392: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 13:22:48.553886: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:22:48.554176: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 13:22:48.554998: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:22:48.555410: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:22:48.555527: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:22:48.994481: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:22:48.994520: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:22:48.994528: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:22:48.995028: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10939 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 13:22:50.146418: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 13:22:50.411740: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 13:22:50.411969: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 13:22:50.412348: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:22:50.412596: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:22:50.412627: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:22:50.412636: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:22:50.412643: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:22:50.412919: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10939 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 13:22:50.431567: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 13:22:50.433318: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.017ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.003ms.\n",
+ "\n",
+ "2023-01-31 13:22:50.470457: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 13:22:50.470496: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 13:22:50.473049: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 13:22:50.475051: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:22:50.475342: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:22:50.475376: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:22:50.475387: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:22:50.475395: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:22:50.475693: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10939 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "Converted model saved to dnn.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "2023-01-31 13:22:50.520336: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 265 9 7 5 18 11 12 17 5 8 14]\n",
+ " [ 0 6 346 9 0 2 22 5 1 0 1 5]\n",
+ " [ 0 9 8 323 8 14 3 5 0 1 2 33]\n",
+ " [ 0 4 0 2 304 1 3 3 4 17 9 3]\n",
+ " [ 0 8 1 19 1 326 2 1 7 0 0 12]\n",
+ " [ 0 2 24 2 3 1 304 13 0 0 0 3]\n",
+ " [ 0 10 1 1 4 1 4 336 1 2 0 3]\n",
+ " [ 1 10 1 1 7 2 0 2 326 9 1 3]\n",
+ " [ 1 2 0 1 27 0 1 1 11 321 4 4]\n",
+ " [ 2 5 0 0 16 2 2 1 1 2 318 1]\n",
+ " [ 0 13 0 43 6 13 1 2 3 3 1 287]]\n",
+ "Validation accuracy = 86.10%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 295 7 11 6 6 13 12 24 8 5 21]\n",
+ " [ 0 12 380 3 0 4 15 1 0 0 0 4]\n",
+ " [ 1 11 2 332 0 22 1 0 0 0 0 36]\n",
+ " [ 0 14 1 2 357 2 2 5 12 11 11 8]\n",
+ " [ 0 18 5 18 6 329 5 1 4 0 2 18]\n",
+ " [ 0 10 25 3 4 1 347 15 1 0 2 4]\n",
+ " [ 0 20 1 0 5 1 14 349 1 5 0 0]\n",
+ " [ 0 12 0 1 5 9 0 0 347 16 2 4]\n",
+ " [ 0 15 0 1 15 1 5 2 12 339 3 9]\n",
+ " [ 0 5 0 3 21 2 4 1 2 1 368 4]\n",
+ " [ 0 10 1 62 8 13 3 1 0 0 1 303]]\n",
+ "Test accuracy = 84.95%(N=4890)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 13:23:02.712653: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 13:23:53.488800: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 13:23:53.524175: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:23:53.524209: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:23:53.544183: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 13:23:53.544253: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 13:23:53.546889: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 13:23:53.547146: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 13:23:53.547744: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 13:23:53.548454: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 13:23:53.548596: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 13:23:53.548947: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:23:53.549238: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 13:23:53.549958: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:23:53.550439: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:23:53.550510: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:23:53.960933: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:23:53.960972: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:23:53.960979: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:23:53.961483: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10940 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 13:23:55.053376: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 13:23:55.321894: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 13:23:55.322084: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 13:23:55.322539: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:23:55.322808: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:23:55.322839: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:23:55.322850: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:23:55.322858: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:23:55.323143: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10940 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 13:23:55.347442: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 13:23:55.348486: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.011ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.002ms.\n",
+ "\n",
+ "2023-01-31 13:23:55.387556: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 13:23:55.387602: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 13:23:55.390277: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 13:23:55.392318: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:23:55.392627: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:23:55.392665: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:23:55.392681: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:23:55.392693: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:23:55.393015: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10940 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 13:23:55.414179: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "fully_quantize: 0, inference_type: 6, input_inference_type: 9, output_inference_type: 9\n",
+ "Quantized model saved to dnn_quantized.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 272 6 8 8 19 9 12 17 6 4 10]\n",
+ " [ 0 11 341 9 5 2 20 6 0 0 0 3]\n",
+ " [ 0 15 9 319 13 13 2 4 1 1 3 26]\n",
+ " [ 0 6 0 3 307 1 1 2 3 16 9 2]\n",
+ " [ 0 11 1 20 12 312 3 0 6 0 1 11]\n",
+ " [ 0 7 26 3 5 1 294 11 1 1 1 2]\n",
+ " [ 0 13 1 1 9 2 5 326 1 1 2 2]\n",
+ " [ 2 13 0 0 7 4 1 2 318 10 4 2]\n",
+ " [ 1 4 0 2 37 0 1 2 12 308 3 3]\n",
+ " [ 2 5 0 0 21 2 2 1 1 3 312 1]\n",
+ " [ 0 16 1 43 9 15 1 3 1 3 1 279]]\n",
+ "Validation accuracy = 84.57%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 303 7 13 6 4 12 9 22 8 6 18]\n",
+ " [ 0 13 370 5 4 3 15 1 1 0 2 5]\n",
+ " [ 0 12 6 335 4 19 1 1 1 0 0 26]\n",
+ " [ 0 14 1 4 354 1 0 3 15 14 11 8]\n",
+ " [ 0 26 5 26 10 316 5 2 3 0 1 12]\n",
+ " [ 0 15 25 2 9 1 334 17 1 0 2 6]\n",
+ " [ 0 19 1 0 10 1 14 338 4 4 4 1]\n",
+ " [ 0 16 1 2 8 8 1 0 339 11 6 4]\n",
+ " [ 0 15 0 1 27 0 6 2 12 329 3 7]\n",
+ " [ 0 9 0 3 22 2 4 1 2 2 360 6]\n",
+ " [ 0 20 0 63 16 12 1 3 1 1 6 279]]\n",
+ "Test accuracy = 83.13%(N=4890)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!bash ./recreate_model.sh"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder to generate the TFLite files and perform evaluation on the test set. Both an fp32 version and a quantized version will be produced. The quantized version will use post-training quantization to fully quantize it.\n",
+ "\n",
+ "If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --train\n",
+ "```\n",
+ "\n",
+ "Training is then performed and should produce a model to the stated accuracy in this repository. Note that exporting to TFLite will still happen with the baseline pre-trained checkpoint files, so you will need to re-run the script and this time supply the path to the new checkpoint files you want to use, for example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --ckpt \n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2.0 Training\n",
+ "\n",
+ "The training scripts can be used to recreate any of the models from the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf) provided the right hyperparameters are used. The training commands with all the hyperparameters to reproduce the model in this repository are given [here](recreate_model.sh). The model in this part of the repository represents just one variation of the models from the paper, other varieties are covered in other parts of the repository.\n",
+ "\n",
+ "\n",
+ "As a general example of how to train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:\n",
+ "```\n",
+ "python train.py --model_architecture dnn --model_size_info 128 128 128\n",
+ "```\n",
+ "\n",
+ "The command line argument *--model_size_info* is used to pass the neural network layer\n",
+ "dimensions such as number of layers, convolution filter size/stride as a list to models.py,\n",
+ "which builds the TensorFlow graph based on the provided model architecture\n",
+ "and layer dimensions. For more info on *model_size_info* for each network architecture see\n",
+ "[models.py](model_core_utils/models.py).\n"
+ ]
+ },
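+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick illustration of how *--model_size_info* is interpreted for the DNN architecture, the hedged sketch below mirrors ```create_dnn_model``` in [models.py](model_core_utils/models.py): each entry in the list becomes one fully-connected hidden layer with ReLU activation, followed by a softmax output layer. The helper name ```build_dnn``` and the 250-feature input size (taken from the Network Inputs table in the model archive) are used here for illustration only.\n",
+ "\n",
+ "```python\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "def build_dnn(fingerprint_size, label_count, model_size_info=(128, 128, 128)):\n",
+ "    # One Dense + ReLU hidden layer per entry in model_size_info.\n",
+ "    inputs = tf.keras.Input(shape=(fingerprint_size,), name='input')\n",
+ "    x = inputs\n",
+ "    for units in model_size_info:\n",
+ "        x = tf.keras.layers.Dense(units=units, activation='relu')(x)\n",
+ "    # Softmax output over the keyword classes.\n",
+ "    outputs = tf.keras.layers.Dense(units=label_count, activation='softmax')(x)\n",
+ "    return tf.keras.Model(inputs, outputs)\n",
+ "\n",
+ "# DNN Medium: 250 flattened MFCC features in, 12 classes out.\n",
+ "build_dnn(fingerprint_size=250, label_count=12).summary()\n",
+ "```"
+ ]
+ },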
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3.0 Testing\n",
+ "To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:\n",
+ "```\n",
+ "python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters passed to this script should match those used in the Training step.**"
+ ]
+ },
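+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you prefer to evaluate from Python rather than the command line, the hedged sketch below follows what ```evaluation.py``` does when given a checkpoint: rebuild the model with ```models.create_model```, load the weights, then run Keras evaluation. Here ```model_settings``` (from ```models.prepare_model_settings```), ```test_data``` (a ```tf.data.Dataset``` of MFCC/label pairs) and the checkpoint path are assumed to exist already.\n",
+ "\n",
+ "```python\n",
+ "import tensorflow as tf\n",
+ "from model_core_utils import models\n",
+ "\n",
+ "# Assumed: model_settings and test_data have been prepared beforehand.\n",
+ "model = models.create_model(model_settings, 'dnn', [128, 128, 128], False)\n",
+ "model.load_weights('path/to/dnn_checkpoint').expect_partial()\n",
+ "\n",
+ "model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),\n",
+ "              metrics=['accuracy'])\n",
+ "test_loss, test_acc = model.evaluate(test_data.batch(100))\n",
+ "print(f'Test accuracy: {test_acc * 100:.2f}%')\n",
+ "```"
+ ]
+ },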
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4.0 Optimization\n",
+ "\n",
+ "We introduce an *optional* step to optimize the trained keyword spotting model for deployment.\n",
+ "\n",
+ "Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. 32 weight clusters and kmeans++ cluster intialization method are used as the clustering hyperparameters.\n",
+ "\n",
+ "To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.\n",
+ "You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.\n",
+ "\n",
+ "To apply the optimization and fine-tuning, run the following command:\n",
+ "```\n",
+ "python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step, except for the number of training steps.\n",
+ "The number of training steps is reduced since the optimization step only requires fine-tuning.**\n",
+ "\n",
+ "This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model."
+ ]
+ },
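+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The clustering step is a thin wrapper around the TensorFlow Model Optimization API. The hedged sketch below shows the core of what ```optimisations.py``` does with the hyperparameters mentioned above (32 clusters, k-means++ initialization); the data pipeline and the fine-tuning call are omitted, and the learning rate and output checkpoint name are illustrative only.\n",
+ "\n",
+ "```python\n",
+ "import tensorflow as tf\n",
+ "import tensorflow_model_optimization as tfmot\n",
+ "\n",
+ "# 'model' is assumed to be a trained Keras model restored from a checkpoint.\n",
+ "clustering_params = {\n",
+ "    'number_of_clusters': 32,\n",
+ "    'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS}\n",
+ "clustered_model = tfmot.clustering.keras.cluster_weights(model, **clustering_params)\n",
+ "\n",
+ "clustered_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),\n",
+ "                        loss=tf.keras.losses.SparseCategoricalCrossentropy(),\n",
+ "                        metrics=['accuracy'])\n",
+ "# ... fine-tune with clustered_model.fit(...) on the training data ...\n",
+ "\n",
+ "# Strip the clustering wrappers before saving or converting the weights.\n",
+ "final_model = tfmot.clustering.keras.strip_clustering(clustered_model)\n",
+ "final_model.save_weights('dnn_clustered_ckpt')\n",
+ "```"
+ ]
+ },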
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5.0 Quantization and TFLite Conversion\n",
+ "\n",
+ "You can now use TensorFlow's\n",
+ "[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to\n",
+ "make quantization of the trained models super simple.\n",
+ "\n",
+ "To quantize your trained model (e.g. a DNN) run:\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint [--inference_type int8|int16]\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "The ```inference_type``` parameter is *optional* and to be used if a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32.\n",
+ "\n",
+ "In this example, this step will produce a quantized TFLite file *dnn_quantized.tflite*."
+ ]
+ },
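+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Post-training quantization in TensorFlow is driven by the standard TFLite converter. A minimal int8 sketch (not necessarily identical to ```convert_to_tflite.py```) is shown below; it assumes ```model``` is the trained Keras model and ```calibration_data``` is an array of MFCC feature vectors (for example drawn from the training set) used to calibrate activation ranges.\n",
+ "\n",
+ "```python\n",
+ "import numpy as np\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "def representative_dataset():\n",
+ "    # Yield a few hundred calibration samples, one at a time, as float32.\n",
+ "    for sample in calibration_data[:300]:\n",
+ "        yield [np.expand_dims(sample, axis=0).astype(np.float32)]\n",
+ "\n",
+ "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n",
+ "converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
+ "converter.representative_dataset = representative_dataset\n",
+ "converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]\n",
+ "converter.inference_input_type = tf.int8   # fully quantized inputs/outputs\n",
+ "converter.inference_output_type = tf.int8\n",
+ "\n",
+ "with open('dnn_quantized.tflite', 'wb') as f:\n",
+ "    f.write(converter.convert())\n",
+ "```"
+ ]
+ },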
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can test the accuracy of this quantized model on the test set by running:\n",
+ "```\n",
+ "python evaluation.py --tflite_path dnn_quantized.tflite\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:\n",
+ "\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint --no-quantize\n",
+ "```\n",
+ "\n",
+ "This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.\n"
+ ]
+ },
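+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For reference, the core of the TFLite accuracy evaluation that ```evaluation.py --tflite_path``` performs looks roughly like the hedged sketch below. ```test_data``` is again assumed to be a ```tf.data.Dataset``` of MFCC/label pairs; for the int8 model the inputs are scaled with the quantization parameters read from the interpreter.\n",
+ "\n",
+ "```python\n",
+ "import numpy as np\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "interpreter = tf.lite.Interpreter(model_path='dnn_quantized.tflite')\n",
+ "interpreter.allocate_tensors()\n",
+ "input_detail = interpreter.get_input_details()[0]\n",
+ "output_detail = interpreter.get_output_details()[0]\n",
+ "\n",
+ "correct = total = 0\n",
+ "for mfcc, label in test_data:  # assumed dataset of single (feature, label) pairs\n",
+ "    features = mfcc.numpy().reshape(input_detail['shape']).astype(np.float32)\n",
+ "    if input_detail['dtype'] == np.int8:\n",
+ "        scale, zero_point = input_detail['quantization']\n",
+ "        features = np.round(features / scale + zero_point).astype(np.int8)\n",
+ "    interpreter.set_tensor(input_detail['index'], features)\n",
+ "    interpreter.invoke()\n",
+ "    prediction = np.argmax(interpreter.get_tensor(output_detail['index']))\n",
+ "    correct += int(prediction == int(label))\n",
+ "    total += 1\n",
+ "print(f'Test accuracy = {100 * correct / total:.2f}% (N={total})')\n",
+ "```"
+ ]
+ },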
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 6.0 Single inference of the TFLite model files \n",
+ "\n",
+ "You can conduct TFLite inference for .fp32 and .int8 model files by using the following command: \n",
+ "\n",
+ "```python dnn_m_inference_tflite.py --labels validation_utils/labels.txt --wav --tflite_path ```\n",
+ "\n",
+ "**The feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "\n"
+ ]
+ },
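+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you just want to sanity-check a converted file without the audio front end, the hedged sketch below runs the int8 TFLite model on the example input shipped in the model archive and maps the top score to a label. The relative paths and the one-label-per-line ```labels.txt``` format are assumptions based on this package's layout.\n",
+ "\n",
+ "```python\n",
+ "import numpy as np\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "tflite_path = 'model_archive/TFLite/tflite_int8/dnn_m_quantized.tflite'\n",
+ "example_input = 'model_archive/TFLite/tflite_int8/testing_input/input/0.npy'\n",
+ "\n",
+ "interpreter = tf.lite.Interpreter(model_path=tflite_path)\n",
+ "interpreter.allocate_tensors()\n",
+ "input_detail = interpreter.get_input_details()[0]\n",
+ "output_detail = interpreter.get_output_details()[0]\n",
+ "\n",
+ "# The example .npy is assumed to already match the model's input shape and dtype.\n",
+ "interpreter.set_tensor(input_detail['index'], np.load(example_input))\n",
+ "interpreter.invoke()\n",
+ "scores = interpreter.get_tensor(output_detail['index'])[0]\n",
+ "\n",
+ "labels = [line.strip() for line in open('validation_utils/labels.txt')]\n",
+ "print('Predicted keyword:', labels[int(np.argmax(scores))])\n",
+ "```"
+ ]
+ },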
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/README.md b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
new file mode 100644
index 0000000..54631cd
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32
+
+## Description
+This is a floating point fp32 version of the DNN Medium model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | fp32 |
+| SHA-1 Hash | 3c20c6ee24ee41ed6db968ff58d69f5823c94036 |
+| Size (Bytes) | 797768 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 84.95% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_multiplication_x: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Deployable |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_multiplication_x: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 250) | fp32 | models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input | fp32 | [1, 250] | The input is processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | fp32 | models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity | fp32 | [1, 12] | The probabilities of the 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
new file mode 100644
index 0000000..a650fd3
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
@@ -0,0 +1,62 @@
+benchmark:
+ benchmark_metrics:
+ accuracy: 84.95%
+ benchmark_name: Google Speech Commands test set
+description: This is a floating point fp32 version of the DNN Medium model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: fp32
+ file_size_bytes: 797768
+ filename: dnn_m.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: 3c20c6ee24ee41ed6db968ff58d69f5823c94036
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+ - description: The input is processed MFCCs of shape (1, 250)
+ example_input:
+ path: models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input
+ shape:
+ - 1
+ - 250
+ type: fp32
+ use_case: Random input for model regression.
+ input_datatype: fp32
+ name: input
+ shape:
+ - 1
+ - 250
+ output_nodes:
+ - description: The probabilities of the 12 keywords.
+ example_output:
+ path: models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: fp32
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: fp32
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: false
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - FULLY_CONNECTED
+ - RELU
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/dnn_m.tflite b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/dnn_m.tflite
new file mode 100644
index 0000000..e4e30d7
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/dnn_m.tflite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8380c3ff3a3152c5ab5cc2a226c73707924d906e468f708513ffa84d6e9a1d96
+size 797768
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
new file mode 100644
index 0000000..85f3e34
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1f811913684442a9517879b173e29799094e4261cbef84c0a84536564179349
+size 1128
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
new file mode 100644
index 0000000..6af5cd7
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fed63e3ed1b354a3927bf735223654a482c6745299f5e2a57ed3974dfef295f1
+size 176
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/README.md b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/README.md
new file mode 100644
index 0000000..1e65aad
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8
+
+## Description
+This is a fully quantized int8 version of the DNN Medium model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | int8 |
+| SHA-1 Hash | 7e138f99cfc6a603a1fc735a2d9c3e28a41a6a43 |
+| Size (Bytes) | 203832 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 83.93% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_check_mark: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Deployable |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_check_mark: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 250) | int8 | models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input | int8 | [1, 250] | The input is processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | int8 | models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity | int8 | [1, 12] | The probabilities of the 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
new file mode 100644
index 0000000..c519ab1
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
@@ -0,0 +1,62 @@
+benchmark:
+ benchmark_metrics:
+ Accuracy: 83.93%
+ benchmark_name: Google Speech Commands test set
+description: This is a fully quantized int8 version of the DNN Medium model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: int8
+ file_size_bytes: 203832
+ filename: dnn_m_quantized.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: 7e138f99cfc6a603a1fc735a2d9c3e28a41a6a43
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+ - description: The input is processed MFCCs of shape (1, 250)
+ example_input:
+ path: models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input
+ shape:
+ - 1
+ - 250
+ type: int8
+ use_case: Random input for model regression.
+ input_datatype: int8
+ name: input
+ shape:
+ - 1
+ - 250
+ output_nodes:
+ - description: The probabilities of the 12 keywords.
+ example_output:
+ path: models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: int8
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: int8
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: true
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - FULLY_CONNECTED
+ - RELU
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_medium/tflite_int8/dnn_m_quantized.tflite b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/dnn_m_quantized.tflite
similarity index 100%
rename from models/keyword_spotting/dnn_medium/tflite_int8/dnn_m_quantized.tflite
rename to models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/dnn_m_quantized.tflite
diff --git a/models/keyword_spotting/dnn_medium/tflite_int8/testing_input/input/0.npy b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
similarity index 100%
rename from models/keyword_spotting/dnn_medium/tflite_int8/testing_input/input/0.npy
rename to models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
diff --git a/models/keyword_spotting/dnn_medium/tflite_int8/testing_output/Identity/0.npy b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
similarity index 100%
rename from models/keyword_spotting/dnn_medium/tflite_int8/testing_output/Identity/0.npy
rename to models/keyword_spotting/dnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/saved_model/dnn_medium/keras_metadata.pb b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/saved_model/dnn_medium/keras_metadata.pb
new file mode 100644
index 0000000..08ef7e5
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/saved_model/dnn_medium/keras_metadata.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4dfba08e6695d3429dc605cf00dd1e6950f646faf61fc9876de9471f66ee419
+size 10087
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/saved_model/dnn_medium/saved_model.pb b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/saved_model/dnn_medium/saved_model.pb
new file mode 100644
index 0000000..770dcc1
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/saved_model/dnn_medium/saved_model.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef3a9281ac9bc4de4ce805938bfdb673c0c06627ce977e11521c0782c1999256
+size 85126
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/saved_model/dnn_medium/variables/variables.data-00000-of-00001 b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/saved_model/dnn_medium/variables/variables.data-00000-of-00001
new file mode 100644
index 0000000..afb21fe
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/saved_model/dnn_medium/variables/variables.data-00000-of-00001
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69f2943b2684b7c153e67808422daa3f61b229dd3a6092b5ae5af95d1eaf3ff6
+size 798335
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/saved_model/dnn_medium/variables/variables.index b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/saved_model/dnn_medium/variables/variables.index
new file mode 100644
index 0000000..7a51ce6
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/saved_model/dnn_medium/variables/variables.index
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d85b2373052882c55abdeb07a4c061ad4aa23c0c36a72db08dc17a515d30363
+size 641
diff --git a/models/keyword_spotting/dnn_medium/tflite_int8/ckpt/checkpoint b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/weights/checkpoint
similarity index 100%
rename from models/keyword_spotting/dnn_medium/tflite_int8/ckpt/checkpoint
rename to models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/weights/checkpoint
diff --git a/models/keyword_spotting/dnn_medium/tflite_int8/ckpt/dnn_0.86_ckpt.data-00000-of-00001 b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/weights/dnn_0.86_ckpt.data-00000-of-00001
similarity index 100%
rename from models/keyword_spotting/dnn_medium/tflite_int8/ckpt/dnn_0.86_ckpt.data-00000-of-00001
rename to models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/weights/dnn_0.86_ckpt.data-00000-of-00001
diff --git a/models/keyword_spotting/dnn_medium/tflite_int8/ckpt/dnn_0.86_ckpt.index b/models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/weights/dnn_0.86_ckpt.index
similarity index 100%
rename from models/keyword_spotting/dnn_medium/tflite_int8/ckpt/dnn_0.86_ckpt.index
rename to models/keyword_spotting/dnn_medium/model_package_tf/model_archive/model_source/weights/dnn_0.86_ckpt.index
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/model_core_utils/__init__.py b/models/keyword_spotting/dnn_medium/model_package_tf/model_core_utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/model_core_utils/models.py b/models/keyword_spotting/dnn_medium/model_package_tf/model_core_utils/models.py
new file mode 100644
index 0000000..1978136
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/model_core_utils/models.py
@@ -0,0 +1,327 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Model definitions for simple keyword spotting."""
+
+import math
+
+import tensorflow as tf
+
+
+def prepare_model_settings(label_count, sample_rate, clip_duration_ms,
+ window_size_ms, window_stride_ms,
+ dct_coefficient_count):
+ """Calculates common settings needed for all models.
+
+ Args:
+ label_count: How many classes are to be recognized.
+ sample_rate: Number of audio samples per second.
+ clip_duration_ms: Length of each audio clip to be analyzed.
+ window_size_ms: Duration of frequency analysis window.
+ window_stride_ms: How far to move in time between frequency windows.
+ dct_coefficient_count: Number of frequency bins to use for analysis.
+
+ Returns:
+ Dictionary containing common settings.
+ """
+ desired_samples = int(sample_rate * clip_duration_ms / 1000)
+ window_size_samples = int(sample_rate * window_size_ms / 1000)
+ window_stride_samples = int(sample_rate * window_stride_ms / 1000)
+ length_minus_window = (desired_samples - window_size_samples)
+ if length_minus_window < 0:
+ spectrogram_length = 0
+ else:
+ spectrogram_length = 1 + int(length_minus_window / window_stride_samples)
+ fingerprint_size = dct_coefficient_count * spectrogram_length
+
+ return {
+ 'desired_samples': desired_samples,
+ 'window_size_samples': window_size_samples,
+ 'window_stride_samples': window_stride_samples,
+ 'spectrogram_length': spectrogram_length,
+ 'dct_coefficient_count': dct_coefficient_count,
+ 'fingerprint_size': fingerprint_size,
+ 'label_count': label_count,
+ 'sample_rate': sample_rate,
+ }
+
+
+def create_model(model_settings, model_architecture, model_size_info, is_training):
+ """Builds a tf.keras model of the requested architecture compatible with the settings.
+
+ Args:
+ model_settings: Dictionary of information about the model.
+ model_architecture: String specifying which kind of model to create.
+ model_size_info: Array with specific information for the chosen architecture
+ (e.g. convolutional parameters, number of layers).
+ is_training: Whether the model is being built for training or for inference.
+
+ Returns:
+ A tf.keras Model with the requested architecture.
+
+ Raises:
+ Exception: If the architecture type isn't recognized.
+ """
+
+ if model_architecture == 'dnn':
+ return create_dnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'cnn':
+ return create_cnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'ds_cnn':
+ return create_ds_cnn_model(model_settings, model_size_info)
+ elif model_architecture == 'single_fc':
+ return create_single_fc_model(model_settings)
+ elif model_architecture == 'basic_lstm':
+ return create_basic_lstm_model(model_settings, model_size_info, is_training)
+ else:
+ raise Exception(f'model_architecture argument {model_architecture} not recognized, '
+ 'should be one of "single_fc", "dnn", "basic_lstm", "cnn" or "ds_cnn"')
+
+
+def create_single_fc_model(model_settings):
+ """Builds a model with a single fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+
+ Returns:
+ tf.keras Model of the 'SINGLE_FC' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+ # Fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(inputs)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_basic_lstm_model(model_settings, model_size_info, is_training):
+ """Builds a model with a basic lstm layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Length of the array defines the number of hidden layers and
+ each element in the array represents the number of neurons in that layer.
+ is_training: Whether the model is being built for training or for inference.
+
+ Returns:
+ tf.keras Model of the 'Basic_LSTM' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size))
+
+ # LSTM layer, and unrolling depending on whether you are training or not
+ if is_training:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=False)(x)
+ else:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=True)(x)
+
+ # Outputs a fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_dnn_model(model_settings, model_size_info):
+ """Builds a model with multiple hidden fully-connected layers.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Length of the array defines the number of hidden layers and
+ each element in the array represents the number of neurons in that layer.
+
+ Returns:
+ tf.keras Model of the 'DNN' architecture.
+ """
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ # First fully connected layer.
+ x = tf.keras.layers.Dense(units=model_size_info[0], activation='relu')(inputs)
+
+ # Hidden layers with ReLU activations.
+ for i in range(1, len(model_size_info)):
+ x = tf.keras.layers.Dense(units=model_size_info[i], activation='relu')(x)
+
+ # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_cnn_model(model_settings, model_size_info):
+ """Builds a model with 2 convolution layers followed by a linear layer and a hidden fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines the first and second convolution parameters in
+ {number of conv features, conv filter height, width, stride in y,x dir.},
+ followed by linear layer size and fully-connected layer size.
+
+ Returns:
+ tf.keras Model of the 'CNN' architecture.
+ """
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ first_filter_count = model_size_info[0]
+ first_filter_height = model_size_info[1] # Time axis.
+ first_filter_width = model_size_info[2] # Frequency axis.
+ first_filter_stride_y = model_size_info[3] # Time axis.
+ first_filter_stride_x = model_size_info[4] # Frequency axis.
+
+ second_filter_count = model_size_info[5]
+ second_filter_height = model_size_info[6] # Time axis.
+ second_filter_width = model_size_info[7] # Frequency axis.
+ second_filter_stride_y = model_size_info[8] # Time axis.
+ second_filter_stride_x = model_size_info[9] # Frequency axis.
+
+ linear_layer_size = model_size_info[10]
+ fc_size = model_size_info[11]
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=first_filter_count,
+ kernel_size=(first_filter_height, first_filter_width),
+ strides=(first_filter_stride_y, first_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Second convolution.
+ x = tf.keras.layers.Conv2D(filters=second_filter_count,
+ kernel_size=(second_filter_height, second_filter_width),
+ strides=(second_filter_stride_y, second_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Flatten for fully connected layers.
+ x = tf.keras.layers.Flatten()(x)
+
+ # Fully connected layer with no activation.
+ x = tf.keras.layers.Dense(units=linear_layer_size)(x)
+
+ # Fully connected layer with ReLU activation.
+ x = tf.keras.layers.Dense(units=fc_size)(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Output fully connected.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_ds_cnn_model(model_settings, model_size_info):
+ """Builds a model with convolutional & depthwise separable convolutional layers.
+
+ For more details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines number of layers, followed by the DS-Conv layer
+ parameters in the order {number of conv features, conv filter height,
+ width and stride in y,x dir.} for each of the layers.
+
+ Returns:
+ tf.keras Model of the 'DS-CNN' architecture.
+ """
+
+ label_count = model_settings['label_count']
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ t_dim = input_time_size
+ f_dim = input_frequency_size
+
+ # Extract model dimensions from model_size_info.
+ num_layers = model_size_info[0]
+ conv_feat = [None]*num_layers
+ conv_kt = [None]*num_layers
+ conv_kf = [None]*num_layers
+ conv_st = [None]*num_layers
+ conv_sf = [None]*num_layers
+
+ i = 1
+ for layer_no in range(0, num_layers):
+ conv_feat[layer_no] = model_size_info[i]
+ i += 1
+ conv_kt[layer_no] = model_size_info[i]
+ i += 1
+ conv_kf[layer_no] = model_size_info[i]
+ i += 1
+ conv_st[layer_no] = model_size_info[i]
+ i += 1
+ conv_sf[layer_no] = model_size_info[i]
+ i += 1
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # Depthwise separable convolutions.
+ for layer_no in range(0, num_layers):
+ if layer_no == 0:
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[0],
+ kernel_size=(conv_kt[0], conv_kf[0]),
+ strides=(conv_st[0], conv_sf[0]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ else:
+ # Depthwise convolution.
+ x = tf.keras.layers.DepthwiseConv2D(kernel_size=(conv_kt[layer_no], conv_kf[layer_no]),
+ strides=(conv_sf[layer_no], conv_st[layer_no]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ # Pointwise convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[layer_no], kernel_size=(1, 1))(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ t_dim = math.ceil(t_dim/float(conv_st[layer_no]))
+ f_dim = math.ceil(f_dim/float(conv_sf[layer_no]))
+
+ # Global average pool.
+ x = tf.keras.layers.AveragePooling2D(pool_size=(t_dim, f_dim), strides=1)(x)
+
+ # Squeeze before passing to output fully connected layer.
+ x = tf.reshape(x, shape=(-1, conv_feat[layer_no]))
+
+ # Output connected layer.
+ output = tf.keras.layers.Dense(units=label_count, activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/optimisations.py b/models/keyword_spotting/dnn_medium/model_package_tf/optimisations.py
new file mode 100644
index 0000000..16b6f4c
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/optimisations.py
@@ -0,0 +1,259 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for optimizing simple keyword spotting models using clustering API."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+import tensorflow_model_optimization as tfmot
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def print_model_weight_clusters(model):
+
+ for layer in model.layers:
+ if isinstance(layer, tf.keras.layers.Wrapper):
+ weights = layer.trainable_weights
+ else:
+ weights = layer.weights
+ for weight in weights:
+ if "kernel" in weight.name:
+ unique_count = len(np.unique(weight))
+ print(
+ f"{layer.name}/{weight.name}: {unique_count} clusters "
+ )
+
+
+def optimize():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model to optimize from checkpoint.
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ cluster_weights = tfmot.clustering.keras.cluster_weights
+ CentroidInitialization = tfmot.clustering.keras.CentroidInitialization
+
+ clustering_params = {
+ 'number_of_clusters': 32,
+ 'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS}
+
+ clustered_model = cluster_weights(model, **clustering_params)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Train the model with clustering applied.
+ clustered_model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data)
+
+ stripped_clustered_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+
+ print_model_weight_clusters(stripped_clustered_model)
+
+ # Save the clustered model weights
+ train_dir = Path(FLAGS.train_dir) / "optimized"
+ train_dir.mkdir(parents=True, exist_ok=True)
+
+ stripped_clustered_model.save_weights((train_dir /
+ (FLAGS.model_architecture +
+ "_clustered_ckpt")))
+
+ # Test the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ stripped_clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ test_loss, test_acc = stripped_clustered_model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='3750,750',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--save_step_interval',
+ type=int,
+ default=100,
+ help='Save model checkpoint every save_steps.')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from before fine-tuning.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ optimize()
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/recreate_model.sh b/models/keyword_spotting/dnn_medium/model_package_tf/recreate_model.sh
new file mode 100644
index 0000000..2a465cf
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/recreate_model.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ckpt_path=model_archive/model_source/weights/dnn_0.86_ckpt
+train=false
+
+# Parse command line args
+while (( $# >= 1 )); do
+ case $1 in
+ --ckpt)
+ if [ "$2" ]; then
+ ckpt_path=$2
+ shift
+ else
+ printf 'ERROR: "--ckpt" requires a path to be supplied.\n'
+ exit 1
+ fi
+ ;;
+ --train)
+ train=true
+ break;;
+ *) shift;
+ esac;
+done
+
+
+# DNN Medium training
+if [ "$train" = true ]
+then
+python train.py --model_architecture dnn --model_size_info 256 256 256 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 40 --learning_rate 0.0005,0.0001,0.00002 --how_many_training_steps 10000,10000,10000 --summaries_dir work/DNN/DNN_M/retrain_logs --train_dir work/DNN/DNN_M/training
+fi
+
+# Conversion to TFLite fp32
+python convert_to_tflite.py --model_architecture dnn --model_size_info 256 256 256 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 40 --checkpoint $ckpt_path --no-quantize
+
+# Conversion to TFLite int8
+python convert_to_tflite.py --model_architecture dnn --model_size_info 256 256 256 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 40 --checkpoint $ckpt_path --inference_type int8
+
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/requirements.txt b/models/keyword_spotting/dnn_medium/model_package_tf/requirements.txt
new file mode 100644
index 0000000..3448cff
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/requirements.txt
@@ -0,0 +1,3 @@
+numpy == 1.19.5
+tensorflow == 2.5.0
+tensorflow-model-optimization == 0.6.0
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/train.py b/models/keyword_spotting/dnn_medium/model_package_tf/train.py
new file mode 100644
index 0000000..8c488b3
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/train.py
@@ -0,0 +1,227 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for training simple keyword spotting models."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def train():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model.
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, True)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Callbacks.
+ train_dir = Path(FLAGS.train_dir) / "best"
+ train_dir.mkdir(parents=True, exist_ok=True)
+ model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
+ filepath=(train_dir / (FLAGS.model_architecture + "_{val_accuracy:.3f}_ckpt")),
+ save_weights_only=True,
+ monitor='val_accuracy',
+ mode='max',
+ save_best_only=True)
+ tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.summaries_dir)
+
+ # Train the model.
+ model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data,
+ callbacks=[model_checkpoint_callback, tensorboard_callback])
+
+ # Test and save the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ test_loss, test_acc = model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+ model.save(f'saved_model/{FLAGS.model_architecture}')
+ model.save(f'keras/{FLAGS.model_architecture}.h5')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='15000,3000',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--summaries_dir',
+ type=str,
+ default='/tmp/retrain_logs',
+ help='Where to save summary logs for TensorBoard.')
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ train()
diff --git a/models/keyword_spotting/dnn_medium/model_package_tf/validation_utils/labels.txt b/models/keyword_spotting/dnn_medium/model_package_tf/validation_utils/labels.txt
new file mode 100644
index 0000000..ba41645
--- /dev/null
+++ b/models/keyword_spotting/dnn_medium/model_package_tf/validation_utils/labels.txt
@@ -0,0 +1,12 @@
+_silence_
+_unknown_
+yes
+no
+up
+down
+left
+right
+on
+off
+stop
+go
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_medium/tflite_int8/README.md b/models/keyword_spotting/dnn_medium/tflite_int8/README.md
deleted file mode 100644
index cfc52ce..0000000
--- a/models/keyword_spotting/dnn_medium/tflite_int8/README.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# DNN Medium INT8
-
-## Description
-This is a fully quantized version (asymmetrical int8) of the DNN Medium model developed by Arm, with training checkpoints, from the Hello Edge paper. Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
-
-## License
-[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
-
-## Related Materials
-### Class Labels
-The class labels associated with this model can be downloaded by running the script `get_class_labels.sh`.
-
-### Model Recreation Code
-Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m.
-
-## Network Information
-| Network Information | Value |
-|---------------------|------------------|
-| Framework | TensorFlow Lite |
-| SHA-1 Hash | 7e138f99cfc6a603a1fc735a2d9c3e28a41a6a43 |
-| Size (Bytes) | 203832 |
-| Provenance | https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m |
-| Paper | https://arxiv.org/abs/1711.07128 |
-
-## Accuracy
-Dataset: Google Speech Commands Test Set
-
-| Metric | Value |
-|--------|-------|
-| Accuracy | 0.844 |
-
-## Performance
-| Platform | Optimized |
-|----------|:---------:|
-| Cortex-A |:heavy_check_mark: |
-| Cortex-M |:heavy_check_mark: |
-| Mali GPU |:heavy_check_mark: |
-| Ethos U |:heavy_check_mark: |
-
-### Key
-* :heavy_check_mark: - Will run on this platform.
-* :heavy_multiplication_x: - Will not run on this platform.
-
-
-
-## Optimizations
-| Optimization | Value |
-|-----------------|---------|
-| Quantization | INT8 |
-
-## Network Inputs
-| Input Node Name | Shape | Description |
-|-----------------|---------|-------------|
-| input | (1, 250) | The input is a processed MFCCs of shape (1, 250) |
-
-## Network Outputs
-| Output Node Name | Shape | Description |
-|------------------|---------|-------------|
-| Identity | (1, 12) | The probability on 12 keywords. |
diff --git a/models/keyword_spotting/dnn_medium/tflite_int8/definition.yaml b/models/keyword_spotting/dnn_medium/tflite_int8/definition.yaml
deleted file mode 100644
index abcfbd8..0000000
--- a/models/keyword_spotting/dnn_medium/tflite_int8/definition.yaml
+++ /dev/null
@@ -1,41 +0,0 @@
-benchmark:
- Google Speech Commands test set:
- Accuracy: 84.44%
-description: 'This is a fully quantized version (asymmetrical int8) of the DNN Medium
- model developed by Arm, with training checkpoints, from the Hello Edge paper. Code
- to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m'
-license:
-- Apache-2.0
-network:
- file_size_bytes: 203832
- filename: dnn_m_quantized.tflite
- framework: TensorFlow Lite
- hash:
- algorithm: sha1
- value: 7e138f99cfc6a603a1fc735a2d9c3e28a41a6a43
- provenance: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
- quality_level: null
-network_parameters:
- input_nodes:
- - description: The input is a processed MFCCs of shape (1, 250)
- example_input:
- path: models/keyword_spotting/dnn_medium/tflite_int8/testing_input/input
- name: input
- shape:
- - 1
- - 250
- output_nodes:
- - description: The probability on 12 keywords.
- name: Identity
- shape:
- - 1
- - 12
- test_output_path: models/keyword_spotting/dnn_medium/tflite_int8/testing_output/Identity
-operators:
- TensorFlow Lite:
- - DEQUANTIZE
- - FULLY_CONNECTED
- - QUANTIZE
- - RELU
- - SOFTMAX
-paper: https://arxiv.org/abs/1711.07128
diff --git a/models/keyword_spotting/dnn_medium/tflite_int8/get_class_labels.sh b/models/keyword_spotting/dnn_medium/tflite_int8/get_class_labels.sh
deleted file mode 100755
index e59caf5..0000000
--- a/models/keyword_spotting/dnn_medium/tflite_int8/get_class_labels.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the License); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an AS IS BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/usr/bin/env bash
-
-wget https://raw.githubusercontent.com/ARM-software/ML-KWS-for-MCU/e9cf319e9aa2ff71d433e111477dd95329fb94cb/Pretrained_models/labels.txt
-mv labels.txt labelmappings.txt
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/README.md b/models/keyword_spotting/dnn_small/model_package_tf/README.md
new file mode 100644
index 0000000..7d73dab
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/README.md
@@ -0,0 +1,115 @@
+# DNN Small model package
+
+This folder contains code that will allow you to recreate the DNN Small keyword spotting model from
+the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf).
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Model Package Overview
+| Model | DNN_Small |
+|:---------------: |:------------------------------------------:|
+| **Format**: | Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |
+| **Feature**: | Keyword spotting for Arm Cortex-M CPUs |
+| **Architectural Delta w.r.t. Vanilla**: | None |
+| **Domain**: | Keyword spotting |
+| **Package Quality**: | Optimised |
+
+## Model Recreation
+
+In order to recreate the model you will first need to be using ```Python3.7``` and to install the requirements in ```requirements.txt```.
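+
+For example, from inside this folder:
+
+```bash
+pip install -r requirements.txt
+```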
+
+Once these requirements are satisfied, you can execute the recreation script contained within this folder by running:
+
+```bash
+bash ./recreate_model.sh
+```
+
+Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder
+to generate the TFLite files and perform evaluation on the test sets. Both an fp32 version and a quantized version will be produced.
+The quantized version will use post-training quantization to fully quantize it.
+
+If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:
+
+```bash
+bash ./recreate_model.sh --train
+```
+
+Training is then performed and should produce a model close to the stated accuracy in this repository.
+Note that exporting to TFLite will still use the pre-trained checkpoint files, so you will need to re-run the script
+and this time supply the path to the new checkpoint files you want to use, for example:
+
+```bash
+bash ./recreate_model.sh --ckpt <path/to/checkpoint>
+```
+
+
+## Training
+
+To train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:
+
+```
+python train.py --model_architecture dnn --model_size_info 128 128 128
+```
+The command line argument *--model_size_info* is used to pass the neural network layer
+dimensions, such as the number of layers and convolution filter size/stride, as a list to models.py,
+which builds the TensorFlow graph based on the provided model architecture
+and layer dimensions. For more info on *model_size_info* for each network architecture see
+[models.py](models.py).
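+
+For illustration only (this is not the code in [models.py](models.py)), the `dnn` architecture from the Hello Edge paper is a stack of fully-connected layers, so the three values passed above can be read as the number of neurons in each layer, roughly:
+
+```python
+import tensorflow as tf
+
+def dnn_sketch(fingerprint_size, label_count, model_size_info=(128, 128, 128)):
+    """Hypothetical sketch: one Dense + ReLU layer per entry in model_size_info."""
+    inputs = tf.keras.Input(shape=(fingerprint_size,), name='input')
+    x = inputs
+    for units in model_size_info:
+        x = tf.keras.layers.Dense(units, activation='relu')(x)
+    outputs = tf.keras.layers.Dense(label_count, activation='softmax')(x)
+    return tf.keras.Model(inputs, outputs)
+```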
+
+The training commands with all the hyperparameters to reproduce the models shown in the
+[paper](https://arxiv.org/pdf/1711.07128.pdf) are given [here](recreate_model.sh).
+
+## Testing
+To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:
+```
+python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <path/to/checkpoint>
+```
+The parameters used here should match those used in the Training step.
+
+## Optimization
+
+We introduce a new *optional* step to optimize the trained keyword spotting model for deployment.
+
+Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. 32 weight clusters and the kmeans++ cluster initialization method are used as the clustering hyperparameters.
+
+To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.
+You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.
+
+To apply the optimization and fine-tuning, run the following command:
+```
+python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <path/to/checkpoint>
+```
+The parameters used here should match those used in the Training step, except for the number of training steps.
+The number of training steps is reduced since the optimization step only requires fine-tuning.
+
+This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model.
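+
+For reference, the core of what `optimisations.py` does is a standard application of the clustering API. A minimal sketch, assuming `model`, `train_data` and `val_data` are the trained Keras model and tf.data pipelines used above, looks like:
+
+```python
+import tensorflow as tf
+import tensorflow_model_optimization as tfmot
+
+clustering_params = {
+    'number_of_clusters': 32,
+    'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS}
+
+# Wrap the trained model, fine-tune briefly, then strip the clustering wrappers.
+clustered_model = tfmot.clustering.keras.cluster_weights(model, **clustering_params)
+clustered_model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
+                        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+                        metrics=['accuracy'])
+clustered_model.fit(train_data, epochs=3, validation_data=val_data)
+stripped_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+```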
+
+## Quantization and TFLite Conversion
+
+As part of the update we now use TensorFlow's
+[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to
+make quantization of the trained models straightforward.
+
+To quantize your trained model (e.g. a DNN) run:
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <path/to/checkpoint> [--inference_type int8|int16]
+```
+The parameters used here should match those used in the Training step.
+
+The inference_type parameter is *optional* and should be used if a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32.
+
+This step will produce a quantized TFLite file *dnn_quantized.tflite*.
+You can test the accuracy of this quantized model on the test set by running:
+```
+python evaluation.py --tflite_path dnn_quantized.tflite
+```
+The parameters used here should match those used in the Training step.
+
+`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:
+
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <path/to/checkpoint> --no-quantize
+```
+
+This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.
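+
+Under the hood, the int8 conversion in `convert_to_tflite.py` follows TensorFlow's standard full-integer post-training quantization flow. A rough sketch, assuming `model` is the trained Keras model and `rep_dataset` is a generator yielding a small number of representative MFCC inputs:
+
+```python
+import tensorflow as tf
+
+converter = tf.lite.TFLiteConverter.from_keras_model(model)
+converter.optimizations = [tf.lite.Optimize.DEFAULT]
+converter.representative_dataset = rep_dataset
+converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+converter.inference_input_type = tf.int8
+converter.inference_output_type = tf.int8
+
+with open('dnn_quantized.tflite', 'wb') as f:
+    f.write(converter.convert())
+```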
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/convert_to_tflite.py b/models/keyword_spotting/dnn_small/model_package_tf/convert_to_tflite.py
new file mode 100644
index 0000000..64ab8df
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/convert_to_tflite.py
@@ -0,0 +1,234 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for converting and quantizing a trained keyword spotting
+ model and saving to TFLite."""
+
+import argparse
+
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from evaluation import tflite_test
+
+NUM_REP_DATA_SAMPLES = 100 # How many samples to use for post training quantization.
+
+
+def convert(model_settings, audio_processor, checkpoint, quantize, inference_type, tflite_path):
+ """Load our trained floating point model and convert it.
+
+ TFLite conversion or post training quantization is performed and the
+ resulting model is saved as a TFLite file.
+ We use samples from the validation set to do post training quantization.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ checkpoint: Path to training checkpoint to load.
+ quantize: Whether to quantize the model or convert to fp32 TFLite model.
+ inference_type: Input/output type of the quantized model.
+ tflite_path: Output TFLite file save path.
+ """
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(checkpoint).expect_partial()
+
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+
+ def _rep_dataset():
+ """Generator function to produce representative dataset."""
+ i = 0
+ for mfcc, label in val_data:
+ if i > NUM_REP_DATA_SAMPLES:
+ break
+ i += 1
+ yield [mfcc]
+
+ if quantize:
+ # Quantize model and save to disk.
+ tflite_model = post_training_quantize(model, inference_type, _rep_dataset)
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Quantized model saved to {tflite_path}.')
+ else:
+ converter = tf.lite.TFLiteConverter.from_keras_model(model)
+ tflite_model = converter.convert()
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Converted model saved to {tflite_path}.')
+
+
+def post_training_quantize(keras_model, inference_type, rep_dataset):
+ """Perform post training quantization and returns the TFLite model ready for saving.
+
+ See https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization for
+ more details.
+
+ Args:
+ keras_model: The trained tf Keras model used for post training quantization.
+ inference_type: Input/output type of the quantized model.
+ rep_dataset: Function to use as a representative dataset, must be callable.
+
+ Returns:
+ Quantized TFLite model ready for saving to disk.
+ """
+ converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+ converter.optimizations = [tf.lite.Optimize.DEFAULT]
+
+ supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS  # Default op set when inference_type is 'fp32'.
+ if inference_type == 'int8':
+     converter.inference_input_type = tf.int8
+     converter.inference_output_type = tf.int8
+     supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS_INT8
+ elif inference_type == 'int16':
+     converter.inference_input_type = tf.int16
+     converter.inference_output_type = tf.int16
+     supported_ops = tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
+
+ # Post training quantization needs a representative dataset.
+ converter.representative_dataset = rep_dataset
+ converter.target_spec.supported_ops = [supported_ops]
+
+ tflite_model = converter.convert()
+
+ return tflite_model
+
+
+def main():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.quantize:
+ tflite_path = f'{FLAGS.model_architecture}_quantized.tflite'
+ else:
+ tflite_path = f'{FLAGS.model_architecture}.tflite'
+
+ # Load floating point model from checkpoint and convert it.
+ convert(model_settings, audio_processor, FLAGS.checkpoint,
+ FLAGS.quantize, FLAGS.inference_type, tflite_path)
+
+ # Test the newly converted model on the test set.
+ tflite_test(model_settings, audio_processor, tflite_path)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from.')
+ parser.add_argument(
+ '--quantize',
+ dest='quantize',
+ action="store_true",
+ default=True,
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--no-quantize',
+ dest='quantize',
+ action="store_false",
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--inference_type',
+ type=str,
+ default='fp32',
+ help='If quantize is true, whether the model input and output is float32, int8 or int16')
+
+ FLAGS, _ = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/data_processing/__init__.py b/models/keyword_spotting/dnn_small/model_package_tf/data_processing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/data_processing/data_preprocessing.py b/models/keyword_spotting/dnn_small/model_package_tf/data_processing/data_preprocessing.py
new file mode 100644
index 0000000..05cf5ba
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/data_processing/data_preprocessing.py
@@ -0,0 +1,462 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Modifications Copyright 2023 Arm Inc. All Rights Reserved.
+# Modified to use TensorFlow 2.0 and data pipelines.
+#
+"""Functions for loading and preparing data for keyword spotting."""
+
+import os
+import re
+import sys
+import urllib
+from pathlib import Path
+import tarfile
+import hashlib
+import random
+import math
+from enum import Enum
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.ops import gen_audio_ops as audio_ops
+
+MAX_NUM_WAVS_PER_CLASS = 2**27 - 1 # ~134M
+RANDOM_SEED = 59185
+BACKGROUND_NOISE_DIR_NAME = '_background_noise_'
+SILENCE_LABEL = '_silence_'
+SILENCE_INDEX = 0
+UNKNOWN_WORD_INDEX = 1
+UNKNOWN_WORD_LABEL = '_unknown_'
+
+
+def load_wav_file(wav_filename, desired_samples):
+ """Loads and then decodes a given 16bit PCM wav file.
+
+ Decoded audio is scaled to the range [-1, 1] and padded or cropped to the desired number of samples.
+
+ Args:
+ wav_filename: 16bit PCM wav file to load.
+ desired_samples: Number of samples wanted from the audio file.
+
+ Returns:
+ Tuple consisting of the decoded audio and sample rate.
+ """
+ wav_file = tf.io.read_file(wav_filename)
+ decoded_wav = audio_ops.decode_wav(wav_file, desired_channels=1, desired_samples=desired_samples)
+
+ return decoded_wav.audio, decoded_wav.sample_rate
+
+
+def calculate_mfcc(audio_signal, audio_sample_rate, window_size, window_stride, num_mfcc):
+ """Returns Mel Frequency Cepstral Coefficients (MFCC) for a given audio signal.
+
+ Args:
+ audio_signal: Raw audio signal in range [-1, 1]
+ audio_sample_rate: Audio signal sample rate
+ window_size: Window size in samples for calculating spectrogram
+ window_stride: Window stride in samples for calculating spectrogram
+ num_mfcc: The number of MFCC features wanted.
+
+ Returns:
+ Calculated mffc features.
+ """
+ spectrogram = audio_ops.audio_spectrogram(input=audio_signal, window_size=window_size, stride=window_stride,
+ magnitude_squared=True)
+
+ mfcc_features = audio_ops.mfcc(spectrogram, audio_sample_rate, dct_coefficient_count=num_mfcc)
+
+ return mfcc_features
+
+
+def which_set(filename, validation_percentage, testing_percentage):
+ """Determines which data partition the file should belong to.
+
+ We want to keep files in the same training, validation, or testing sets even
+ if new ones are added over time. This makes it less likely that testing
+ samples will accidentally be reused in training when long runs are restarted
+ for example. To keep this stability, a hash of the filename is taken and used
+ to determine which set it should belong to. This determination only depends on
+ the name and the set proportions, so it won't change as other files are added.
+ It's also useful to associate particular files as related (for example words
+ spoken by the same person), so anything after '_nohash_' in a filename is
+ ignored for set determination. This ensures that 'bobby_nohash_0.wav' and
+ 'bobby_nohash_1.wav' are always in the same set, for example.
+
+ Args:
+ filename: File path of the data sample.
+ validation_percentage: How much of the data set to use for validation.
+ testing_percentage: How much of the data set to use for testing.
+
+ Returns:
+ String, one of 'training', 'validation', or 'testing'.
+ """
+ base_name = os.path.basename(filename)
+ # We want to ignore anything after '_nohash_' in the file name when
+ # deciding which set to put a wav in, so the data set creator has a way of
+ # grouping wavs that are close variations of each other.
+ hash_name = re.sub(r'_nohash_.*$', '', base_name)
+ # This looks a bit magical, but we need to decide whether this file should
+ # go into the training, testing, or validation sets, and we want to keep
+ # existing files in the same set even if more files are subsequently
+ # added.
+ # To do that, we need a stable way of deciding based on just the file name
+ # itself, so we do a hash of that and then use that to generate a
+ # probability value that we use to assign it.
+ hash_name_hashed = hashlib.sha1(tf.compat.as_bytes(hash_name)).hexdigest()
+ percentage_hash = ((int(hash_name_hashed, 16) %
+ (MAX_NUM_WAVS_PER_CLASS + 1)) *
+ (100.0 / MAX_NUM_WAVS_PER_CLASS))
+ if percentage_hash < validation_percentage:
+ result = 'validation'
+ elif percentage_hash < (testing_percentage + validation_percentage):
+ result = 'testing'
+ else:
+ result = 'training'
+ return result
+
+
+def prepare_words_list(wanted_words):
+ """Prepends common tokens to the custom word list.
+
+ Args:
+ wanted_words: List of strings containing custom words to spot.
+
+ Returns:
+ List of words with silence and unknown tokens added.
+ """
+ return [SILENCE_LABEL, UNKNOWN_WORD_LABEL] + wanted_words
+
+
+class AudioProcessor:
+ """Handles loading, partitioning, and preparing audio training data."""
+
+ class Modes(Enum):
+ TRAINING = 1
+ VALIDATION = 2
+ TESTING = 3
+
+ def __init__(self, data_url, data_dir, silence_percentage, unknown_percentage,
+ wanted_words, validation_percentage, testing_percentage, model_settings):
+ self.data_dir = Path(data_dir)
+ self.model_settings = model_settings
+ self.words_list = prepare_words_list(wanted_words)
+
+ self._tf_datasets = {}
+ self.background_data = None
+ self._set_size = {'training': 0, 'validation': 0, 'testing': 0}
+
+ self._download_and_extract_data(data_url, data_dir)
+ self._prepare_datasets(silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage)
+ self._prepare_background_data()
+
+ def get_data(self, mode, background_frequency=0, background_volume_range=0, time_shift=0):
+ """Returns the train, validation or test set for KWS as a TF Dataset.
+
+ Args:
+ mode: The set to return, see AudioProcessor.Modes enumeration.
+ background_frequency: How many of the samples have background noise mixed in.
+ background_volume_range: How loud the background noise should be, between 0 and 1.
+ time_shift: Range to randomly shift the training audio by in time.
+
+ Returns:
+ TF dataset that will generate tuples containing an mfcc and corresponding label.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ dataset = self._tf_datasets['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ dataset = self._tf_datasets['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ dataset = self._tf_datasets['testing']
+ else:
+ ValueError("Incorrect dataset type given")
+
+ use_background = (self.background_data is not None) and (mode == AudioProcessor.Modes.TRAINING)
+ dataset = dataset.map(lambda path, label: self._process_path(path, label, self.model_settings,
+ background_frequency, background_volume_range,
+ time_shift, use_background, self.background_data),
+ num_parallel_calls=tf.data.experimental.AUTOTUNE)
+
+ return dataset
+
+ def set_size(self, mode):
+ """Get the number of samples in the requested dataset partition.
+
+ Args:
+ mode: Which partition, see AudioProcessor.Modes enumeration.
+
+ Returns:
+ Number of samples in the partition.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ return self._set_size['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ return self._set_size['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ return self._set_size['testing']
+ else:
+ raise ValueError('Incorrect dataset type given')
+
+ @staticmethod
+ def _process_path(path, label, model_settings, background_frequency, background_volume_range, time_shift_samples,
+ use_background, background_data):
+ """Load wav files and calculate mfcc features.
+
+ Random shifting of samples and adding in background noise is done within this function as well.
+ This function is meant to be mapped onto a TF Dataset by using a lambda function.
+
+ Args:
+ path: Path to the wav file to load.
+ label: Integer label for classifying the audio clip.
+ model_settings: Dictionary of settings for model being trained.
+ background_frequency: How many clips will have background noise, 0.0 to 1.0.
+ background_volume_range: How loud the background noise will be.
+ time_shift_samples: How much to randomly shift the clips by.
+ use_background: Add in background noise to audio clips or not.
+ background_data: Ragged tensor of loaded background noise samples.
+
+ Returns:
+ Tuple of calculated flattened mfcc and its class label.
+ """
+
+ desired_samples = model_settings['desired_samples']
+ audio, sample_rate = load_wav_file(path, desired_samples=desired_samples)
+
+ # Make our own silence audio data.
+ if label == SILENCE_INDEX:
+ audio = tf.multiply(audio, 0)
+
+ # Shift samples start position and pad any gaps with zeros.
+ if time_shift_samples > 0:
+ time_shift_amount = tf.random.uniform(shape=(), minval=-time_shift_samples, maxval=time_shift_samples,
+ dtype=tf.int32)
+ else:
+ time_shift_amount = 0
+ if time_shift_amount > 0:
+ time_shift_padding = [[time_shift_amount, 0], [0, 0]]
+ time_shift_offset = [0, 0]
+ else:
+ time_shift_padding = [[0, -time_shift_amount], [0, 0]]
+ time_shift_offset = [-time_shift_amount, 0]
+
+ padded_foreground = tf.pad(audio, time_shift_padding, mode='CONSTANT')
+ sliced_foreground = tf.slice(padded_foreground, time_shift_offset, [desired_samples, -1])
+
+ # Get a random section of background noise.
+ if use_background:
+ background_index = tf.random.uniform(shape=(), maxval=background_data.shape[0], dtype=tf.int32)
+ background_sample = background_data[background_index]
+ background_offset = tf.random.uniform(shape=(), maxval=len(background_sample)-desired_samples,
+ dtype=tf.int32)
+ background_clipped = background_sample[background_offset:(background_offset + desired_samples)]
+ background_reshaped = tf.reshape(background_clipped, [desired_samples, 1])
+ if tf.random.uniform(shape=(), maxval=1) < background_frequency:
+ background_volume = tf.random.uniform(shape=(), maxval=background_volume_range)
+ else:
+ background_volume = tf.constant(0, dtype='float32')
+ else:
+ background_reshaped = np.zeros([desired_samples, 1], dtype=np.float32)
+ background_volume = tf.constant(0, dtype='float32')
+
+ # Mix in background noise.
+ background_mul = tf.multiply(background_reshaped, background_volume)
+ background_add = tf.add(background_mul, sliced_foreground)
+ background_clamp = tf.clip_by_value(background_add, -1.0, 1.0)
+
+ mfcc = calculate_mfcc(background_clamp, sample_rate, model_settings['window_size_samples'],
+ model_settings['window_stride_samples'],
+ model_settings['dct_coefficient_count'])
+ mfcc = tf.reshape(mfcc, [-1])
+
+ return mfcc, label
+
+ def _download_and_extract_data(self, data_url, target_directory):
+ """Downloads and extracts file to target directory.
+
+ If the file does not already exist download it and then untar into the target directory.
+
+ Args:
+ data_url: Web link to the tarred data to download.
+ target_directory: Directory to download and extract to.
+ """
+ target_directory = Path(target_directory)
+ target_directory.mkdir(exist_ok=True)
+
+ filename = data_url.split('/')[-1]
+ filepath = target_directory / filename
+
+ if not filepath.exists():
+ def _report_hook(block_num, block_size, total_size):
+ """Function to track download progress in urllib"""
+ read_so_far = block_num * block_size
+ percent = (read_so_far / total_size) * 100.0
+
+ s = f"\rDownloading {filename} {percent:.1f}%"
+
+ sys.stdout.write(s)
+ sys.stdout.flush()
+
+ filepath, _ = urllib.request.urlretrieve(data_url, filepath, _report_hook)
+ print()
+
+ print(f'Untarring {filename}...')
+ tarfile.open(filepath, 'r:gz').extractall(target_directory)
+
+ def _prepare_datasets(self, silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage):
+ """Split the data into train, validation and testing sets.
+
+ Silence and unknown data is added, then sets are converted to TF Datasets.
+
+ Args:
+ silence_percentage: Percent of words should be silence.
+ unknown_percentage: Percent of words that should be unknown.
+ wanted_words: List of words wanted to classify.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ """
+ # Make sure the shuffling and picking of unknowns is deterministic.
+ random.seed(RANDOM_SEED)
+ wanted_words_index = {}
+
+ for index, wanted_word in enumerate(wanted_words):
+ wanted_words_index[wanted_word] = index + 2
+
+ # Find all wav files in subfolders.
+ search_path = self.data_dir / '*' / '*.wav'
+ data_index, unknown_index, all_words = self._find_and_sort_wavs(search_path, validation_percentage,
+ testing_percentage, wanted_words_index)
+
+ for index, wanted_word in enumerate(wanted_words):
+ if wanted_word not in all_words:
+ raise Exception(f'Tried to find {wanted_word} in labels but only found: {", ".join(all_words.keys())}')
+
+ word_to_index = {}
+ for word in all_words:
+ if word in wanted_words_index:
+ word_to_index[word] = wanted_words_index[word]
+ else:
+ word_to_index[word] = UNKNOWN_WORD_INDEX
+ word_to_index[SILENCE_LABEL] = SILENCE_INDEX
+
+ # We need an arbitrary file to load as the input for the silence samples.
+ # It's multiplied by zero later, so the content doesn't matter.
+ silence_wav_path = data_index['training'][0]['file']
+ for set_index in ['validation', 'testing', 'training']:
+ set_size = len(data_index[set_index]) # Size before adding silence and unknown samples.
+ silence_size = int(math.ceil(set_size * silence_percentage / 100))
+ for _ in range(silence_size):
+ data_index[set_index].append({
+ 'label': SILENCE_LABEL,
+ 'file': silence_wav_path
+ })
+ # Pick some unknowns to add to each partition of the data set.
+ random.shuffle(unknown_index[set_index])
+ unknown_size = int(math.ceil(set_size * unknown_percentage / 100))
+ data_index[set_index].extend(unknown_index[set_index][:unknown_size])
+
+ self._set_size[set_index] = len(data_index[set_index]) # Size after adding silence and unknown samples.
+
+ # Make sure the ordering is random.
+ random.shuffle(data_index[set_index])
+
+ # Transform into TF Datasets ready for easier processing later.
+ labels, paths = list(zip(*[d.values() for d in data_index[set_index]]))
+ labels = [word_to_index[label] for label in labels]
+ self._tf_datasets[set_index] = tf.data.Dataset.from_tensor_slices((list(paths), labels))
+
+ def _find_and_sort_wavs(self, search_pattern, validation_percentage, testing_percentage, wanted_words_index):
+ """Find and sort wav files into known and unknown word sets.
+
+ Known words are files containing words in the list of wanted words.
+ Any other clip goes to the unknown label set. Labels come from the folder names.
+ All clips are also assigned to train, test and validation sets.
+
+ Args:
+ search_pattern: Path pattern used by glob to find wav files.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ wanted_words_index: Dict mapping wanted words to their label index.
+
+ Returns:
+ 3-tuple of known words, unknown words and mapping of all word labels.
+ """
+ data_index = {'validation': [], 'testing': [], 'training': []}
+ unknown_index = {'validation': [], 'testing': [], 'training': []}
+ all_words = {}
+
+ for wav_path in sorted(tf.io.gfile.glob(str(search_pattern))):
+ word = Path(wav_path).parent.name.lower()
+
+ # Treat the '_background_noise_' folder as a special case, since we expect
+ # it to contain long audio samples we mix in to improve training.
+ if word == BACKGROUND_NOISE_DIR_NAME:
+ continue
+
+ all_words[word] = True
+ set_index = which_set(wav_path, validation_percentage, testing_percentage)
+ # If it's a known class, store its detail, otherwise add it to the list
+ # we'll use to train the unknown label.
+ if word in wanted_words_index:
+ data_index[set_index].append({'label': word, 'file': wav_path})
+ else:
+ unknown_index[set_index].append({'label': word, 'file': wav_path})
+ if not all_words:
+ raise Exception('No .wavs found at ' + str(search_pattern))
+
+ return data_index, unknown_index, all_words
+
+ def _prepare_background_data(self):
+ """Searches a folder for background noise audio, and loads it into memory.
+
+ It's expected that the background audio samples will be in a subdirectory
+ named '_background_noise_' inside the 'data_dir' folder, as .wavs that match
+ the sample rate of the training data, but can be much longer in duration.
+
+ If the '_background_noise_' folder doesn't exist at all, this isn't an
+ error, it's just taken to mean that no background noise augmentation should
+ be used. If the folder does exist, but it's empty, that's treated as an
+ error.
+
+ Returns:
+ Ragged tensor of raw PCM-encoded audio samples of background noise.
+ None if '_background_noise_' folder doesn't exist.
+
+ Raises:
+ Exception: If files aren't found in the folder.
+ """
+ background_data = []
+ background_dir = Path(self.data_dir / BACKGROUND_NOISE_DIR_NAME)
+ if not background_dir.exists():
+ self.background_data = None
+ return
+
+ search_path = Path(background_dir / '*.wav')
+ for wav_path in tf.io.gfile.glob(str(search_path)):
+ wav_data, _ = load_wav_file(wav_path, desired_samples=-1)
+ background_data.append(tf.reshape(wav_data, [-1]))
+
+ if not background_data:
+ raise Exception('No background wav files were found in ' + str(search_path))
+
+ # Ragged tensor as we can't use lists in tf dataset map functions.
+ self.background_data = tf.ragged.stack(background_data)
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/dnn_s_inference_keras.py b/models/keyword_spotting/dnn_small/model_package_tf/dnn_s_inference_keras.py
new file mode 100644
index 0000000..db7694a
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/dnn_s_inference_keras.py
@@ -0,0 +1,76 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import argparse
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+ with open(filename, "r") as f:
+     return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+
+ model = tf.keras.models.load_model(FLAGS.keras_file_path)
+ predictions = model.predict(x)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--keras_file_path',
+ type=str,
+ default='',
+ help='Path to the .h5 Keras model file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
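As a quick aid to the feature-extraction flags above, the arithmetic below (purely illustrative) shows how the window parameters determine the number of MFCC frames and hence the length of the flattened input passed to the model. Note that the packaged DNN Small model expects a length-250 input (see the model archive README), so the flags used at inference time must match the values used during training rather than these defaults.

```python
# Illustrative arithmetic only: how the flags above translate into frames and
# flattened-input length (these defaults do NOT match the packaged DNN Small model).
sample_rate, clip_duration_ms = 16000, 1000
window_size_ms, window_stride_ms, dct_coefficient_count = 30.0, 10.0, 40

desired_samples = int(sample_rate * clip_duration_ms / 1000)        # 16000
window_size_samples = int(sample_rate * window_size_ms / 1000)      # 480
window_stride_samples = int(sample_rate * window_stride_ms / 1000)  # 160
frames = 1 + (desired_samples - window_size_samples) // window_stride_samples
print(frames, frames * dct_coefficient_count)  # 98 frames -> 3920 values
```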
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/dnn_s_inference_tflite.py b/models/keyword_spotting/dnn_small/model_package_tf/dnn_s_inference_tflite.py
new file mode 100644
index 0000000..9f79d99
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/dnn_s_inference_tflite.py
@@ -0,0 +1,120 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import numpy as np
+import argparse
+
+
+def tflite_inference(input_data, tflite_path):
+    """Calls the forward pass of a TFLite model file and returns the result.
+
+ Args:
+ input_data: Input data to use on forward pass.
+ tflite_path: Path to TFLite file to run.
+
+ Returns:
+ Output from inference.
+ """
+ supported_quant_dtypes = (np.int8, np.int16)
+ interpreter = tf.lite.Interpreter(model_path=tflite_path)
+ interpreter.allocate_tensors()
+
+ input_details = interpreter.get_input_details()
+ output_details = interpreter.get_output_details()
+
+ input_dtype = input_details[0]["dtype"]
+ output_dtype = output_details[0]["dtype"]
+
+ # Check if the input/output type is quantized,
+ # set scale and zero-point accordingly
+ if input_dtype in supported_quant_dtypes:
+ input_scale, input_zero_point = input_details[0]["quantization"]
+ else:
+ input_scale, input_zero_point = 1, 0
+
+ input_data = input_data / input_scale + input_zero_point
+ input_data = np.round(input_data) if input_dtype in supported_quant_dtypes else input_data
+
+ if output_dtype in supported_quant_dtypes:
+ output_scale, output_zero_point = output_details[0]["quantization"]
+ else:
+ output_scale, output_zero_point = 1, 0
+
+ interpreter.set_tensor(input_details[0]['index'], tf.cast(input_data, input_dtype))
+ interpreter.invoke()
+
+ output_data = interpreter.get_tensor(output_details[0]['index'])
+
+ output_data = output_scale * (output_data.astype(np.float32) - output_zero_point)
+
+ return output_data
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+    with open(filename, "r") as f:
+        return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+ predictions = tflite_inference(x, FLAGS.tflite_path)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ default='',
+ help='Path to TFLite file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
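Since `tflite_inference` wraps the quantize/dequantize handling, it can also be called directly, for example to sanity-check a converted model. In the sketch below the model filename and the length-250 input are assumptions taken from the DNN Small model archive in this package; the random input is purely illustrative and real use would feed MFCCs from `calculate_mfcc`.

```python
# Illustrative sanity check: run tflite_inference() on a random MFCC-shaped input.
import numpy as np
from dnn_s_inference_tflite import tflite_inference

dummy_fingerprint = np.random.rand(1, 250).astype(np.float32)
scores = tflite_inference(dummy_fingerprint, 'dnn_s_quantized.tflite')
print(scores.shape)               # (1, 12): one score per keyword class
print(int(np.argmax(scores[0])))  # index of the most confident class
```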
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/evaluation.py b/models/keyword_spotting/dnn_small/model_package_tf/evaluation.py
new file mode 100644
index 0000000..9cf3d0c
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/evaluation.py
@@ -0,0 +1,250 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for testing trained keyword spotting models from checkpoint files and TFLite files."""
+
+import argparse
+
+import numpy as np
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from dnn_s_inference_tflite import tflite_inference
+
+
+def tflite_test(model_settings, audio_processor, tflite_path):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A TFLite model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ tflite_path: Path to TFLite file to use for inference.
+ """
+ # Evaluate on validation set.
+ print("Running TFLite evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+ expected_indices = np.concatenate([y for x, y in val_data])
+ predicted_indices = []
+
+ for mfcc, label in val_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TFLite evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(1)
+ expected_indices = np.concatenate([y for x, y in test_data])
+ predicted_indices = []
+
+ for mfcc, label in test_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def keras_test(model_settings, audio_processor, model):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A loaded keras model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ model: Loaded keras model.
+ """
+ # Evaluate on validation set.
+ print("Running TF evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in val_data])
+
+ predictions = model.predict(val_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TF evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in test_data])
+
+ predictions = model.predict(test_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def calculate_accuracy(predicted_indices, expected_indices):
+ """Calculates and returns accuracy.
+
+ Args:
+ predicted_indices: List of predicted integer indices.
+ expected_indices: List of expected integer indices.
+
+ Returns:
+ Accuracy value between 0 and 1.
+ """
+ correct_prediction = tf.equal(predicted_indices, expected_indices)
+ accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+ return accuracy
+
+
+def evaluate():
+ """Calculate accuracy and confusion matrices on validation and test sets.
+
+ Model is created and weights loaded from supplied command line arguments.
+ """
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.tflite_path:
+ tflite_test(model_settings, audio_processor, FLAGS.tflite_path)
+
+ if FLAGS.checkpoint:
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+ keras_test(model_settings, audio_processor, model)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from')
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ help='Path to TFLite file to use for evaluation')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ evaluate()
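For reference, `calculate_accuracy` above is simply the mean of element-wise matches between predictions and labels; a tiny worked example with illustrative values:

```python
# Illustrative values only: accuracy is the fraction of matching predictions.
import tensorflow as tf

predicted = tf.constant([0, 2, 3, 3])
expected = tf.constant([0, 2, 1, 3])
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, expected), tf.float32))
print(float(accuracy))  # 0.75
```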
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/how_to_guidance.ipynb b/models/keyword_spotting/dnn_small/model_package_tf/how_to_guidance.ipynb
new file mode 100644
index 0000000..1332d4e
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/how_to_guidance.ipynb
@@ -0,0 +1,428 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.\n",
+ "#\n",
+ "# SPDX-License-Identifier: Apache-2.0\n",
+ "#\n",
+ "# Licensed under the Apache License, Version 2.0 (the License); you may\n",
+ "# not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an AS IS BASIS, WITHOUT\n",
+ "# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# DNN_Small - Optimised\n",
+ "\n",
+ "Here we reproduce the models with our established codebase and ModelPackage approach for your convenience.\n",
+ "\n",
+ "## Model-Package Overview:\n",
+ "\n",
+ "| Model \t| DNN_Small \t|\n",
+ "|:---------------:\t|:---------------------------------------------------------------:\t|\n",
+ "| **Format**: \t| Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |\n",
+ "| **Feature**: \t| Keyword spotting for Arm Cortex-M CPUs |\n",
+ "| **Architectural Delta w.r.t. Vanilla**: | None |\n",
+ "| **Domain**: \t| Keyword spotting |\n",
+ "| **Package Quality**: \t| Optimised |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Table of contents \n",
+ "\n",
+    "This how-to guidance presents the key steps needed to reproduce everything in this package. The contents are organised as below, with internal navigation links so you can jump straight to each section.\n",
+ "\n",
+ " \n",
+ "* [1.0 Model recreation](#model_recreation)\n",
+ "\n",
+ "* [2.0 Training](#training)\n",
+ "\n",
+ "* [3.0 Testing](#testing)\n",
+ "\n",
+ "* [4.0 Optimization](#optimization)\n",
+ "\n",
+ "* [5.0 Quantization and TFLite conversion](#tflite_conversion)\n",
+ "\n",
+    "* [6.0 Single inference with the TFLite model files](#tflite_inference)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1.0 Model Recreation\n",
+ "\n",
+    "In order to recreate the model you will first need to be using ```Python3.7``` and to have installed the requirements in ```requirements.txt```.\n",
+ "\n",
+ "Once you have these requirements satisfied you can execute the recreation script contained within this folder, just run:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 13:25:23.242199: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 13:26:16.311986: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 13:26:16.348776: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:26:16.348818: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:26:16.369436: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 13:26:16.369509: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 13:26:16.372294: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 13:26:16.372684: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 13:26:16.373267: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 13:26:16.374012: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 13:26:16.374168: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 13:26:16.374680: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:26:16.374967: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 13:26:16.375884: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:26:16.376614: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:26:16.376682: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:26:16.822126: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:26:16.822161: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:26:16.822173: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:26:16.822780: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10939 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 13:26:17.956358: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 13:26:18.216079: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 13:26:18.216285: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 13:26:18.216661: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:26:18.216906: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:26:18.216936: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:26:18.216946: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:26:18.216953: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:26:18.217236: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10939 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 13:26:18.235442: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 13:26:18.236450: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.011ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.001ms.\n",
+ "\n",
+ "2023-01-31 13:26:18.268723: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 13:26:18.268758: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 13:26:18.271003: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 13:26:18.272912: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:26:18.273329: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:26:18.273362: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:26:18.273373: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:26:18.273385: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:26:18.273700: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10939 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "Converted model saved to dnn.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "2023-01-31 13:26:18.314546: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 238 16 9 5 30 11 10 15 7 9 21]\n",
+ " [ 0 7 341 8 0 6 26 5 1 0 0 3]\n",
+ " [ 0 8 7 316 5 17 0 5 2 1 4 41]\n",
+ " [ 0 8 1 2 287 3 5 4 6 19 7 8]\n",
+ " [ 0 10 1 22 2 317 2 0 5 2 1 15]\n",
+ " [ 0 5 27 2 1 2 299 9 0 3 0 4]\n",
+ " [ 1 13 2 2 0 2 3 334 2 2 0 2]\n",
+ " [ 2 9 1 1 6 6 2 0 318 13 1 4]\n",
+ " [ 1 4 1 0 29 0 1 1 17 311 4 4]\n",
+ " [ 2 2 0 1 15 5 0 1 4 5 310 5]\n",
+ " [ 0 10 1 38 8 26 2 1 3 1 1 281]]\n",
+ "Validation accuracy = 83.76%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 274 12 20 8 11 16 13 20 7 8 19]\n",
+ " [ 1 9 375 3 0 6 20 0 0 0 1 4]\n",
+ " [ 0 14 5 312 1 33 6 0 0 0 3 31]\n",
+ " [ 0 12 0 3 362 5 3 5 8 11 13 3]\n",
+ " [ 0 10 2 34 2 332 5 0 5 0 3 13]\n",
+ " [ 0 12 27 5 4 1 339 17 1 2 2 2]\n",
+ " [ 0 12 0 2 4 1 9 362 1 3 0 2]\n",
+ " [ 1 12 0 3 3 14 1 1 336 20 1 4]\n",
+ " [ 1 6 3 2 16 0 3 1 19 338 2 11]\n",
+ " [ 0 5 1 2 22 4 3 0 0 2 367 5]\n",
+ " [ 0 17 0 65 6 17 3 2 2 5 2 283]]\n",
+ "Test accuracy = 83.60%(N=4890)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 13:26:30.279559: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 13:27:20.964068: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 13:27:21.007726: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:27:21.007765: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:27:21.028042: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 13:27:21.028131: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 13:27:21.030956: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 13:27:21.031218: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 13:27:21.031788: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 13:27:21.032512: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 13:27:21.032668: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 13:27:21.033033: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:27:21.033325: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 13:27:21.034039: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:27:21.034415: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:27:21.034486: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 13:27:21.478837: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:27:21.478873: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:27:21.478882: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:27:21.479411: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10939 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 13:27:22.568489: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 13:27:22.830822: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 13:27:22.831041: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 13:27:22.831444: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:27:22.831775: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:27:22.831807: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:27:22.831816: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:27:22.831823: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:27:22.832109: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10939 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 13:27:22.851539: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 13:27:22.852738: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.013ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.001ms.\n",
+ "\n",
+ "2023-01-31 13:27:22.888443: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 13:27:22.888491: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 13:27:22.891172: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 13:27:22.893139: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 13:27:22.893390: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 13:27:22.893420: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 13:27:22.893430: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 13:27:22.893437: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 13:27:22.893709: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10939 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 13:27:22.923079: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "fully_quantize: 0, inference_type: 6, input_inference_type: 9, output_inference_type: 9\n",
+ "Quantized model saved to dnn_quantized.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 241 17 14 5 27 12 9 17 6 7 16]\n",
+ " [ 0 11 340 11 4 5 21 4 1 0 0 0]\n",
+ " [ 0 15 7 315 10 14 1 2 2 1 6 33]\n",
+ " [ 0 10 1 6 282 4 6 3 5 19 10 4]\n",
+ " [ 0 17 2 26 8 300 1 0 6 0 4 13]\n",
+ " [ 0 8 30 3 6 1 293 7 0 1 2 1]\n",
+ " [ 0 17 2 4 6 1 9 316 1 2 4 1]\n",
+ " [ 2 9 1 1 10 4 2 2 317 11 0 4]\n",
+ " [ 1 8 1 2 33 0 0 2 15 303 6 2]\n",
+ " [ 2 6 0 2 25 5 0 0 2 1 304 3]\n",
+ " [ 0 16 1 47 15 27 2 1 3 1 4 255]]\n",
+ "Validation accuracy = 81.82%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 281 13 23 11 10 14 14 21 6 6 9]\n",
+ " [ 0 12 372 6 6 6 13 0 0 0 1 3]\n",
+ " [ 0 19 8 311 6 28 4 0 0 0 4 25]\n",
+ " [ 0 20 2 6 359 6 1 2 6 7 14 2]\n",
+ " [ 0 15 5 36 10 318 3 0 4 2 2 11]\n",
+ " [ 0 12 33 6 13 3 320 19 0 2 2 2]\n",
+ " [ 1 17 0 4 5 1 11 347 1 1 7 1]\n",
+ " [ 0 16 0 6 8 16 1 1 326 18 3 1]\n",
+ " [ 1 6 3 4 37 1 3 2 19 314 3 9]\n",
+ " [ 0 10 0 6 28 3 4 0 0 1 354 5]\n",
+ " [ 0 19 0 73 18 19 3 2 3 4 2 259]]\n",
+ "Test accuracy = 81.17%(N=4890)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!bash ./recreate_model.sh"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder to generate the TFLite files and perform evaluation on the test set. Both an fp32 version and a quantized version will be produced. The quantized version will use post-training quantization to fully quantize it.\n",
+ "\n",
+ "If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --train\n",
+ "```\n",
+ "\n",
+    "Training is then performed and should produce a model with the accuracy stated in this repository. Note that exporting to TFLite will still use the baseline pre-trained checkpoint files, so you will need to re-run the script, this time supplying the path to the new checkpoint files you want to use, for example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --ckpt \n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2.0 Training\n",
+ "\n",
+    "The training scripts can be used to recreate any of the models from the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf), provided the right hyperparameters are used. The training commands with all the hyperparameters needed to reproduce the model in this repository are given [here](recreate_model.sh). The model in this part of the repository represents just one variation of the models from the paper; other variants are covered in other parts of the repository.\n",
+ "\n",
+ "\n",
+ "As a general example of how to train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:\n",
+ "```\n",
+ "python train.py --model_architecture dnn --model_size_info 128 128 128\n",
+ "```\n",
+ "\n",
+    "The command line argument *--model_size_info* is used to pass the neural network layer\n",
+    "dimensions (such as the number of layers and convolution filter size/stride) as a list to models.py,\n",
+    "which builds the TensorFlow graph based on the provided model architecture\n",
+    "and layer dimensions. For more info on *model_size_info* for each network architecture see\n",
+ "[models.py](model_core_utils/models.py).\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3.0 Testing\n",
+ "To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:\n",
+ "```\n",
+ "python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters passed to this script should match those used in the Training step.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4.0 Optimization\n",
+ "\n",
+ "We introduce an *optional* step to optimize the trained keyword spotting model for deployment.\n",
+ "\n",
+    "Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. 32 weight clusters and the kmeans++ cluster initialization method are used as the clustering hyperparameters.\n",
+ "\n",
+ "To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.\n",
+ "You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.\n",
+ "\n",
+ "To apply the optimization and fine-tuning, run the following command:\n",
+ "```\n",
+ "python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step, except for the number of training steps.\n",
+ "The number of training steps is reduced since the optimization step only requires fine-tuning.**\n",
+ "\n",
+ "This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5.0 Quantization and TFLite Conversion\n",
+ "\n",
+ "You can now use TensorFlow's\n",
+ "[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to\n",
+ "make quantization of the trained models super simple.\n",
+ "\n",
+ "To quantize your trained model (e.g. a DNN) run:\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint [--inference_type int8|int16]\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+    "The ```inference_type``` parameter is *optional* and should be used if a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32.\n",
+ "\n",
+ "In this example, this step will produce a quantized TFLite file *dnn_quantized.tflite*."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can test the accuracy of this quantized model on the test set by running:\n",
+ "```\n",
+ "python evaluation.py --tflite_path dnn_quantized.tflite\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:\n",
+ "\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint --no-quantize\n",
+ "```\n",
+ "\n",
+ "This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "## 6.0 Single inference with the TFLite model files\n",
+    "\n",
+    "You can run TFLite inference with the fp32 and int8 model files using the following command:\n",
+ "\n",
+ "```python dnn_s_inference_tflite.py --labels validation_utils/labels.txt --wav --tflite_path ```\n",
+ "\n",
+ "**The feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
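Section 5.0 of the notebook delegates quantization to `convert_to_tflite.py`. For readers who want to see what a post-training quantization flow of this kind looks like, a generic, hedged sketch is shown below; the saved-model path matches this package's archive, but the representative-dataset generator and output filename are placeholders rather than the package's actual implementation.

```python
# Generic post-training quantization sketch (not the package's convert_to_tflite.py).
# In the real flow the representative dataset would yield MFCC features from the
# training set rather than random values.
import numpy as np
import tensorflow as tf

def representative_dataset():
    for _ in range(100):
        yield [np.random.rand(1, 250).astype(np.float32)]

saved_model_dir = 'model_archive/model_source/saved_model/dnn_small'  # from this package
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

with open('dnn_quantized.tflite', 'wb') as f:
    f.write(converter.convert())
```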
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/README.md b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
new file mode 100644
index 0000000..78f4f45
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32
+
+## Description
+This is a floating point fp32 version of the DNN Small model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | fp32 |
+| SHA-1 Hash | 7491539a547ee30b87c266e6bbb4455e0c8f556d |
+| Size (Bytes) | 320648 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 83.60% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_multiplication_x: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Deployable |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_multiplication_x: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 250) | fp32 | models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input | fp32 | [1, 250] | The input is a set of processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | fp32 | models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity | fp32 | [1, 12] | The probability of each of the 12 keywords |
\ No newline at end of file
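The example input/output paths listed in the tables above can be used for a quick regression check of the packaged fp32 model. A hedged sketch is below; the tolerance is an assumption rather than a documented requirement.

```python
# Regression-check sketch for the fp32 package using the example .npy files above.
import numpy as np
import tensorflow as tf

base = 'models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32'
interpreter = tf.lite.Interpreter(model_path=f'{base}/dnn_s.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]

x = np.load(f'{base}/testing_input/input/0.npy')
interpreter.set_tensor(input_details['index'], x.astype(np.float32))
interpreter.invoke()
y = interpreter.get_tensor(output_details['index'])

expected = np.load(f'{base}/testing_output/Identity/0.npy')
print(np.allclose(y, expected, atol=1e-5))  # True if the archive output is reproduced
```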
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
new file mode 100644
index 0000000..0458507
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
@@ -0,0 +1,62 @@
+benchmark:
+ benchmark_metrics:
+ accuracy: 83.60%
+ benchmark_name: Google Speech Commands test set
+description: This is a floating point fp32 version of the DNN Small model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: fp32
+ file_size_bytes: 320648
+ filename: dnn_s.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: 7491539a547ee30b87c266e6bbb4455e0c8f556d
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+  - description: The input is a set of processed MFCCs of shape (1, 250)
+ example_input:
+ path: models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input
+ shape:
+ - 1
+ - 250
+ type: fp32
+ use_case: Random input for model regression.
+ input_datatype: fp32
+ name: input
+ shape:
+ - 1
+ - 250
+ output_nodes:
+  - description: The probability of each of the 12 keywords.
+ example_output:
+ path: models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: fp32
+      use_case: Output for model regression.
+ name: Identity
+ output_datatype: fp32
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: false
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - FULLY_CONNECTED
+ - RELU
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/dnn_s.tflite b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/dnn_s.tflite
new file mode 100644
index 0000000..84cf83d
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/dnn_s.tflite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7beaf5a4b740228324fc48db72eb2dab16854278676cb3f67268fee5910ab5f8
+size 320648
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
new file mode 100644
index 0000000..fd525dc
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f9883bea3889da8d87477965f034c7f8a453636a4ed5897c34c0798a41924f8
+size 1128
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
new file mode 100644
index 0000000..3d71018
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b74580f29a9cea2e7f1f179e930c05d4d2ac884c70b535d7c5f988bc38c47258
+size 176
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/README.md b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/README.md
new file mode 100644
index 0000000..91932d2
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8
+
+## Description
+This is a fully quantized int8 version of the DNN Small model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | int8 |
+| SHA-1 Hash | 4b92e09fb43b2f042ce2811b91c7c67bf7186b6b |
+| Size (Bytes) | 83544 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 82.11% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_check_mark: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Deployable |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_check_mark: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 250) | int8 | models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input | int8 | [1, 250] | The input is a set of processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | int8 | models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity | int8 | [1, 12] | The probability of each of the 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
new file mode 100644
index 0000000..d653ebc
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
@@ -0,0 +1,62 @@
+benchmark:
+ benchmark_metrics:
+ Accuracy: 82.11%
+ benchmark_name: Google Speech Commands test set
+description: This is a fully quantized int8 version of the DNN Small model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: int8
+ file_size_bytes: 83544
+ filename: dnn_s_quantized.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: 4b92e09fb43b2f042ce2811b91c7c67bf7186b6b
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+  - description: The input is a set of processed MFCCs of shape (1, 250)
+ example_input:
+ path: models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input
+ shape:
+ - 1
+ - 250
+ type: int8
+ use_case: Random input for model regression.
+ input_datatype: int8
+ name: input
+ shape:
+ - 1
+ - 250
+ output_nodes:
+  - description: The probability of each of the 12 keywords.
+ example_output:
+ path: models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: int8
+      use_case: Output for model regression.
+ name: Identity
+ output_datatype: int8
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: true
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - FULLY_CONNECTED
+ - RELU
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_small/tflite_int8/dnn_s_quantized.tflite b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/dnn_s_quantized.tflite
similarity index 100%
rename from models/keyword_spotting/dnn_small/tflite_int8/dnn_s_quantized.tflite
rename to models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/dnn_s_quantized.tflite
diff --git a/models/keyword_spotting/dnn_small/tflite_int8/testing_input/input/0.npy b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
similarity index 100%
rename from models/keyword_spotting/dnn_small/tflite_int8/testing_input/input/0.npy
rename to models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
diff --git a/models/keyword_spotting/dnn_small/tflite_int8/testing_output/Identity/0.npy b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
similarity index 100%
rename from models/keyword_spotting/dnn_small/tflite_int8/testing_output/Identity/0.npy
rename to models/keyword_spotting/dnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/saved_model/dnn_small/keras_metadata.pb b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/saved_model/dnn_small/keras_metadata.pb
new file mode 100644
index 0000000..4f01a9c
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/saved_model/dnn_small/keras_metadata.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7035d087e4fea7940fc83080a1b64f4d8cdec6d8344aadb5876ff41994807bbf
+size 10087
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/saved_model/dnn_small/saved_model.pb b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/saved_model/dnn_small/saved_model.pb
new file mode 100644
index 0000000..152a69e
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/saved_model/dnn_small/saved_model.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c53338f2dc3fb47b591a96d93710047cc31fe9aa697bbf51283ce3b7d3557fe
+size 84664
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/saved_model/dnn_small/variables/variables.data-00000-of-00001 b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/saved_model/dnn_small/variables/variables.data-00000-of-00001
new file mode 100644
index 0000000..d945297
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/saved_model/dnn_small/variables/variables.data-00000-of-00001
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd85a15e363ad2aeb3bf02308e5f89137221c1c6c658e71ccba21aefbba99d63
+size 321215
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/saved_model/dnn_small/variables/variables.index b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/saved_model/dnn_small/variables/variables.index
new file mode 100644
index 0000000..35dd996
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/saved_model/dnn_small/variables/variables.index
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc2c60477817e8647d6ebbe1409b40435de6bcaef280b0a41cf5713d3ec95393
+size 641
diff --git a/models/keyword_spotting/dnn_small/tflite_int8/ckpt/checkpoint b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/weights/checkpoint
similarity index 100%
rename from models/keyword_spotting/dnn_small/tflite_int8/ckpt/checkpoint
rename to models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/weights/checkpoint
diff --git a/models/keyword_spotting/dnn_small/tflite_int8/ckpt/dnn_0.84_ckpt.data-00000-of-00001 b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/weights/dnn_0.84_ckpt.data-00000-of-00001
similarity index 100%
rename from models/keyword_spotting/dnn_small/tflite_int8/ckpt/dnn_0.84_ckpt.data-00000-of-00001
rename to models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/weights/dnn_0.84_ckpt.data-00000-of-00001
diff --git a/models/keyword_spotting/dnn_small/tflite_int8/ckpt/dnn_0.84_ckpt.index b/models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/weights/dnn_0.84_ckpt.index
similarity index 100%
rename from models/keyword_spotting/dnn_small/tflite_int8/ckpt/dnn_0.84_ckpt.index
rename to models/keyword_spotting/dnn_small/model_package_tf/model_archive/model_source/weights/dnn_0.84_ckpt.index
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/model_core_utils/__init__.py b/models/keyword_spotting/dnn_small/model_package_tf/model_core_utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/model_core_utils/models.py b/models/keyword_spotting/dnn_small/model_package_tf/model_core_utils/models.py
new file mode 100644
index 0000000..1978136
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/model_core_utils/models.py
@@ -0,0 +1,327 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Model definitions for simple keyword spotting."""
+
+import math
+
+import tensorflow as tf
+
+
+def prepare_model_settings(label_count, sample_rate, clip_duration_ms,
+ window_size_ms, window_stride_ms,
+ dct_coefficient_count):
+ """Calculates common settings needed for all models.
+
+ Args:
+ label_count: How many classes are to be recognized.
+ sample_rate: Number of audio samples per second.
+ clip_duration_ms: Length of each audio clip to be analyzed.
+ window_size_ms: Duration of frequency analysis window.
+ window_stride_ms: How far to move in time between frequency windows.
+ dct_coefficient_count: Number of frequency bins to use for analysis.
+
+ Returns:
+ Dictionary containing common settings.
+ """
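+  # Worked example (illustrative only, not executed here): the DNN Small
+  # recreation script uses sample_rate=16000, clip_duration_ms=1000,
+  # window_size_ms=40, window_stride_ms=40 and dct_coefficient_count=10,
+  # giving desired_samples=16000, window_size_samples=640,
+  # window_stride_samples=640, spectrogram_length=1 + 15360 // 640 = 25 and
+  # fingerprint_size=10 * 25 = 250, matching the model's (1, 250) MFCC input.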
+ desired_samples = int(sample_rate * clip_duration_ms / 1000)
+ window_size_samples = int(sample_rate * window_size_ms / 1000)
+ window_stride_samples = int(sample_rate * window_stride_ms / 1000)
+ length_minus_window = (desired_samples - window_size_samples)
+ if length_minus_window < 0:
+ spectrogram_length = 0
+ else:
+ spectrogram_length = 1 + int(length_minus_window / window_stride_samples)
+ fingerprint_size = dct_coefficient_count * spectrogram_length
+
+ return {
+ 'desired_samples': desired_samples,
+ 'window_size_samples': window_size_samples,
+ 'window_stride_samples': window_stride_samples,
+ 'spectrogram_length': spectrogram_length,
+ 'dct_coefficient_count': dct_coefficient_count,
+ 'fingerprint_size': fingerprint_size,
+ 'label_count': label_count,
+ 'sample_rate': sample_rate,
+ }
+
+
+def create_model(model_settings, model_architecture, model_size_info, is_training):
+ """Builds a tf.keras model of the requested architecture compatible with the settings.
+
+ Args:
+ model_settings: Dictionary of information about the model.
+ model_architecture: String specifying which kind of model to create.
+    model_size_info: Array with specific information for the chosen architecture
+      (e.g. convolutional parameters, number of layers).
+    is_training: Whether the model is being created for training or inference.
+
+ Returns:
+ A tf.keras Model with the requested architecture.
+
+ Raises:
+ Exception: If the architecture type isn't recognized.
+ """
+
+ if model_architecture == 'dnn':
+ return create_dnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'cnn':
+ return create_cnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'ds_cnn':
+ return create_ds_cnn_model(model_settings, model_size_info)
+ elif model_architecture == 'single_fc':
+ return create_single_fc_model(model_settings)
+ elif model_architecture == 'basic_lstm':
+ return create_basic_lstm_model(model_settings, model_size_info, is_training)
+ else:
+    raise Exception(f'model_architecture argument {model_architecture} not recognized, '
+                    f'should be one of "dnn", "cnn", "ds_cnn", "single_fc" or "basic_lstm"')
+
+
+def create_single_fc_model(model_settings):
+ """Builds a model with a single fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+
+ Returns:
+ tf.keras Model of the 'SINGLE_FC' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+ # Fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(inputs)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_basic_lstm_model(model_settings, model_size_info, is_training):
+ """Builds a model with a basic lstm layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+    model_size_info: Length of the array defines the number of hidden layers and
+      each element in the array represents the number of neurons in that layer.
+    is_training: Whether the model will be used for training or for inference.
+
+ Returns:
+ tf.keras Model of the 'Basic_LSTM' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size))
+
+ # LSTM layer, and unrolling depending on whether you are training or not
+ if is_training:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=False)(x)
+ else:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=True)(x)
+
+ # Outputs a fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_dnn_model(model_settings, model_size_info):
+ """Builds a model with multiple hidden fully-connected layers.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+    model_size_info: Length of the array defines the number of hidden layers and
+      each element in the array represents the number of neurons in that layer.
+
+ Returns:
+ tf.keras Model of the 'DNN' architecture.
+ """
+
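+  # For the DNN Small model recreated in this package, model_size_info is
+  # [144, 144, 144] (see recreate_model.sh): three hidden fully-connected
+  # layers of 144 neurons each.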
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ # First fully connected layer.
+ x = tf.keras.layers.Dense(units=model_size_info[0], activation='relu')(inputs)
+
+ # Hidden layers with ReLU activations.
+ for i in range(1, len(model_size_info)):
+ x = tf.keras.layers.Dense(units=model_size_info[i], activation='relu')(x)
+
+ # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_cnn_model(model_settings, model_size_info):
+ """Builds a model with 2 convolution layers followed by a linear layer and a hidden fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines the first and second convolution parameters in
+ {number of conv features, conv filter height, width, stride in y,x dir.},
+ followed by linear layer size and fully-connected layer size.
+
+ Returns:
+ tf.keras Model of the 'CNN' architecture.
+ """
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ first_filter_count = model_size_info[0]
+ first_filter_height = model_size_info[1] # Time axis.
+ first_filter_width = model_size_info[2] # Frequency axis.
+ first_filter_stride_y = model_size_info[3] # Time axis.
+  first_filter_stride_x = model_size_info[4]  # Frequency axis.
+
+ second_filter_count = model_size_info[5]
+ second_filter_height = model_size_info[6] # Time axis.
+ second_filter_width = model_size_info[7] # Frequency axis.
+ second_filter_stride_y = model_size_info[8] # Time axis.
+ second_filter_stride_x = model_size_info[9] # Frequency axis.
+
+ linear_layer_size = model_size_info[10]
+ fc_size = model_size_info[11]
+
+  inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=first_filter_count,
+ kernel_size=(first_filter_height, first_filter_width),
+ strides=(first_filter_stride_y, first_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Second convolution.
+ x = tf.keras.layers.Conv2D(filters=second_filter_count,
+ kernel_size=(second_filter_height, second_filter_width),
+ strides=(second_filter_stride_y, second_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Flatten for fully connected layers.
+ x = tf.keras.layers.Flatten()(x)
+
+ # Fully connected layer with no activation.
+ x = tf.keras.layers.Dense(units=linear_layer_size)(x)
+
+ # Fully connected layer with ReLU activation.
+ x = tf.keras.layers.Dense(units=fc_size)(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Output fully connected.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_ds_cnn_model(model_settings, model_size_info):
+ """Builds a model with convolutional & depthwise separable convolutional layers.
+
+ For more details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines number of layers, followed by the DS-Conv layer
+ parameters in the order {number of conv features, conv filter height,
+ width and stride in y,x dir.} for each of the layers.
+
+ Returns:
+ tf.keras Model of the 'DS-CNN' architecture.
+ """
+
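+  # Illustrative layout only (not a released configuration): a model_size_info
+  # of [2, 64, 10, 4, 2, 2, 64, 3, 3, 1, 1] would describe two layers: a
+  # standard convolution with 64 features, a 10x4 (time x frequency) kernel and
+  # 2x2 stride, followed by one depthwise separable block with 64 features, a
+  # 3x3 kernel and 1x1 stride.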
+ label_count = model_settings['label_count']
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ t_dim = input_time_size
+ f_dim = input_frequency_size
+
+ # Extract model dimensions from model_size_info.
+ num_layers = model_size_info[0]
+ conv_feat = [None]*num_layers
+ conv_kt = [None]*num_layers
+ conv_kf = [None]*num_layers
+ conv_st = [None]*num_layers
+ conv_sf = [None]*num_layers
+
+ i = 1
+ for layer_no in range(0, num_layers):
+ conv_feat[layer_no] = model_size_info[i]
+ i += 1
+ conv_kt[layer_no] = model_size_info[i]
+ i += 1
+ conv_kf[layer_no] = model_size_info[i]
+ i += 1
+ conv_st[layer_no] = model_size_info[i]
+ i += 1
+ conv_sf[layer_no] = model_size_info[i]
+ i += 1
+
+  inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # Depthwise separable convolutions.
+ for layer_no in range(0, num_layers):
+ if layer_no == 0:
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[0],
+ kernel_size=(conv_kt[0], conv_kf[0]),
+ strides=(conv_st[0], conv_sf[0]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ else:
+ # Depthwise convolution.
+ x = tf.keras.layers.DepthwiseConv2D(kernel_size=(conv_kt[layer_no], conv_kf[layer_no]),
+ strides=(conv_sf[layer_no], conv_st[layer_no]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ # Pointwise convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[layer_no], kernel_size=(1, 1))(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ t_dim = math.ceil(t_dim/float(conv_st[layer_no]))
+ f_dim = math.ceil(f_dim/float(conv_sf[layer_no]))
+
+ # Global average pool.
+ x = tf.keras.layers.AveragePooling2D(pool_size=(t_dim, f_dim), strides=1)(x)
+
+ # Squeeze before passing to output fully connected layer.
+ x = tf.reshape(x, shape=(-1, conv_feat[layer_no]))
+
+ # Output connected layer.
+ output = tf.keras.layers.Dense(units=label_count, activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/optimisations.py b/models/keyword_spotting/dnn_small/model_package_tf/optimisations.py
new file mode 100644
index 0000000..16b6f4c
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/optimisations.py
@@ -0,0 +1,259 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for optimizing simple keyword spotting models using clustering API."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+import tensorflow_model_optimization as tfmot
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def print_model_weight_clusters(model):
+
+ for layer in model.layers:
+ if isinstance(layer, tf.keras.layers.Wrapper):
+ weights = layer.trainable_weights
+ else:
+ weights = layer.weights
+ for weight in weights:
+ if "kernel" in weight.name:
+ unique_count = len(np.unique(weight))
+ print(
+ f"{layer.name}/{weight.name}: {unique_count} clusters "
+ )
+
+
+def optimize():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model to optimize from checkpoint.
+    model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, True)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
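+    # With this script's defaults (how_many_training_steps='3750,750' and
+    # learning_rate='0.001,0.0001') the boundary list is [3750], so fine-tuning
+    # runs at lr 0.001 for the first 3750 steps and at 0.0001 thereafter.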
+
+ cluster_weights = tfmot.clustering.keras.cluster_weights
+ CentroidInitialization = tfmot.clustering.keras.CentroidInitialization
+
+ clustering_params = {
+ 'number_of_clusters': 32,
+ 'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS}
+
+ clustered_model = cluster_weights(model, **clustering_params)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Train the model with clustering applied.
+ clustered_model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data)
+
+ stripped_clustered_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+
+ print_model_weight_clusters(stripped_clustered_model)
+
+ # Save the clustered model weights
+ train_dir = Path(FLAGS.train_dir) / "optimized"
+ train_dir.mkdir(parents=True, exist_ok=True)
+
+ stripped_clustered_model.save_weights((train_dir /
+ (FLAGS.model_architecture +
+ "_clustered_ckpt")))
+
+ # Test the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ stripped_clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ test_loss, test_acc = stripped_clustered_model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='3750,750',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--save_step_interval',
+ type=int,
+ default=100,
+ help='Save model checkpoint every save_steps.')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from before fine-tuning.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ optimize()
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/recreate_model.sh b/models/keyword_spotting/dnn_small/model_package_tf/recreate_model.sh
new file mode 100644
index 0000000..d00f43f
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/recreate_model.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ckpt_path=model_archive/model_source/weights/dnn_0.84_ckpt
+train=false
+
+# Parse command line args
+while (( $# >= 1 )); do
+ case $1 in
+ --ckpt)
+ if [ "$2" ]; then
+ ckpt_path=$2
+ shift
+ else
+ printf 'ERROR: "--ckpt" requires a path to be supplied.\n'
+ exit 1
+ fi
+ ;;
+ --train)
+ train=true
+ break;;
+ *) shift;
+ esac;
+done
+
+
+# DNN Small training
+if [ "$train" = true ]
+then
+python train.py --model_architecture dnn --model_size_info 144 144 144 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 40 --learning_rate 0.0005,0.0001,0.00002 --how_many_training_steps 10000,10000,10000 --summaries_dir work/DNN/DNN_S/retrain_logs --train_dir work/DNN/DNN_S/training
+fi
+
+# Conversion to TFLite fp32
+python convert_to_tflite.py --model_architecture dnn --model_size_info 144 144 144 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 40 --checkpoint $ckpt_path --no-quantize
+
+# Conversion to TFLite int8
+python convert_to_tflite.py --model_architecture dnn --model_size_info 144 144 144 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 40 --checkpoint $ckpt_path --inference_type int8
+
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/requirements.txt b/models/keyword_spotting/dnn_small/model_package_tf/requirements.txt
new file mode 100644
index 0000000..3448cff
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/requirements.txt
@@ -0,0 +1,3 @@
+numpy == 1.19.5
+tensorflow == 2.5.0
+tensorflow-model-optimization == 0.6.0
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/train.py b/models/keyword_spotting/dnn_small/model_package_tf/train.py
new file mode 100644
index 0000000..8c488b3
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/train.py
@@ -0,0 +1,227 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for training simple keyword spotting models."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def train():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model.
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, True)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
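+    # For example, with the defaults how_many_training_steps='15000,3000' and
+    # eval_step_interval=400, training runs for ceil(18000 / 400) = 45 'epochs'
+    # of 400 steps each.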
+
+ # Callbacks.
+ train_dir = Path(FLAGS.train_dir) / "best"
+ train_dir.mkdir(parents=True, exist_ok=True)
+ model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
+ filepath=(train_dir / (FLAGS.model_architecture + "_{val_accuracy:.3f}_ckpt")),
+ save_weights_only=True,
+ monitor='val_accuracy',
+ mode='max',
+ save_best_only=True)
+ tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.summaries_dir)
+
+ # Train the model.
+ model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data,
+ callbacks=[model_checkpoint_callback, tensorboard_callback])
+
+ # Test and save the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ test_loss, test_acc = model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+ model.save(f'saved_model/{FLAGS.model_architecture}')
+ model.save(f'keras/{FLAGS.model_architecture}.h5')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='15000,3000',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--summaries_dir',
+ type=str,
+ default='/tmp/retrain_logs',
+ help='Where to save summary logs for TensorBoard.')
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ train()
diff --git a/models/keyword_spotting/dnn_small/model_package_tf/validation_utils/labels.txt b/models/keyword_spotting/dnn_small/model_package_tf/validation_utils/labels.txt
new file mode 100644
index 0000000..ba41645
--- /dev/null
+++ b/models/keyword_spotting/dnn_small/model_package_tf/validation_utils/labels.txt
@@ -0,0 +1,12 @@
+_silence_
+_unknown_
+yes
+no
+up
+down
+left
+right
+on
+off
+stop
+go
\ No newline at end of file
diff --git a/models/keyword_spotting/dnn_small/tflite_int8/README.md b/models/keyword_spotting/dnn_small/tflite_int8/README.md
deleted file mode 100644
index 1f5d3f8..0000000
--- a/models/keyword_spotting/dnn_small/tflite_int8/README.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# DNN Small INT8
-
-## Description
-This is a fully quantized version (asymmetrical int8) of the DNN Small model developed by Arm, with training checkpoints, from the Hello Edge paper. Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
-
-## License
-[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
-
-## Related Materials
-### Class Labels
-The class labels associated with this model can be downloaded by running the script `get_class_labels.sh`.
-
-### Model Recreation Code
-Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m.
-
-## Network Information
-| Network Information | Value |
-|---------------------|------------------|
-| Framework | TensorFlow Lite |
-| SHA-1 Hash | 4b92e09fb43b2f042ce2811b91c7c67bf7186b6b |
-| Size (Bytes) | 83544 |
-| Provenance | https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m |
-| Paper | https://arxiv.org/abs/1711.07128 |
-
-## Accuracy
-Dataset: Google Speech Commands Test Set
-
-| Metric | Value |
-|--------|-------|
-| Accuracy | 0.825 |
-
-## Performance
-| Platform | Optimized |
-|----------|:---------:|
-| Cortex-A |:heavy_check_mark: |
-| Cortex-M |:heavy_check_mark: |
-| Mali GPU |:heavy_check_mark: |
-| Ethos U |:heavy_check_mark: |
-
-### Key
-* :heavy_check_mark: - Will run on this platform.
-* :heavy_multiplication_x: - Will not run on this platform.
-
-
-
-## Optimizations
-| Optimization | Value |
-|-----------------|---------|
-| Quantization | INT8 |
-
-## Network Inputs
-| Input Node Name | Shape | Description |
-|-----------------|---------|-------------|
-| input | (1, 250) | The input is a processed MFCCs of shape (1, 250) |
-
-## Network Outputs
-| Output Node Name | Shape | Description |
-|------------------|---------|-------------|
-| Identity | (1, 12) | The probability on 12 keywords. |
diff --git a/models/keyword_spotting/dnn_small/tflite_int8/definition.yaml b/models/keyword_spotting/dnn_small/tflite_int8/definition.yaml
deleted file mode 100644
index 7f66d4d..0000000
--- a/models/keyword_spotting/dnn_small/tflite_int8/definition.yaml
+++ /dev/null
@@ -1,41 +0,0 @@
-benchmark:
- Google Speech Commands test set:
- Accuracy: 82.45%
-description: 'This is a fully quantized version (asymmetrical int8) of the DNN Small
- model developed by Arm, with training checkpoints, from the Hello Edge paper. Code
- to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m'
-license:
-- Apache-2.0
-network:
- file_size_bytes: 83544
- filename: dnn_s_quantized.tflite
- framework: TensorFlow Lite
- hash:
- algorithm: sha1
- value: 4b92e09fb43b2f042ce2811b91c7c67bf7186b6b
- provenance: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
- quality_level: null
-network_parameters:
- input_nodes:
- - description: The input is a processed MFCCs of shape (1, 250)
- example_input:
- path: models/keyword_spotting/dnn_small/tflite_int8/testing_input/input
- name: input
- shape:
- - 1
- - 250
- output_nodes:
- - description: The probability on 12 keywords.
- name: Identity
- shape:
- - 1
- - 12
- test_output_path: models/keyword_spotting/dnn_small/tflite_int8/testing_output/Identity
-operators:
- TensorFlow Lite:
- - DEQUANTIZE
- - FULLY_CONNECTED
- - QUANTIZE
- - RELU
- - SOFTMAX
-paper: https://arxiv.org/abs/1711.07128
diff --git a/models/keyword_spotting/dnn_small/tflite_int8/get_class_labels.sh b/models/keyword_spotting/dnn_small/tflite_int8/get_class_labels.sh
deleted file mode 100755
index e59caf5..0000000
--- a/models/keyword_spotting/dnn_small/tflite_int8/get_class_labels.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the License); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an AS IS BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/usr/bin/env bash
-
-wget https://raw.githubusercontent.com/ARM-software/ML-KWS-for-MCU/e9cf319e9aa2ff71d433e111477dd95329fb94cb/Pretrained_models/labels.txt
-mv labels.txt labelmappings.txt
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/README.md b/models/keyword_spotting/ds_cnn_large/model_package_tf/README.md
new file mode 100644
index 0000000..c4e4d69
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/README.md
@@ -0,0 +1,115 @@
+# DS-CNN Large model package
+
+This folder contains code that will allow you to recreate the DS-CNN Large keyword spotting model from
+the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf).
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Model Package Overview
+| Model | DS_CNN_Large |
+|:---------------: |:------------------------------------------:|
+| **Format**: | Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |
+| **Feature**: | Keyword spotting for Arm Cortex-M CPUs |
+| **Architectural Delta w.r.t. Vanilla**: | None |
+| **Domain**: | Keyword spotting |
+| **Package Quality**: | Hero |
+
+## Model Recreation
+
+To recreate the model you will first need to be using ```Python3.7``` and to have installed the requirements in ```requirements.txt```.
+
+Once these requirements are satisfied, you can execute the recreation script contained in this folder by running:
+
+```bash
+bash ./recreate_model.sh
+```
+
+Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder
+to generate the TFLite files and perform evaluation on the test sets. Both an fp32 version and a quantized version will be produced.
+The quantized version is fully quantized using post-training quantization.
+
+If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:
+
+```bash
+bash ./recreate_model.sh --train
+```
+
+Training is then performed and should produce a model that reaches the accuracy stated in this repository.
+Note that the TFLite export will still use the pre-trained checkpoint files, so you will need to re-run the script
+and this time supply the path to the new checkpoint files you want to use, for example:
+
+```bash
+bash ./recreate_model.sh --ckpt <path_to_checkpoint>
+```
+
+
+## Training
+
+To train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:
+
+```
+python train.py --model_architecture dnn --model_size_info 128 128 128
+```
+The command line argument *--model_size_info* is used to pass the neural network layer
+dimensions (such as the number of layers and the convolution filter size/stride) as a list to models.py,
+which builds the TensorFlow graph based on the provided model architecture
+and layer dimensions. For more info on *model_size_info* for each network architecture see
+[models.py](model_core_utils/models.py).
+
+The training commands with all the hyperparameters to reproduce the models shown in the
+[paper](https://arxiv.org/pdf/1711.07128.pdf) are given [here](recreate_model.sh).
+
+## Testing
+To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:
+```
+python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint path>
+```
+The parameters used here should match those used in the Training step.
+
+## Optimization
+
+We introduce a new *optional* step to optimize the trained keyword spotting model for deployment.
+
+Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. The clustering hyperparameters are 32 weight clusters with the kmeans++ centroid initialization method.
+
+To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.
+You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.
+
+To apply the optimization and fine-tuning, run the following command:
+```
+python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint path>
+```
+The parameters used here should match those used in the Training step, except for the number of training steps.
+The number of training steps is reduced since the optimization step only requires fine-tuning.
+
+This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model.
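+
+For reference, the clustering applied by `optimisations.py` boils down to the following minimal sketch. The `model` below is a toy placeholder; the real script builds the network with `model_core_utils/models.py`, loads the trained checkpoint, and fine-tunes the clustered model before stripping:
+
+```python
+import tensorflow as tf
+import tensorflow_model_optimization as tfmot
+
+# Toy placeholder standing in for the trained keyword spotting model.
+model = tf.keras.Sequential([tf.keras.layers.Dense(12, input_shape=(250,))])
+
+# 32 clusters with kmeans++ centroid initialization, as described above.
+clustering_params = {
+    'number_of_clusters': 32,
+    'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS}
+
+clustered_model = tfmot.clustering.keras.cluster_weights(model, **clustering_params)
+# ... compile and fine-tune clustered_model on the training data here ...
+
+# Strip the clustering wrappers before saving the checkpoint used for conversion.
+stripped_clustered_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+```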
+
+## Quantization and TFLite Conversion
+
+As part of the update we now use TensorFlow's
+[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to
+make quantization of the trained models super simple.
+
+To quantize your trained model (e.g. a DNN) run:
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint path> [--inference_type int8|int16]
+```
+The parameters used here should match those used in the Training step.
+
+The *--inference_type* parameter is *optional* and is used when a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32.
+
+This step will produce a quantized TFLite file *dnn_quantized.tflite*.
+You can test the accuracy of this quantized model on the test set by running:
+```
+python evaluation.py --tflite_path dnn_quantized.tflite
+```
+The parameters used here should match those used in the Training step.
+
+`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:
+
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <checkpoint path> --no-quantize
+```
+
+This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.
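+
+For reference, the post-training quantization performed by `convert_to_tflite.py` is roughly the following minimal sketch. The model and representative dataset here are toy placeholders; the real script builds the network from the trained checkpoint and draws representative samples from the Speech Commands validation set:
+
+```python
+import numpy as np
+import tensorflow as tf
+
+# Toy placeholder standing in for the trained keyword spotting model.
+model = tf.keras.Sequential([tf.keras.layers.Dense(12, input_shape=(250,), activation='softmax')])
+
+def representative_dataset():
+    # convert_to_tflite.py yields real MFCC features from the validation set here.
+    for _ in range(100):
+        yield [np.random.rand(1, 250).astype(np.float32)]
+
+converter = tf.lite.TFLiteConverter.from_keras_model(model)
+converter.optimizations = [tf.lite.Optimize.DEFAULT]
+converter.representative_dataset = representative_dataset
+converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+converter.inference_input_type = tf.int8   # as with --inference_type int8
+converter.inference_output_type = tf.int8
+
+tflite_model = converter.convert()
+with open('dnn_quantized.tflite', 'wb') as f:
+    f.write(tflite_model)
+```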
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/convert_to_tflite.py b/models/keyword_spotting/ds_cnn_large/model_package_tf/convert_to_tflite.py
new file mode 100644
index 0000000..64ab8df
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/convert_to_tflite.py
@@ -0,0 +1,234 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for converting and quantizing a trained keyword spotting
+ model and saving to TFLite."""
+
+import argparse
+
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from evaluation import tflite_test
+
+NUM_REP_DATA_SAMPLES = 100 # How many samples to use for post training quantization.
+
+
+def convert(model_settings, audio_processor, checkpoint, quantize, inference_type, tflite_path):
+ """Load our trained floating point model and convert it.
+
+ TFLite conversion or post training quantization is performed and the
+ resulting model is saved as a TFLite file.
+ We use samples from the validation set to do post training quantization.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ checkpoint: Path to training checkpoint to load.
+ quantize: Whether to quantize the model or convert to fp32 TFLite model.
+ inference_type: Input/output type of the quantized model.
+ tflite_path: Output TFLite file save path.
+ """
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(checkpoint).expect_partial()
+
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+
+ def _rep_dataset():
+ """Generator function to produce representative dataset."""
+ i = 0
+ for mfcc, label in val_data:
+            if i >= NUM_REP_DATA_SAMPLES:
+ break
+ i += 1
+ yield [mfcc]
+
+ if quantize:
+ # Quantize model and save to disk.
+ tflite_model = post_training_quantize(model, inference_type, _rep_dataset)
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Quantized model saved to {tflite_path}.')
+ else:
+ converter = tf.lite.TFLiteConverter.from_keras_model(model)
+ tflite_model = converter.convert()
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Converted model saved to {tflite_path}.')
+
+
+def post_training_quantize(keras_model, inference_type, rep_dataset):
+ """Perform post training quantization and returns the TFLite model ready for saving.
+
+ See https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization for
+ more details.
+
+ Args:
+ keras_model: The trained tf Keras model used for post training quantization.
+ inference_type: Input/output type of the quantized model.
+ rep_dataset: Function to use as a representative dataset, must be callable.
+
+ Returns:
+ Quantized TFLite model ready for saving to disk.
+ """
+ converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+ converter.optimizations = [tf.lite.Optimize.DEFAULT]
+
+    if inference_type == 'int8':
+        converter.inference_input_type = tf.int8
+        converter.inference_output_type = tf.int8
+        supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS_INT8
+    elif inference_type == 'int16':
+        converter.inference_input_type = tf.int16
+        converter.inference_output_type = tf.int16
+        supported_ops = tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
+    else:
+        # Default ('fp32'): keep float inputs/outputs but fully quantize the internals.
+        supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS_INT8
+
+ # Int8 post training quantization needs representative dataset.
+ converter.representative_dataset = rep_dataset
+ converter.target_spec.supported_ops = [supported_ops]
+
+ tflite_model = converter.convert()
+
+ return tflite_model
+
+
+def main():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.quantize:
+ tflite_path = f'{FLAGS.model_architecture}_quantized.tflite'
+ else:
+ tflite_path = f'{FLAGS.model_architecture}.tflite'
+
+ # Load floating point model from checkpoint and convert it.
+ convert(model_settings, audio_processor, FLAGS.checkpoint,
+ FLAGS.quantize, FLAGS.inference_type, tflite_path)
+
+ # Test the newly converted model on the test set.
+ tflite_test(model_settings, audio_processor, tflite_path)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from.')
+ parser.add_argument(
+ '--quantize',
+ dest='quantize',
+ action="store_true",
+ default=True,
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--no-quantize',
+ dest='quantize',
+ action="store_false",
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--inference_type',
+ type=str,
+ default='fp32',
+        help='If quantize is true, whether the model input and output is fp32, int8 or int16')
+
+ FLAGS, _ = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/data_processing/__init__.py b/models/keyword_spotting/ds_cnn_large/model_package_tf/data_processing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/data_processing/data_preprocessing.py b/models/keyword_spotting/ds_cnn_large/model_package_tf/data_processing/data_preprocessing.py
new file mode 100644
index 0000000..05cf5ba
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/data_processing/data_preprocessing.py
@@ -0,0 +1,462 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Modifications Copyright 2023 Arm Inc. All Rights Reserved.
+# Modified to use TensorFlow 2.0 and data pipelines.
+#
+"""Functions for loading and preparing data for keyword spotting."""
+
+import os
+import re
+import sys
+import urllib
+from pathlib import Path
+import tarfile
+import hashlib
+import random
+import math
+from enum import Enum
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.ops import gen_audio_ops as audio_ops
+
+MAX_NUM_WAVS_PER_CLASS = 2**27 - 1 # ~134M
+RANDOM_SEED = 59185
+BACKGROUND_NOISE_DIR_NAME = '_background_noise_'
+SILENCE_LABEL = '_silence_'
+SILENCE_INDEX = 0
+UNKNOWN_WORD_INDEX = 1
+UNKNOWN_WORD_LABEL = '_unknown_'
+
+
+def load_wav_file(wav_filename, desired_samples):
+ """Loads and then decodes a given 16bit PCM wav file.
+
+ Decoded audio is scaled to the range [-1, 1] and padded or cropped to the desired number of samples.
+
+ Args:
+ wav_filename: 16bit PCM wav file to load.
+ desired_samples: Number of samples wanted from the audio file.
+
+ Returns:
+ Tuple consisting of the decoded audio and sample rate.
+ """
+ wav_file = tf.io.read_file(wav_filename)
+ decoded_wav = audio_ops.decode_wav(wav_file, desired_channels=1, desired_samples=desired_samples)
+
+ return decoded_wav.audio, decoded_wav.sample_rate
+
+
+def calculate_mfcc(audio_signal, audio_sample_rate, window_size, window_stride, num_mfcc):
+ """Returns Mel Frequency Cepstral Coefficients (MFCC) for a given audio signal.
+
+ Args:
+ audio_signal: Raw audio signal in range [-1, 1]
+ audio_sample_rate: Audio signal sample rate
+ window_size: Window size in samples for calculating spectrogram
+ window_stride: Window stride in samples for calculating spectrogram
+ num_mfcc: The number of MFCC features wanted.
+
+ Returns:
+        Calculated MFCC features.
+ """
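+    # For example (not executed here): with 16 kHz audio, a 30 ms window is 480
+    # samples and a 10 ms stride is 160 samples, so a 1 second clip yields
+    # 1 + (16000 - 480) // 160 = 98 frames of num_mfcc coefficients each.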
+ spectrogram = audio_ops.audio_spectrogram(input=audio_signal, window_size=window_size, stride=window_stride,
+ magnitude_squared=True)
+
+ mfcc_features = audio_ops.mfcc(spectrogram, audio_sample_rate, dct_coefficient_count=num_mfcc)
+
+ return mfcc_features
+
+
+def which_set(filename, validation_percentage, testing_percentage):
+ """Determines which data partition the file should belong to.
+
+ We want to keep files in the same training, validation, or testing sets even
+ if new ones are added over time. This makes it less likely that testing
+ samples will accidentally be reused in training when long runs are restarted
+ for example. To keep this stability, a hash of the filename is taken and used
+ to determine which set it should belong to. This determination only depends on
+ the name and the set proportions, so it won't change as other files are added.
+ It's also useful to associate particular files as related (for example words
+ spoken by the same person), so anything after '_nohash_' in a filename is
+ ignored for set determination. This ensures that 'bobby_nohash_0.wav' and
+ 'bobby_nohash_1.wav' are always in the same set, for example.
+
+ Args:
+ filename: File path of the data sample.
+ validation_percentage: How much of the data set to use for validation.
+ testing_percentage: How much of the data set to use for testing.
+
+ Returns:
+ String, one of 'training', 'validation', or 'testing'.
+ """
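+    # For example, which_set('bobby_nohash_0.wav', 10, 10) and
+    # which_set('bobby_nohash_1.wav', 10, 10) always land in the same partition,
+    # since everything after '_nohash_' is ignored when hashing the name.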
+ base_name = os.path.basename(filename)
+ # We want to ignore anything after '_nohash_' in the file name when
+ # deciding which set to put a wav in, so the data set creator has a way of
+ # grouping wavs that are close variations of each other.
+ hash_name = re.sub(r'_nohash_.*$', '', base_name)
+ # This looks a bit magical, but we need to decide whether this file should
+ # go into the training, testing, or validation sets, and we want to keep
+ # existing files in the same set even if more files are subsequently
+ # added.
+ # To do that, we need a stable way of deciding based on just the file name
+ # itself, so we do a hash of that and then use that to generate a
+ # probability value that we use to assign it.
+ hash_name_hashed = hashlib.sha1(tf.compat.as_bytes(hash_name)).hexdigest()
+ percentage_hash = ((int(hash_name_hashed, 16) %
+ (MAX_NUM_WAVS_PER_CLASS + 1)) *
+ (100.0 / MAX_NUM_WAVS_PER_CLASS))
+ if percentage_hash < validation_percentage:
+ result = 'validation'
+ elif percentage_hash < (testing_percentage + validation_percentage):
+ result = 'testing'
+ else:
+ result = 'training'
+ return result
+
+
+def prepare_words_list(wanted_words):
+ """Prepends common tokens to the custom word list.
+
+ Args:
+ wanted_words: List of strings containing custom words to spot.
+
+ Returns:
+ List of words with silence and unknown tokens added.
+ """
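+    # e.g. prepare_words_list(['yes', 'no']) -> ['_silence_', '_unknown_', 'yes', 'no']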
+ return [SILENCE_LABEL, UNKNOWN_WORD_LABEL] + wanted_words
+
+
+class AudioProcessor:
+ """Handles loading, partitioning, and preparing audio training data."""
+
+ class Modes(Enum):
+ TRAINING = 1
+ VALIDATION = 2
+ TESTING = 3
+
+ def __init__(self, data_url, data_dir, silence_percentage, unknown_percentage,
+ wanted_words, validation_percentage, testing_percentage, model_settings):
+ self.data_dir = Path(data_dir)
+ self.model_settings = model_settings
+ self.words_list = prepare_words_list(wanted_words)
+
+ self._tf_datasets = {}
+ self.background_data = None
+ self._set_size = {'training': 0, 'validation': 0, 'testing': 0}
+
+ self._download_and_extract_data(data_url, data_dir)
+ self._prepare_datasets(silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage)
+ self._prepare_background_data()
+
+ def get_data(self, mode, background_frequency=0, background_volume_range=0, time_shift=0):
+ """Returns the train, validation or test set for KWS as a TF Dataset.
+
+ Args:
+ mode: The set to return, see AudioProcessor.Modes enumeration.
+ background_frequency: How many of the samples have background noise mixed in.
+ background_volume_range: How loud the background noise should be, between 0 and 1.
+ time_shift: Range to randomly shift the training audio by in time.
+
+ Returns:
+ TF dataset that will generate tuples containing an mfcc and corresponding label.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ dataset = self._tf_datasets['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ dataset = self._tf_datasets['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ dataset = self._tf_datasets['testing']
+ else:
+            raise ValueError("Incorrect dataset type given")
+
+ use_background = (self.background_data is not None) and (mode == AudioProcessor.Modes.TRAINING)
+ dataset = dataset.map(lambda path, label: self._process_path(path, label, self.model_settings,
+ background_frequency, background_volume_range,
+ time_shift, use_background, self.background_data),
+ num_parallel_calls=tf.data.experimental.AUTOTUNE)
+
+ return dataset
+
+ def set_size(self, mode):
+ """Get the number of samples in the requested dataset partition.
+
+ Args:
+ mode: Which partition, see AudioProcessor.Modes enumeration.
+
+ Returns:
+ Number of samples in the partition.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ return self._set_size['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ return self._set_size['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ return self._set_size['testing']
+ else:
+            raise ValueError('Incorrect dataset type given')
+
+ @staticmethod
+ def _process_path(path, label, model_settings, background_frequency, background_volume_range, time_shift_samples,
+ use_background, background_data):
+ """Load wav files and calculate mfcc features.
+
+ Random shifting of samples and adding in background noise is done within this function as well.
+ This function is meant to be mapped onto a TF Dataset by using a lambda function.
+
+ Args:
+ path: Path to the wav file to load.
+ label: Integer label for classifying the audio clip.
+ model_settings: Dictionary of settings for model being trained.
+ background_frequency: How many clips will have background noise, 0.0 to 1.0.
+ background_volume_range: How loud the background noise will be.
+ time_shift_samples: How much to randomly shift the clips by.
+ use_background: Add in background noise to audio clips or not.
+ background_data: Ragged tensor of loaded background noise samples.
+
+ Returns:
+ Tuple of calculated flattened mfcc and its class label.
+ """
+
+ desired_samples = model_settings['desired_samples']
+ audio, sample_rate = load_wav_file(path, desired_samples=desired_samples)
+
+ # Make our own silence audio data.
+ if label == SILENCE_INDEX:
+ audio = tf.multiply(audio, 0)
+
+ # Shift samples start position and pad any gaps with zeros.
+ if time_shift_samples > 0:
+ time_shift_amount = tf.random.uniform(shape=(), minval=-time_shift_samples, maxval=time_shift_samples,
+ dtype=tf.int32)
+ else:
+ time_shift_amount = 0
+ if time_shift_amount > 0:
+ time_shift_padding = [[time_shift_amount, 0], [0, 0]]
+ time_shift_offset = [0, 0]
+ else:
+ time_shift_padding = [[0, -time_shift_amount], [0, 0]]
+ time_shift_offset = [-time_shift_amount, 0]
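+        # e.g. a shift of +100 samples pads 100 zeros at the front of the clip
+        # (delaying the audio), while a shift of -100 pads 100 zeros at the end
+        # and starts the slice 100 samples in (advancing the audio); either way
+        # the result below is trimmed back to exactly `desired_samples` samples.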
+
+ padded_foreground = tf.pad(audio, time_shift_padding, mode='CONSTANT')
+ sliced_foreground = tf.slice(padded_foreground, time_shift_offset, [desired_samples, -1])
+
+ # Get a random section of background noise.
+ if use_background:
+ background_index = tf.random.uniform(shape=(), maxval=background_data.shape[0], dtype=tf.int32)
+ background_sample = background_data[background_index]
+ background_offset = tf.random.uniform(shape=(), maxval=len(background_sample)-desired_samples,
+ dtype=tf.int32)
+ background_clipped = background_sample[background_offset:(background_offset + desired_samples)]
+ background_reshaped = tf.reshape(background_clipped, [desired_samples, 1])
+ if tf.random.uniform(shape=(), maxval=1) < background_frequency:
+ background_volume = tf.random.uniform(shape=(), maxval=background_volume_range)
+ else:
+ background_volume = tf.constant(0, dtype='float32')
+ else:
+ background_reshaped = np.zeros([desired_samples, 1], dtype=np.float32)
+ background_volume = tf.constant(0, dtype='float32')
+
+ # Mix in background noise.
+ background_mul = tf.multiply(background_reshaped, background_volume)
+ background_add = tf.add(background_mul, sliced_foreground)
+ background_clamp = tf.clip_by_value(background_add, -1.0, 1.0)
+
+ mfcc = calculate_mfcc(background_clamp, sample_rate, model_settings['window_size_samples'],
+ model_settings['window_stride_samples'],
+ model_settings['dct_coefficient_count'])
+ mfcc = tf.reshape(mfcc, [-1])
+
+ return mfcc, label
+
+ def _download_and_extract_data(self, data_url, target_directory):
+ """Downloads and extracts file to target directory.
+
+        If the file does not already exist, download it and then untar it into the target directory.
+
+ Args:
+ data_url: Web link to the tarred data to download.
+ target_directory: Directory to download and extract to.
+ """
+ target_directory = Path(target_directory)
+ target_directory.mkdir(exist_ok=True)
+
+ filename = data_url.split('/')[-1]
+ filepath = target_directory / filename
+
+ if not filepath.exists():
+ def _report_hook(block_num, block_size, total_size):
+ """Function to track download progress in urllib"""
+ read_so_far = block_num * block_size
+ percent = (read_so_far / total_size) * 100.0
+
+ s = f"\rDownloading {filename} {percent:.1f}%"
+
+ sys.stdout.write(s)
+ sys.stdout.flush()
+
+ filepath, _ = urllib.request.urlretrieve(data_url, filepath, _report_hook)
+ print()
+
+ print(f'Untarring {filename}...')
+        with tarfile.open(filepath, 'r:gz') as tar:
+            tar.extractall(target_directory)
+
+ def _prepare_datasets(self, silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage):
+ """Split the data into train, validation and testing sets.
+
+ Silence and unknown data is added, then sets are converted to TF Datasets.
+
+ Args:
+            silence_percentage: Percent of words that should be silence.
+ unknown_percentage: Percent of words that should be unknown.
+ wanted_words: List of words wanted to classify.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ """
+ # Make sure the shuffling and picking of unknowns is deterministic.
+ random.seed(RANDOM_SEED)
+ wanted_words_index = {}
+
+ for index, wanted_word in enumerate(wanted_words):
+ wanted_words_index[wanted_word] = index + 2
+
+ # Find all wav files in subfolders.
+ search_path = self.data_dir / '*' / '*.wav'
+ data_index, unknown_index, all_words = self._find_and_sort_wavs(search_path, validation_percentage,
+ testing_percentage, wanted_words_index)
+
+ for index, wanted_word in enumerate(wanted_words):
+ if wanted_word not in all_words:
+ raise Exception(f'Tried to find {wanted_word} in labels but only found: {", ".join(all_words.keys())}')
+
+ word_to_index = {}
+ for word in all_words:
+ if word in wanted_words_index:
+ word_to_index[word] = wanted_words_index[word]
+ else:
+ word_to_index[word] = UNKNOWN_WORD_INDEX
+ word_to_index[SILENCE_LABEL] = SILENCE_INDEX
+
+ # We need an arbitrary file to load as the input for the silence samples.
+ # It's multiplied by zero later, so the content doesn't matter.
+ silence_wav_path = data_index['training'][0]['file']
+ for set_index in ['validation', 'testing', 'training']:
+ set_size = len(data_index[set_index]) # Size before adding silence and unknown samples.
+ silence_size = int(math.ceil(set_size * silence_percentage / 100))
+ for _ in range(silence_size):
+ data_index[set_index].append({
+ 'label': SILENCE_LABEL,
+ 'file': silence_wav_path
+ })
+ # Pick some unknowns to add to each partition of the data set.
+ random.shuffle(unknown_index[set_index])
+ unknown_size = int(math.ceil(set_size * unknown_percentage / 100))
+ data_index[set_index].extend(unknown_index[set_index][:unknown_size])
+
+ self._set_size[set_index] = len(data_index[set_index]) # Size after adding silence and unknown samples.
+
+ # Make sure the ordering is random.
+ random.shuffle(data_index[set_index])
+
+ # Transform into TF Datasets ready for easier processing later.
+ labels, paths = list(zip(*[d.values() for d in data_index[set_index]]))
+ labels = [word_to_index[label] for label in labels]
+ self._tf_datasets[set_index] = tf.data.Dataset.from_tensor_slices((list(paths), labels))
+
+ def _find_and_sort_wavs(self, search_pattern, validation_percentage, testing_percentage, wanted_words_index):
+ """Find and sort wav files into known and unknown word sets.
+
+ Known words are files containing words in the list of wanted words.
+ Any other clip goes to the unknown label set. Labels come from the folder names.
+ All clips are also assigned to train, test and validation sets.
+
+ Args:
+ search_pattern: Path pattern used by glob to find wav files.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ wanted_words_index: Dict mapping wanted words to their label index.
+
+ Returns:
+ 3-tuple of known words, unknown words and mapping of all word labels.
+ """
+ data_index = {'validation': [], 'testing': [], 'training': []}
+ unknown_index = {'validation': [], 'testing': [], 'training': []}
+ all_words = {}
+
+ for wav_path in sorted(tf.io.gfile.glob(str(search_pattern))):
+ word = Path(wav_path).parent.name.lower()
+
+ # Treat the '_background_noise_' folder as a special case, since we expect
+ # it to contain long audio samples we mix in to improve training.
+ if word == BACKGROUND_NOISE_DIR_NAME:
+ continue
+
+ all_words[word] = True
+ set_index = which_set(wav_path, validation_percentage, testing_percentage)
+ # If it's a known class, store its detail, otherwise add it to the list
+ # we'll use to train the unknown label.
+ if word in wanted_words_index:
+ data_index[set_index].append({'label': word, 'file': wav_path})
+ else:
+ unknown_index[set_index].append({'label': word, 'file': wav_path})
+ if not all_words:
+ raise Exception('No .wavs found at ' + str(search_pattern))
+
+ return data_index, unknown_index, all_words
+
+ def _prepare_background_data(self):
+ """Searches a folder for background noise audio, and loads it into memory.
+
+ It's expected that the background audio samples will be in a subdirectory
+ named '_background_noise_' inside the 'data_dir' folder, as .wavs that match
+ the sample rate of the training data, but can be much longer in duration.
+
+ If the '_background_noise_' folder doesn't exist at all, this isn't an
+ error, it's just taken to mean that no background noise augmentation should
+ be used. If the folder does exist, but it's empty, that's treated as an
+ error.
+
+ Returns:
+ Ragged tensor of raw PCM-encoded audio samples of background noise.
+            None if the '_background_noise_' folder doesn't exist.
+
+ Raises:
+ Exception: If files aren't found in the folder.
+ """
+ background_data = []
+ background_dir = Path(self.data_dir / BACKGROUND_NOISE_DIR_NAME)
+ if not background_dir.exists():
+ self.background_data = None
+ return
+
+ search_path = Path(background_dir / '*.wav')
+ for wav_path in tf.io.gfile.glob(str(search_path)):
+ wav_data, _ = load_wav_file(wav_path, desired_samples=-1)
+ background_data.append(tf.reshape(wav_data, [-1]))
+
+ if not background_data:
+ raise Exception('No background wav files were found in ' + str(search_path))
+
+        # Ragged tensor as we can't use lists in tf dataset map functions.
+ self.background_data = tf.ragged.stack(background_data)
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/ds_cnn_l_inference_keras.py b/models/keyword_spotting/ds_cnn_large/model_package_tf/ds_cnn_l_inference_keras.py
new file mode 100644
index 0000000..db7694a
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/ds_cnn_l_inference_keras.py
@@ -0,0 +1,76 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import argparse
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+    with open(filename, "r") as f:
+        return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+
+ model = tf.keras.models.load_model(FLAGS.keras_file_path)
+ predictions = model.predict(x)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--keras_file_path',
+ type=str,
+ default='',
+ help='Path to the .h5 Keras model file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/ds_cnn_l_inference_tflite.py b/models/keyword_spotting/ds_cnn_large/model_package_tf/ds_cnn_l_inference_tflite.py
new file mode 100644
index 0000000..9f79d99
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/ds_cnn_l_inference_tflite.py
@@ -0,0 +1,120 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import numpy as np
+import argparse
+
+
+def tflite_inference(input_data, tflite_path):
+    """Calls the forward pass of a TFLite file and returns the result.
+
+ Args:
+ input_data: Input data to use on forward pass.
+ tflite_path: Path to TFLite file to run.
+
+ Returns:
+ Output from inference.
+ """
+ supported_quant_dtypes = (np.int8, np.int16)
+ interpreter = tf.lite.Interpreter(model_path=tflite_path)
+ interpreter.allocate_tensors()
+
+ input_details = interpreter.get_input_details()
+ output_details = interpreter.get_output_details()
+
+ input_dtype = input_details[0]["dtype"]
+ output_dtype = output_details[0]["dtype"]
+
+ # Check if the input/output type is quantized,
+ # set scale and zero-point accordingly
+ if input_dtype in supported_quant_dtypes:
+ input_scale, input_zero_point = input_details[0]["quantization"]
+ else:
+ input_scale, input_zero_point = 1, 0
+
+ input_data = input_data / input_scale + input_zero_point
+ input_data = np.round(input_data) if input_dtype in supported_quant_dtypes else input_data
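+    # Worked example (illustrative values only): with input_scale = 0.1 and
+    # input_zero_point = -128, a float input of 1.0 becomes
+    # round(1.0 / 0.1 + (-128)) = -118 before being cast to int8 below.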
+
+ if output_dtype in supported_quant_dtypes:
+ output_scale, output_zero_point = output_details[0]["quantization"]
+ else:
+ output_scale, output_zero_point = 1, 0
+
+ interpreter.set_tensor(input_details[0]['index'], tf.cast(input_data, input_dtype))
+ interpreter.invoke()
+
+ output_data = interpreter.get_tensor(output_details[0]['index'])
+
+ output_data = output_scale * (output_data.astype(np.float32) - output_zero_point)
+
+ return output_data
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+    with open(filename, "r") as f:
+        return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+ predictions = tflite_inference(x, FLAGS.tflite_path)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ default='',
+ help='Path to TFLite file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/evaluation.py b/models/keyword_spotting/ds_cnn_large/model_package_tf/evaluation.py
new file mode 100644
index 0000000..da2c57c
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/evaluation.py
@@ -0,0 +1,250 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for testing trained keyword spotting models from checkpoint files and TFLite files."""
+
+import argparse
+
+import numpy as np
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from ds_cnn_l_inference_tflite import tflite_inference
+
+
+def tflite_test(model_settings, audio_processor, tflite_path):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A TFLite model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ tflite_path: Path to TFLite file to use for inference.
+ """
+ # Evaluate on validation set.
+ print("Running TFLite evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+ expected_indices = np.concatenate([y for x, y in val_data])
+ predicted_indices = []
+
+ for mfcc, label in val_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TFLite evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(1)
+ expected_indices = np.concatenate([y for x, y in test_data])
+ predicted_indices = []
+
+ for mfcc, label in test_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def keras_test(model_settings, audio_processor, model):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A loaded keras model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ model: Loaded keras model.
+ """
+ # Evaluate on validation set.
+ print("Running TF evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in val_data])
+
+ predictions = model.predict(val_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TF evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in test_data])
+
+ predictions = model.predict(test_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def calculate_accuracy(predicted_indices, expected_indices):
+ """Calculates and returns accuracy.
+
+ Args:
+ predicted_indices: List of predicted integer indices.
+ expected_indices: List of expected integer indices.
+
+ Returns:
+ Accuracy value between 0 and 1.
+ """
+ correct_prediction = tf.equal(predicted_indices, expected_indices)
+ accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+ return accuracy
+
+
+def evaluate():
+ """Calculate accuracy and confusion matrices on validation and test sets.
+
+ Model is created and weights loaded from supplied command line arguments.
+ """
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.tflite_path:
+ tflite_test(model_settings, audio_processor, FLAGS.tflite_path)
+
+ if FLAGS.checkpoint:
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+ keras_test(model_settings, audio_processor, model)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from')
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ help='Path to TFLite file to use for evaluation')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ evaluate()
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/how_to_guidance.ipynb b/models/keyword_spotting/ds_cnn_large/model_package_tf/how_to_guidance.ipynb
new file mode 100644
index 0000000..73d594b
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/how_to_guidance.ipynb
@@ -0,0 +1,428 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.\n",
+ "#\n",
+ "# SPDX-License-Identifier: Apache-2.0\n",
+ "#\n",
+ "# Licensed under the Apache License, Version 2.0 (the License); you may\n",
+ "# not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an AS IS BASIS, WITHOUT\n",
+ "# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# DS_CNN_Large - Hero\n",
+ "\n",
+ "Here we reproduce the models with our established codebase and ModelPackage approach for your convenience.\n",
+ "\n",
+ "## Model-Package Overview:\n",
+ "\n",
+ "| Model \t| DS_CNN_Large \t|\n",
+ "|:---------------:\t|:---------------------------------------------------------------:\t|\n",
+ "| **Format**: \t| Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |\n",
+ "| **Feature**: \t| Keyword spotting for Arm Cortex-M CPUs |\n",
+ "| **Architectural Delta w.r.t. Vanilla**: | None |\n",
+ "| **Domain**: \t| Keyword spotting |\n",
+ "| **Package Quality**: \t| Hero |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Table of contents \n",
+ "\n",
+    "This how-to guide presents the key steps needed to reproduce everything in this package. The contents are organised as below, with internal navigation links so you can jump easily between sections.\n",
+ "\n",
+ " \n",
+ "* [1.0 Model recreation](#model_recreation)\n",
+ "\n",
+ "* [2.0 Training](#training)\n",
+ "\n",
+ "* [3.0 Testing](#testing)\n",
+ "\n",
+ "* [4.0 Optimization](#optimization)\n",
+ "\n",
+ "* [5.0 Quantization and TFLite conversion](#tflite_conversion)\n",
+ "\n",
+    "* [6.0 Single inference of the TFLite model files](#tflite_inference)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1.0 Model Recreation\n",
+ "\n",
+    "To recreate the model you will first need to be using ```Python3.7``` and to install the requirements in ```requirements.txt```.\n",
+ "\n",
+    "Once these requirements are satisfied you can execute the recreation script contained within this folder by running:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 11:38:02.599656: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 11:38:53.030038: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 11:38:53.069964: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:38:53.070029: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 11:38:53.094139: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 11:38:53.094219: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 11:38:53.096985: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 11:38:53.097285: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 11:38:53.097852: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 11:38:53.098590: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 11:38:53.098752: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 11:38:53.099168: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:38:53.099481: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 11:38:53.100222: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:38:53.100624: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:38:53.100693: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 11:38:53.524442: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 11:38:53.524481: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 11:38:53.524492: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 11:38:53.524999: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10974 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 11:38:56.213089: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 11:38:58.326629: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 11:38:58.326721: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 11:38:58.327408: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:38:58.327678: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:38:58.327711: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 11:38:58.327721: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 11:38:58.327731: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 11:38:58.328025: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10974 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 11:38:58.347388: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 11:38:58.352977: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.012ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.002ms.\n",
+ "\n",
+ "2023-01-31 11:38:58.537693: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 11:38:58.537738: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 11:38:58.545075: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 11:38:58.548334: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:38:58.548626: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:38:58.548661: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 11:38:58.548672: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 11:38:58.548679: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 11:38:58.548981: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10974 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "Converted model saved to ds_cnn.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "2023-01-31 11:38:58.616947: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 339 1 5 2 3 6 5 3 2 1 4]\n",
+ " [ 0 2 382 0 0 2 8 0 0 0 0 3]\n",
+ " [ 0 5 2 382 1 7 1 0 0 0 0 8]\n",
+ " [ 0 2 0 0 323 1 3 0 1 14 5 1]\n",
+ " [ 0 2 0 6 1 361 1 1 0 0 3 2]\n",
+ " [ 0 0 5 1 0 0 344 2 0 0 0 0]\n",
+ " [ 0 3 0 1 0 0 0 358 0 0 0 1]\n",
+ " [ 1 3 0 2 4 1 0 0 344 7 0 1]\n",
+ " [ 0 2 1 0 18 0 1 0 4 342 3 2]\n",
+ " [ 0 1 0 0 8 0 0 1 0 4 335 1]\n",
+ " [ 0 4 0 9 1 5 0 0 1 2 2 348]]\n",
+ "Validation accuracy = 95.14%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 368 2 3 1 4 6 4 6 3 1 10]\n",
+ " [ 0 1 405 2 0 0 10 0 0 1 0 0]\n",
+ " [ 0 0 1 389 0 7 2 0 0 1 0 5]\n",
+ " [ 0 6 0 0 399 2 0 0 2 10 5 1]\n",
+ " [ 0 5 2 8 0 380 3 0 0 0 1 7]\n",
+ " [ 0 3 5 2 0 1 400 1 0 0 0 0]\n",
+ " [ 0 6 1 1 0 0 4 383 0 0 1 0]\n",
+ " [ 0 7 0 0 3 8 0 0 369 8 0 1]\n",
+ " [ 0 2 0 2 13 0 0 0 5 374 0 6]\n",
+ " [ 0 0 0 1 7 3 0 0 1 0 398 1]\n",
+ " [ 0 3 1 18 3 2 0 0 0 1 0 374]]\n",
+ "Test accuracy = 95.03%(N=4890)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 11:39:46.821173: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 11:40:36.690810: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 11:40:36.728954: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:40:36.728995: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 11:40:36.749408: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 11:40:36.749475: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 11:40:36.752323: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 11:40:36.752624: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 11:40:36.753198: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 11:40:36.753937: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 11:40:36.754090: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 11:40:36.754586: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:40:36.754864: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 11:40:36.755740: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:40:36.756134: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:40:36.756197: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 11:40:37.210806: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 11:40:37.210845: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 11:40:37.210854: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 11:40:37.211393: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10994 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 11:40:39.812506: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 11:40:42.235293: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 11:40:42.235385: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 11:40:42.236028: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:40:42.236295: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:40:42.236328: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 11:40:42.236339: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 11:40:42.236348: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 11:40:42.236662: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10994 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 11:40:42.255416: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 11:40:42.259691: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.012ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.002ms.\n",
+ "\n",
+ "2023-01-31 11:40:42.434390: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 11:40:42.434429: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 11:40:42.441258: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 11:40:42.444349: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:40:42.444613: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:40:42.444644: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 11:40:42.444655: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 11:40:42.444662: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 11:40:42.444950: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10994 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 11:40:42.484939: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "fully_quantize: 0, inference_type: 6, input_inference_type: 9, output_inference_type: 9\n",
+ "Quantized model saved to ds_cnn_quantized.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 333 2 4 2 3 6 5 3 1 3 9]\n",
+ " [ 0 2 378 1 0 2 8 1 0 0 2 3]\n",
+ " [ 0 5 1 375 2 5 2 0 2 0 1 13]\n",
+ " [ 0 5 0 0 321 2 3 0 1 11 6 1]\n",
+ " [ 0 2 0 7 1 354 1 1 1 2 6 2]\n",
+ " [ 0 1 8 1 2 0 338 2 0 0 0 0]\n",
+ " [ 0 2 0 1 1 0 0 355 0 1 1 2]\n",
+ " [ 1 4 0 1 3 1 1 0 345 6 1 0]\n",
+ " [ 0 1 0 1 27 0 2 1 5 330 4 2]\n",
+ " [ 0 2 1 0 9 0 0 1 0 3 333 1]\n",
+ " [ 0 4 0 12 3 5 1 0 1 0 6 340]]\n",
+ "Validation accuracy = 93.88%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 363 2 5 3 5 7 3 8 0 3 9]\n",
+ " [ 0 2 399 3 0 0 13 0 0 1 1 0]\n",
+ " [ 0 1 1 384 0 7 4 0 0 0 2 6]\n",
+ " [ 0 9 0 0 398 2 1 0 1 7 6 1]\n",
+ " [ 0 5 3 12 1 372 5 0 1 0 1 6]\n",
+ " [ 0 4 5 2 1 0 395 1 0 0 4 0]\n",
+ " [ 0 8 0 4 3 2 7 370 0 0 2 0]\n",
+ " [ 0 9 0 1 6 8 0 2 361 7 2 0]\n",
+ " [ 0 2 0 2 16 0 1 0 5 367 2 7]\n",
+ " [ 0 0 0 0 11 3 0 3 1 2 389 2]\n",
+ " [ 0 6 1 19 4 5 3 0 0 1 2 361]]\n",
+ "Test accuracy = 93.39%(N=4890)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!bash ./recreate_model.sh"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder to generate the TFLite files and perform evaluation on the test set. Both an fp32 version and a quantized version will be produced. The quantized version will use post-training quantization to fully quantize it.\n",
+ "\n",
+ "If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --train\n",
+ "```\n",
+ "\n",
+    "Training is then performed and should produce a model that reaches the accuracy stated in this repository. Note that exporting to TFLite will still happen with the baseline pre-trained checkpoint files, so you will need to re-run the script and this time supply the path to the new checkpoint files you want to use, for example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --ckpt \n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2.0 Training\n",
+ "\n",
+    "The training scripts can be used to recreate any of the models from the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf), provided the right hyperparameters are used. The training commands with all the hyperparameters to reproduce the model in this repository are given [here](recreate_model.sh). The model in this part of the repository represents just one variation of the models from the paper; other variants are covered in other parts of the repository.\n",
+ "\n",
+ "\n",
+ "As a general example of how to train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:\n",
+ "```\n",
+ "python train.py --model_architecture dnn --model_size_info 128 128 128\n",
+ "```\n",
+ "\n",
+    "The command line argument *--model_size_info* passes the network layer dimensions (such as the\n",
+    "number of layers and the convolution filter size/stride) as a list to models.py, which builds the\n",
+    "TensorFlow graph based on the provided model architecture and layer dimensions. For more info on\n",
+    "*model_size_info* for each network architecture see [models.py](model_core_utils/models.py).\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3.0 Testing\n",
+ "To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:\n",
+ "```\n",
+ "python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters passed to this script should match those used in the Training step.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4.0 Optimization\n",
+ "\n",
+ "We introduce an *optional* step to optimize the trained keyword spotting model for deployment.\n",
+ "\n",
+    "Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. We use 32 weight clusters and the kmeans++ cluster initialization method as the clustering hyperparameters.\n",
+ "\n",
+ "To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.\n",
+ "You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.\n",
+ "\n",
+ "To apply the optimization and fine-tuning, run the following command:\n",
+ "```\n",
+ "python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step, except for the number of training steps.\n",
+ "The number of training steps is reduced since the optimization step only requires fine-tuning.**\n",
+ "\n",
+ "This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model."
+ ]
+ },
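+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a rough illustration only (not the exact code in optimisations.py), the clustering step can be expressed with TensorFlow's weight clustering API along these lines, where `model` is a placeholder for the trained Keras model loaded from a checkpoint:\n",
+    "\n",
+    "```python\n",
+    "import tensorflow_model_optimization as tfmot\n",
+    "\n",
+    "clustering_params = {\n",
+    "    'number_of_clusters': 32,\n",
+    "    'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS\n",
+    "}\n",
+    "# Wrap the trained model so its weights are clustered, then fine-tune it.\n",
+    "clustered_model = tfmot.clustering.keras.cluster_weights(model, **clustering_params)\n",
+    "# After fine-tuning, remove the clustering wrappers before export.\n",
+    "final_model = tfmot.clustering.keras.strip_clustering(clustered_model)\n",
+    "```"
+   ]
+  },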
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5.0 Quantization and TFLite Conversion\n",
+ "\n",
+ "You can now use TensorFlow's\n",
+ "[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to\n",
+    "make quantization of the trained models straightforward.\n",
+ "\n",
+ "To quantize your trained model (e.g. a DNN) run:\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint [--inference_type int8|int16]\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+    "The ```inference_type``` parameter is *optional* and should be used if a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32.\n",
+ "\n",
+ "In this example, this step will produce a quantized TFLite file *dnn_quantized.tflite*."
+ ]
+ },
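+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For orientation, a typical post-training quantization flow with the TensorFlow Lite converter looks roughly like the sketch below; the actual logic lives in convert_to_tflite.py and may differ. Here `model` and `representative_dataset` are placeholders for the trained Keras model and a generator yielding calibration MFCC inputs:\n",
+    "\n",
+    "```python\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n",
+    "converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
+    "converter.representative_dataset = representative_dataset  # calibration samples\n",
+    "# Optionally force fully integer inputs/outputs (cf. --inference_type int8):\n",
+    "converter.inference_input_type = tf.int8\n",
+    "converter.inference_output_type = tf.int8\n",
+    "tflite_model = converter.convert()\n",
+    "```"
+   ]
+  },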
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can test the accuracy of this quantized model on the test set by running:\n",
+ "```\n",
+ "python evaluation.py --tflite_path dnn_quantized.tflite\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:\n",
+ "\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint --no-quantize\n",
+ "```\n",
+ "\n",
+ "This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 6.0 Single inference of the TFLite model files \n",
+ "\n",
+    "You can run TFLite inference on the fp32 and int8 TFLite model files with the following command: \n",
+ "\n",
+ "```python ds_cnn_l_inference_tflite.py --labels validation_utils/labels.txt --wav --tflite_path ```\n",
+ "\n",
+ "**The feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/README.md b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/README.md
new file mode 100644
index 0000000..be17ae3
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32
+
+## Description
+This is a clustered (32 clusters, kmeans++ centroid initialization) and retrained (fine-tuned) floating point fp32 version of the DS-CNN Large model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | fp32 |
+| SHA-1 Hash | d9af9829a2363c21fd6158c7bc425d0b635eb55c |
+| Size (Bytes) | 1652648 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| accuracy | 94.76% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_multiplication_x: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Deployable |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_check_mark: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_multiplication_x: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 490) | fp32 | models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/testing_input/input | fp32 | [1, 490] | The input is a vector of processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | fp32 | models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/testing_output/Identity | fp32 | [1, 12] | The probabilities of the 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/definition.yaml b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/definition.yaml
new file mode 100644
index 0000000..77d4f8c
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/definition.yaml
@@ -0,0 +1,67 @@
+benchmark:
+ benchmark_metrics:
+ accuracy: 94.76%
+ benchmark_name: Google Speech Commands test set
+description: This is a clustered (32 clusters, kmeans++ centroid initialization)
+ and retrained (fine-tuned) fp32 version of the DS-CNN Large model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: fp32
+ file_size_bytes: 1652648
+ filename: ds_cnn_l_clustered_fp32.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: d9af9829a2363c21fd6158c7bc425d0b635eb55c
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+ - description: The input is a processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/testing_input/input
+ shape:
+ - 1
+ - 490
+ type: fp32
+ use_case: Random input for model regression.
+ input_datatype: fp32
+ name: input
+ shape:
+ - 1
+ - 490
+ output_nodes:
+ - description: The probability on 12 keywords.
+ example_output:
+ path: models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: fp32
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: fp32
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: true
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: false
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - AVERAGE_POOL_2D
+ - CONV_2D
+ - DEPTHWISE_CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/ds_cnn_clustered_fp32.tflite b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/ds_cnn_l_clustered_fp32.tflite
similarity index 100%
rename from models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/ds_cnn_clustered_fp32.tflite
rename to models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/ds_cnn_l_clustered_fp32.tflite
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/testing_input/input/0.npy b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/testing_input/input/0.npy
similarity index 100%
rename from models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/testing_input/input/0.npy
rename to models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/testing_input/input/0.npy
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/testing_output/Identity/0.npy b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/testing_output/Identity/0.npy
similarity index 100%
rename from models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/testing_output/Identity/0.npy
rename to models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_fp32/testing_output/Identity/0.npy
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/README.md b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/README.md
new file mode 100644
index 0000000..976c8c6
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8
+
+## Description
+This is a clustered (32 clusters, kmeans++ centroid initialization) and retrained (fine-tuned) fully quantized int8 version of the DS-CNN Large model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|------------------------------------------|
+| Framework | TensorFlow Lite |
+| Datatype | int8 |
+| SHA-1 Hash | 2ee38794ed171c75d3313460a1633c5d6a79f530 |
+| Size (Bytes) | 503816 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| accuracy | 93.87% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_check_mark: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Deployable |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_check_mark: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_check_mark: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 490) | int8 | models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/testing_input/input | int8 | [1, 490] | The input is a vector of processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | int8 | models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/testing_output/Identity | int8 | [1, 12] | The probabilities of the 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/definition.yaml b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/definition.yaml
new file mode 100644
index 0000000..a3adef5
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/definition.yaml
@@ -0,0 +1,67 @@
+benchmark:
+ benchmark_metrics:
+ accuracy: 93.87%
+ benchmark_name: Google Speech Commands test set
+description: This is a clustered (32 clusters, kmeans++ centroid initialization)
+ and retrained (fine-tuned) fully quantized int8 version of the DS-CNN Large model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: int8
+ file_size_bytes: 503816
+ filename: ds_cnn_l_clustered_int8.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: 2ee38794ed171c75d3313460a1633c5d6a79f530
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+ - description: The input is a processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/testing_input/input
+ shape:
+ - 1
+ - 490
+ type: int8
+ use_case: Random input for model regression.
+ input_datatype: int8
+ name: input
+ shape:
+ - 1
+ - 490
+ output_nodes:
+ - description: The probability on 12 keywords.
+ example_output:
+ path: models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: int8
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: int8
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: true
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: true
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - AVERAGE_POOL_2D
+ - CONV_2D
+ - DEPTHWISE_CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/ds_cnn_clustered_int8.tflite b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/ds_cnn_l_clustered_int8.tflite
similarity index 100%
rename from models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/ds_cnn_clustered_int8.tflite
rename to models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/ds_cnn_l_clustered_int8.tflite
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/testing_input/input/0.npy b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/testing_input/input/0.npy
similarity index 100%
rename from models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/testing_input/input/0.npy
rename to models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/testing_input/input/0.npy
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/testing_output/Identity/0.npy b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/testing_output/Identity/0.npy
similarity index 100%
rename from models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/testing_output/Identity/0.npy
rename to models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_clustered_int8/testing_output/Identity/0.npy
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/README.md b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
new file mode 100644
index 0000000..7647971
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32
+
+## Description
+This is a floating point fp32 version of the DS-CNN Large model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | fp32 |
+| SHA-1 Hash | fea0e0dc13fc4207dd44904fe701f34254dd4767 |
+| Size (Bytes) | 1652648 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| accuracy | 95.03% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: HERO |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_multiplication_x: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Hero |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_multiplication_x: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 490) | fp32 | models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input | fp32 | [1, 490] | The input is a vector of processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | fp32 | models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity | fp32 | [1, 12] | The probabilities of the 12 keywords |
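+
+## Example Usage
+
+A minimal sketch (not part of the package) of how the bundled example input and reference output can be used as a quick regression check for this model; paths are relative to the repository root and the standard TensorFlow Lite interpreter API is assumed.
+
+```python
+import numpy as np
+import tensorflow as tf
+
+base = 'models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32'
+
+interpreter = tf.lite.Interpreter(model_path=base + '/ds_cnn_l.tflite')
+interpreter.allocate_tensors()
+inp = interpreter.get_input_details()[0]
+out = interpreter.get_output_details()[0]
+
+# Load the packaged example MFCC features and the reference output.
+x = np.load(base + '/testing_input/input/0.npy')
+y_ref = np.load(base + '/testing_output/Identity/0.npy')
+
+interpreter.set_tensor(inp['index'], x.astype(np.float32))
+interpreter.invoke()
+y = interpreter.get_tensor(out['index'])
+
+print('Max abs difference vs reference:', np.abs(y - y_ref).max())
+```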
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
new file mode 100644
index 0000000..288d185
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
@@ -0,0 +1,66 @@
+benchmark:
+ benchmark_metrics:
+ accuracy: 95.03%
+ benchmark_name: Google Speech Commands test set
+description: This is a floating point fp32 version of the DS-CNN Large model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: fp32
+ file_size_bytes: 1652648
+ filename: ds_cnn_l.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: fea0e0dc13fc4207dd44904fe701f34254dd4767
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+ - description: The input is a processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input
+ shape:
+ - 1
+ - 490
+ type: fp32
+ use_case: Random input for model regression.
+ input_datatype: fp32
+ name: input
+ shape:
+ - 1
+ - 490
+ output_nodes:
+ - description: The probability on 12 keywords.
+ example_output:
+ path: models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: fp32
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: fp32
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: false
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - AVERAGE_POOL_2D
+ - CONV_2D
+ - DEPTHWISE_CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/ds_cnn_l.tflite b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/ds_cnn_l.tflite
new file mode 100644
index 0000000..6619422
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/ds_cnn_l.tflite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:703bedd0f14360a47ac870a51b13dfde965e4be4d901ee8c6b87bd2f3360671b
+size 1652648
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
new file mode 100644
index 0000000..8886270
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:add2d479791b5e4aa5e4bfd8f16cf47f965783aff20845a8283fa7e571cabd50
+size 2088
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
new file mode 100644
index 0000000..5b8a6d6
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ffd3d5e8b2601d820fd4b4c786d5f475075848f6f9636a5d62a7c38f30d2cc0
+size 176
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/README.md b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/README.md
new file mode 100644
index 0000000..7f813ed
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8
+
+## Description
+This is a fully quantized int8 version of the DS-CNN Large model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | int8 |
+| SHA-1 Hash | 504f8e7bfa5c0f15c5475e5d08637b3b8aad0972 |
+| Size (Bytes) | 503816 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 94.52% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: HERO |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_check_mark: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Hero |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_check_mark: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 490) | int8 | models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input | int8 | [1, 490] | The input is a vector of processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | int8 | models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity | int8 | [1, 12] | The probabilities of the 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
new file mode 100644
index 0000000..6a2b864
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
@@ -0,0 +1,66 @@
+benchmark:
+ benchmark_metrics:
+ Accuracy: 94.52%
+ benchmark_name: Google Speech Commands test set
+description: This is a fully quantized int8 version of the DS-CNN Large model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: int8
+ file_size_bytes: 503816
+ filename: ds_cnn_l_quantized.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: 504f8e7bfa5c0f15c5475e5d08637b3b8aad0972
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+ - description: The input is a processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input
+ shape:
+ - 1
+ - 490
+ type: int8
+ use_case: Random input for model regression.
+ input_datatype: int8
+ name: input
+ shape:
+ - 1
+ - 490
+ output_nodes:
+ - description: The probability on 12 keywords.
+ example_output:
+ path: models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: int8
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: int8
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: true
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - AVERAGE_POOL_2D
+ - CONV_2D
+ - DEPTHWISE_CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_int8/ds_cnn_l_quantized.tflite b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/ds_cnn_l_quantized.tflite
similarity index 100%
rename from models/keyword_spotting/ds_cnn_large/tflite_int8/ds_cnn_l_quantized.tflite
rename to models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/ds_cnn_l_quantized.tflite
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_int8/testing_input/input/0.npy b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
similarity index 100%
rename from models/keyword_spotting/ds_cnn_large/tflite_int8/testing_input/input/0.npy
rename to models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_int8/testing_output/Identity/0.npy b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
similarity index 100%
rename from models/keyword_spotting/ds_cnn_large/tflite_int8/testing_output/Identity/0.npy
rename to models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/saved_model/ds_cnn_large/keras_metadata.pb b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/saved_model/ds_cnn_large/keras_metadata.pb
new file mode 100644
index 0000000..454265f
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/saved_model/ds_cnn_large/keras_metadata.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb5e42915f74efe437002d09ef323928da8efdc68b403118711d05871534690e
+size 78436
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/saved_model/ds_cnn_large/saved_model.pb b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/saved_model/ds_cnn_large/saved_model.pb
new file mode 100644
index 0000000..95b9f8f
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/saved_model/ds_cnn_large/saved_model.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac426a86f4d862a0055c945b92ecb0e8f3de3ea90542b2731764b67c2e9ae3f3
+size 859950
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/saved_model/ds_cnn_large/variables/variables.data-00000-of-00001 b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/saved_model/ds_cnn_large/variables/variables.data-00000-of-00001
new file mode 100644
index 0000000..77a395d
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/saved_model/ds_cnn_large/variables/variables.data-00000-of-00001
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efd31c705f2114c88f89660862742beb82a0bea80efd245969076e5339bccdf4
+size 1713786
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/saved_model/ds_cnn_large/variables/variables.index b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/saved_model/ds_cnn_large/variables/variables.index
new file mode 100644
index 0000000..7493cc8
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/saved_model/ds_cnn_large/variables/variables.index
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f30be633de16e745ef0a11a3842ad8dbc70d8ead948acf049a613aff0c64cd3d
+size 4397
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_int8/ckpt/checkpoint b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/weights/checkpoint
similarity index 100%
rename from models/keyword_spotting/ds_cnn_large/tflite_int8/ckpt/checkpoint
rename to models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/weights/checkpoint
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_int8/ckpt/ds_cnn_0.95_ckpt.data-00000-of-00001 b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/weights/ds_cnn_0.95_ckpt.data-00000-of-00001
similarity index 100%
rename from models/keyword_spotting/ds_cnn_large/tflite_int8/ckpt/ds_cnn_0.95_ckpt.data-00000-of-00001
rename to models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/weights/ds_cnn_0.95_ckpt.data-00000-of-00001
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_int8/ckpt/ds_cnn_0.95_ckpt.index b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/weights/ds_cnn_0.95_ckpt.index
similarity index 100%
rename from models/keyword_spotting/ds_cnn_large/tflite_int8/ckpt/ds_cnn_0.95_ckpt.index
rename to models/keyword_spotting/ds_cnn_large/model_package_tf/model_archive/model_source/weights/ds_cnn_0.95_ckpt.index
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_core_utils/__init__.py b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_core_utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/model_core_utils/models.py b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_core_utils/models.py
new file mode 100644
index 0000000..1978136
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/model_core_utils/models.py
@@ -0,0 +1,327 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Model definitions for simple keyword spotting."""
+
+import math
+
+import tensorflow as tf
+
+
+def prepare_model_settings(label_count, sample_rate, clip_duration_ms,
+ window_size_ms, window_stride_ms,
+ dct_coefficient_count):
+ """Calculates common settings needed for all models.
+
+ Args:
+ label_count: How many classes are to be recognized.
+ sample_rate: Number of audio samples per second.
+ clip_duration_ms: Length of each audio clip to be analyzed.
+ window_size_ms: Duration of frequency analysis window.
+ window_stride_ms: How far to move in time between frequency windows.
+ dct_coefficient_count: Number of frequency bins to use for analysis.
+
+ Returns:
+ Dictionary containing common settings.
+ """
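+    # Example: the DS-CNN Large settings used in recreate_model.sh (sample_rate=16000,
+    # clip_duration_ms=1000, window_size_ms=40, window_stride_ms=20,
+    # dct_coefficient_count=10) give spectrogram_length = 49 and fingerprint_size = 490,
+    # matching the (1, 490) input shape of the packaged TFLite models.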
+ desired_samples = int(sample_rate * clip_duration_ms / 1000)
+ window_size_samples = int(sample_rate * window_size_ms / 1000)
+ window_stride_samples = int(sample_rate * window_stride_ms / 1000)
+ length_minus_window = (desired_samples - window_size_samples)
+ if length_minus_window < 0:
+ spectrogram_length = 0
+ else:
+ spectrogram_length = 1 + int(length_minus_window / window_stride_samples)
+ fingerprint_size = dct_coefficient_count * spectrogram_length
+
+ return {
+ 'desired_samples': desired_samples,
+ 'window_size_samples': window_size_samples,
+ 'window_stride_samples': window_stride_samples,
+ 'spectrogram_length': spectrogram_length,
+ 'dct_coefficient_count': dct_coefficient_count,
+ 'fingerprint_size': fingerprint_size,
+ 'label_count': label_count,
+ 'sample_rate': sample_rate,
+ }
+
+
+def create_model(model_settings, model_architecture, model_size_info, is_training):
+ """Builds a tf.keras model of the requested architecture compatible with the settings.
+
+ Args:
+ model_settings: Dictionary of information about the model.
+ model_architecture: String specifying which kind of model to create.
+      model_size_info: Array with specific information for the chosen architecture
+        (e.g. convolutional parameters, number of layers).
+      is_training: Whether the model is being created for training; only used by the
+        Basic LSTM model to decide whether to unroll the LSTM layer.
+
+ Returns:
+ A tf.keras Model with the requested architecture.
+
+ Raises:
+ Exception: If the architecture type isn't recognized.
+ """
+
+ if model_architecture == 'dnn':
+ return create_dnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'cnn':
+ return create_cnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'ds_cnn':
+ return create_ds_cnn_model(model_settings, model_size_info)
+ elif model_architecture == 'single_fc':
+ return create_single_fc_model(model_settings)
+ elif model_architecture == 'basic_lstm':
+ return create_basic_lstm_model(model_settings, model_size_info, is_training)
+ else:
+        raise Exception(f'model_architecture argument {model_architecture} not recognized, '
+                        f'should be one of "dnn", "cnn", "ds_cnn", "single_fc" or "basic_lstm"')
+
+
+def create_single_fc_model(model_settings):
+ """Builds a model with a single fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+
+ Returns:
+ tf.keras Model of the 'SINGLE_FC' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+ # Fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(inputs)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_basic_lstm_model(model_settings, model_size_info, is_training):
+ """Builds a model with a basic lstm layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Length of the array defines the number of hidden-layers and
+ each element in the array represent the number of neurons in that layer.
+ is_training: Determining whether the use of the model is for training or for something else.
+
+ Returns:
+ tf.keras Model of the 'Basic_LSTM' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size))
+
+    # LSTM layer: keep it rolled during training and unroll it for inference/export.
+ if is_training:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=False)(x)
+ else:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=True)(x)
+
+ # Outputs a fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_dnn_model(model_settings, model_size_info):
+ """Builds a model with multiple hidden fully-connected layers.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Length of the array defines the number of hidden-layers and
+ each element in the array represent the number of neurons in that layer.
+
+ Returns:
+ tf.keras Model of the 'DNN' architecture.
+ """
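+    # Example: model_size_info = [128, 128, 128] (the default, also used in the
+    # tutorial notebook) builds three fully-connected hidden layers of 128 units each.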
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ # First fully connected layer.
+ x = tf.keras.layers.Dense(units=model_size_info[0], activation='relu')(inputs)
+
+ # Hidden layers with ReLU activations.
+ for i in range(1, len(model_size_info)):
+ x = tf.keras.layers.Dense(units=model_size_info[i], activation='relu')(x)
+
+ # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_cnn_model(model_settings, model_size_info):
+ """Builds a model with 2 convolution layers followed by a linear layer and a hidden fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines the first and second convolution parameters in
+ {number of conv features, conv filter height, width, stride in y,x dir.},
+ followed by linear layer size and fully-connected layer size.
+
+ Returns:
+ tf.keras Model of the 'CNN' architecture.
+ """
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ first_filter_count = model_size_info[0]
+ first_filter_height = model_size_info[1] # Time axis.
+ first_filter_width = model_size_info[2] # Frequency axis.
+ first_filter_stride_y = model_size_info[3] # Time axis.
+ first_filter_stride_x = model_size_info[4] # Frequency_axis.
+
+ second_filter_count = model_size_info[5]
+ second_filter_height = model_size_info[6] # Time axis.
+ second_filter_width = model_size_info[7] # Frequency axis.
+ second_filter_stride_y = model_size_info[8] # Time axis.
+ second_filter_stride_x = model_size_info[9] # Frequency axis.
+
+ linear_layer_size = model_size_info[10]
+ fc_size = model_size_info[11]
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size']), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=first_filter_count,
+ kernel_size=(first_filter_height, first_filter_width),
+ strides=(first_filter_stride_y, first_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Second convolution.
+ x = tf.keras.layers.Conv2D(filters=second_filter_count,
+ kernel_size=(second_filter_height, second_filter_width),
+ strides=(second_filter_stride_y, second_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Flatten for fully connected layers.
+ x = tf.keras.layers.Flatten()(x)
+
+ # Fully connected layer with no activation.
+ x = tf.keras.layers.Dense(units=linear_layer_size)(x)
+
+ # Fully connected layer with ReLU activation.
+ x = tf.keras.layers.Dense(units=fc_size)(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Output fully connected.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_ds_cnn_model(model_settings, model_size_info):
+ """Builds a model with convolutional & depthwise separable convolutional layers.
+
+ For more details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines number of layers, followed by the DS-Conv layer
+ parameters in the order {number of conv features, conv filter height,
+ width and stride in y,x dir.} for each of the layers.
+
+ Returns:
+ tf.keras Model of the 'DS-CNN' architecture.
+ """
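+    # Example: recreate_model.sh builds DS-CNN Large with model_size_info =
+    # [6, 276,10,4,2,1, 276,3,3,2,2, 276,3,3,1,1, 276,3,3,1,1, 276,3,3,1,1, 276,3,3,1,1]:
+    # 6 layers of 276 features each; the first is a regular 10x4 convolution with
+    # stride 2x1, the second a 3x3 depthwise separable block with stride 2x2, and the
+    # remaining four are 3x3 depthwise separable blocks with stride 1x1.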
+
+ label_count = model_settings['label_count']
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ t_dim = input_time_size
+ f_dim = input_frequency_size
+
+ # Extract model dimensions from model_size_info.
+ num_layers = model_size_info[0]
+ conv_feat = [None]*num_layers
+ conv_kt = [None]*num_layers
+ conv_kf = [None]*num_layers
+ conv_st = [None]*num_layers
+ conv_sf = [None]*num_layers
+
+ i = 1
+ for layer_no in range(0, num_layers):
+ conv_feat[layer_no] = model_size_info[i]
+ i += 1
+ conv_kt[layer_no] = model_size_info[i]
+ i += 1
+ conv_kf[layer_no] = model_size_info[i]
+ i += 1
+ conv_st[layer_no] = model_size_info[i]
+ i += 1
+ conv_sf[layer_no] = model_size_info[i]
+ i += 1
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size']), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # Depthwise separable convolutions.
+ for layer_no in range(0, num_layers):
+ if layer_no == 0:
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[0],
+ kernel_size=(conv_kt[0], conv_kf[0]),
+ strides=(conv_st[0], conv_sf[0]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ else:
+ # Depthwise convolution.
+ x = tf.keras.layers.DepthwiseConv2D(kernel_size=(conv_kt[layer_no], conv_kf[layer_no]),
+ strides=(conv_sf[layer_no], conv_st[layer_no]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ # Pointwise convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[layer_no], kernel_size=(1, 1))(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ t_dim = math.ceil(t_dim/float(conv_st[layer_no]))
+ f_dim = math.ceil(f_dim/float(conv_sf[layer_no]))
+
+ # Global average pool.
+ x = tf.keras.layers.AveragePooling2D(pool_size=(t_dim, f_dim), strides=1)(x)
+
+ # Squeeze before passing to output fully connected layer.
+ x = tf.reshape(x, shape=(-1, conv_feat[layer_no]))
+
+    # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=label_count, activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/optimisations.py b/models/keyword_spotting/ds_cnn_large/model_package_tf/optimisations.py
new file mode 100644
index 0000000..16b6f4c
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/optimisations.py
@@ -0,0 +1,259 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for optimizing simple keyword spotting models using clustering API."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+import tensorflow_model_optimization as tfmot
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def print_model_weight_clusters(model):
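+    """Prints the number of unique kernel weight values (clusters) for each layer."""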
+
+ for layer in model.layers:
+ if isinstance(layer, tf.keras.layers.Wrapper):
+ weights = layer.trainable_weights
+ else:
+ weights = layer.weights
+ for weight in weights:
+ if "kernel" in weight.name:
+ unique_count = len(np.unique(weight))
+ print(
+ f"{layer.name}/{weight.name}: {unique_count} clusters "
+ )
+
+
+def optimize():
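+    """Applies weight clustering (32 clusters, kmeans++ centroids) to a trained model,
+    fine-tunes it, then evaluates on the test set and saves the clustered weights.
+    """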
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model to optimize from checkpoint.
+    model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info,
+                                is_training=True)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ cluster_weights = tfmot.clustering.keras.cluster_weights
+ CentroidInitialization = tfmot.clustering.keras.CentroidInitialization
+
+ clustering_params = {
+ 'number_of_clusters': 32,
+ 'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS}
+
+ clustered_model = cluster_weights(model, **clustering_params)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Train the model with clustering applied.
+ clustered_model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data)
+
+ stripped_clustered_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+
+ print_model_weight_clusters(stripped_clustered_model)
+
+ # Save the clustered model weights
+ train_dir = Path(FLAGS.train_dir) / "optimized"
+ train_dir.mkdir(parents=True, exist_ok=True)
+
+ stripped_clustered_model.save_weights((train_dir /
+ (FLAGS.model_architecture +
+ "_clustered_ckpt")))
+
+ # Test the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ stripped_clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ test_loss, test_acc = stripped_clustered_model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='3750,750',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--save_step_interval',
+ type=int,
+ default=100,
+ help='Save model checkpoint every save_steps.')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from before fine-tuning.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ optimize()
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/recreate_model.sh b/models/keyword_spotting/ds_cnn_large/model_package_tf/recreate_model.sh
new file mode 100644
index 0000000..fabe86c
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/recreate_model.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ckpt_path=model_archive/model_source/weights/ds_cnn_0.95_ckpt
+train=false
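+
+# Usage: ./recreate_model.sh [--ckpt <checkpoint path>] [--train]
+# By default the packaged checkpoint is converted to TFLite fp32 and int8;
+# pass --train to retrain DS-CNN Large from scratch before the conversions.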
+
+# Parse command line args
+while (( $# >= 1 )); do
+ case $1 in
+ --ckpt)
+ if [ "$2" ]; then
+ ckpt_path=$2
+ shift
+ else
+ printf 'ERROR: "--ckpt" requires a path to be supplied.\n'
+ exit 1
+ fi
+ ;;
+ --train)
+ train=true
+ break;;
+ *) shift;
+ esac;
+done
+
+
+# DS-CNN Large training
+if [ "$train" = true ]
+then
+python train.py --model_architecture ds_cnn --model_size_info 6 276 10 4 2 1 276 3 3 2 2 276 3 3 1 1 276 3 3 1 1 276 3 3 1 1 276 3 3 1 1 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --learning_rate 0.0005,0.0001,0.00002 --how_many_training_steps 10000,10000,10000 --summaries_dir work/DS_CNN/DS_CNN_L/retrain_logs --train_dir work/DS_CNN/DS_CNN_L/training
+fi
+
+# Conversion to TFLite fp32
+python convert_to_tflite.py --model_architecture ds_cnn --model_size_info 6 276 10 4 2 1 276 3 3 2 2 276 3 3 1 1 276 3 3 1 1 276 3 3 1 1 276 3 3 1 1 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --checkpoint $ckpt_path --no-quantize
+
+# Conversion to TFLite int8
+python convert_to_tflite.py --model_architecture ds_cnn --model_size_info 6 276 10 4 2 1 276 3 3 2 2 276 3 3 1 1 276 3 3 1 1 276 3 3 1 1 276 3 3 1 1 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --checkpoint $ckpt_path --inference_type int8
+
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/requirements.txt b/models/keyword_spotting/ds_cnn_large/model_package_tf/requirements.txt
new file mode 100644
index 0000000..3448cff
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/requirements.txt
@@ -0,0 +1,3 @@
+numpy == 1.19.5
+tensorflow == 2.5.0
+tensorflow-model-optimization == 0.6.0
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/train.py b/models/keyword_spotting/ds_cnn_large/model_package_tf/train.py
new file mode 100644
index 0000000..8c488b3
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/train.py
@@ -0,0 +1,227 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for training simple keyword spotting models."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def train():
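+    """Trains the selected keyword spotting model, checkpointing the best validation
+    accuracy, then evaluates on the test set and saves SavedModel and Keras H5 copies.
+    """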
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model.
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, True)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Callbacks.
+ train_dir = Path(FLAGS.train_dir) / "best"
+ train_dir.mkdir(parents=True, exist_ok=True)
+ model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
+ filepath=(train_dir / (FLAGS.model_architecture + "_{val_accuracy:.3f}_ckpt")),
+ save_weights_only=True,
+ monitor='val_accuracy',
+ mode='max',
+ save_best_only=True)
+ tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.summaries_dir)
+
+ # Train the model.
+ model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data,
+ callbacks=[model_checkpoint_callback, tensorboard_callback])
+
+ # Test and save the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ test_loss, test_acc = model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+ model.save(f'saved_model/{FLAGS.model_architecture}')
+ model.save(f'keras/{FLAGS.model_architecture}.h5')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='15000,3000',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--summaries_dir',
+ type=str,
+ default='/tmp/retrain_logs',
+ help='Where to save summary logs for TensorBoard.')
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ train()
diff --git a/models/keyword_spotting/ds_cnn_large/model_package_tf/validation_utils/labels.txt b/models/keyword_spotting/ds_cnn_large/model_package_tf/validation_utils/labels.txt
new file mode 100644
index 0000000..ba41645
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_large/model_package_tf/validation_utils/labels.txt
@@ -0,0 +1,12 @@
+_silence_
+_unknown_
+yes
+no
+up
+down
+left
+right
+on
+off
+stop
+go
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/README.md b/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/README.md
deleted file mode 100644
index 0643dd8..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/README.md
+++ /dev/null
@@ -1,76 +0,0 @@
-# DS-CNN Clustered FP32
-
-## Description
-This is a clustered (32 clusters, kmeans++ centroid initialization) and retrained (fine-tuned) FP32 version of the DS-CNN Large model developed by Arm from the Hello Edge paper. Code for the original DS-CNN implementation can be found here: https://github.com/ARM-software/ML-KWS-for-MCU. The original model was converted to Keras and optimized using the Clustering API in TensorFlow Model Optimization Toolkit.
-
-## License
-[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
-
-## Related Materials
-### Class Labels
-The class labels associated with this model can be downloaded by running the script `get_class_labels.sh`.
-
-### Model Recreation Code
-Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m.
-
-## Network Information
-| Network Information | Value |
-|---------------------|----------------|
-| Framework | TensorFlow Lite |
-| SHA-1 Hash | d9af9829a2363c21fd6158c7bc425d0b635eb55c |
-| Size (Bytes) | 1652648 |
-| Provenance | The original model (before clustering and quantization) is a pretrained checkpoint based on https://github.com/ARM-software/ML-KWS-for-MCU |
-| Paper | https://arxiv.org/abs/1711.07128 |
-
-## Performance
-| Platform | Optimized |
-|----------|:---------:|
-| Cortex-A |:heavy_check_mark: |
-| Cortex-M |:heavy_multiplication_x: |
-| Mali GPU |:heavy_check_mark: |
-| Ethos U |:heavy_multiplication_x: |
-
-### Key
-* :heavy_check_mark: - Will run on this platform.
-* :heavy_multiplication_x: - Will not run on this platform.
-
-## Accuracy
-Dataset: Google Speech Commands Test Set
-
-| Metric | Value |
-|--------|-------|
-| Top 1 Accuracy | 0.950 |
-
-## Optimizations
-| Optimization | Value |
-|-----------------|---------|
-| Number of Clusters | 32 |
-| Cluster Initialization | K-Means |
-
-## Network Inputs
-
-
- Input Node Name |
- Shape |
- Description |
-
-
- input |
- (1, 490) |
- The input is a processed MFCCs of shape (1,490) |
-
-
-
-## Network Outputs
-
-
- Output Node Name |
- Shape |
- Description |
-
-
- Identity |
- (1, 12) |
- The probability on 12 keywords. |
-
-
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/ckpt/checkpoint b/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/ckpt/checkpoint
deleted file mode 100644
index be5b265..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/ckpt/checkpoint
+++ /dev/null
@@ -1,2 +0,0 @@
-model_checkpoint_path: "ds_cnn_clustered_ckpt"
-all_model_checkpoint_paths: "ds_cnn_clustered_ckpt"
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/ckpt/ds_cnn_clustered_ckpt.data-00000-of-00001 b/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/ckpt/ds_cnn_clustered_ckpt.data-00000-of-00001
deleted file mode 100644
index fbbad53..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/ckpt/ds_cnn_clustered_ckpt.data-00000-of-00001
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:77f79b7be1dec13fa39088ca249cc6ea1ab2a0e0bab595034a81a7915d0584f1
-size 1699733
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/ckpt/ds_cnn_clustered_ckpt.index b/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/ckpt/ds_cnn_clustered_ckpt.index
deleted file mode 100644
index f1630cc..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/ckpt/ds_cnn_clustered_ckpt.index
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:62b15a99efc82778286c3de5248bbf4d246a751a95007d27c5e778527929b015
-size 4396
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/definition.yaml b/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/definition.yaml
deleted file mode 100644
index f9c2303..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/definition.yaml
+++ /dev/null
@@ -1,47 +0,0 @@
-benchmark:
- SpeechCommands:
- top_1_accuracy: 0.950
-description: 'This is a clustered (32 clusters, kmeans++ centroid initialization)
- and retrained (fine-tuned) FP32 version of the DS-CNN Large model developed by Arm
- from the Hello Edge paper. Code for the original DS-CNN implementation can be found
- here: https://github.com/ARM-software/ML-KWS-for-MCU. The original model was converted
- to Keras and optimized using the Clustering API in TensorFlow Model Optimization
- Toolkit.'
-license:
-- Apache-2.0
-network:
- file_size_bytes: 1652648
- filename: ds_cnn_clustered_fp32.tflite
- framework: TensorFlow Lite
- hash:
- algorithm: sha1
- value: d9af9829a2363c21fd6158c7bc425d0b635eb55c
- provenance: The original model (before clustering and quantization) is a pretrained
- checkpoint based on https://github.com/ARM-software/ML-KWS-for-MCU
-network_parameters:
- input_nodes:
- - description: The input is a processed MFCCs of shape (1,490)
- example_input:
- path: models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/testing_input/input
- name: input
- shape:
- - 1
- - 490
- type: float32
- output_nodes:
- - description: The probability on 12 keywords.
- name: Identity
- shape:
- - 1
- - 12
- test_output_path: models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/testing_output/Identity
-operators:
- TensorFlow Lite:
- - AVERAGE_POOL_2D
- - CONV_2D
- - DEPTHWISE_CONV_2D
- - FULLY_CONNECTED
- - RELU
- - RESHAPE
- - SOFTMAX
-paper: https://arxiv.org/abs/1711.07128
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/get_class_labels.sh b/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/get_class_labels.sh
deleted file mode 100755
index e59caf5..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_clustered_fp32/get_class_labels.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the License); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an AS IS BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/usr/bin/env bash
-
-wget https://raw.githubusercontent.com/ARM-software/ML-KWS-for-MCU/e9cf319e9aa2ff71d433e111477dd95329fb94cb/Pretrained_models/labels.txt
-mv labels.txt labelmappings.txt
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/README.md b/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/README.md
deleted file mode 100644
index 3e859ed..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/README.md
+++ /dev/null
@@ -1,77 +0,0 @@
-# DS-CNN Clustered INT8
-
-## Description
-This is a clustered (32 clusters, kmeans++ centroid initialization), retrained (fine-tuned) and fully quantized version (INT8) of the DS-CNN Large model developed by Arm from the Hello Edge paper. Code for the original DS-CNN implementation can be found here: https://github.com/ARM-software/ML-KWS-for-MCU. The original model was converted to Keras, optimized using the Clustering API in TensorFlow Model Optimization Toolkit, and quantized using post-training quantization in the TF Lite Converter.
-
-## License
-[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
-
-## Related Materials
-### Class Labels
-The class labels associated with this model can be downloaded by running the script `get_class_labels.sh`.
-
-### Model Recreation Code
-Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m.
-
-## Network Information
-| Network Information | Value |
-|---------------------|----------------|
-| Framework | TensorFlow Lite |
-| SHA-1 Hash | 2ee38794ed171c75d3313460a1633c5d6a79f530 |
-| Size (Bytes) | 503816 |
-| Provenance | The original model (before clustering) is a pretrained checkpoint based on https://github.com/ARM-software/ML-KWS-for-MCU |
-| Paper | https://arxiv.org/abs/1711.07128 |
-
-## Performance
-| Platform | Optimized |
-|----------|:---------:|
-| Cortex-A |:heavy_multiplication_x: |
-| Cortex-M |:heavy_check_mark: |
-| Mali GPU |:heavy_check_mark: |
-| Ethos U |:heavy_check_mark: |
-
-### Key
-* :heavy_check_mark: - Will run on this platform.
-* :heavy_multiplication_x: - Will not run on this platform.
-
-## Accuracy
-Dataset: Google Speech Commands Test Set
-
-| Metric | Value |
-|--------|-------|
-| Top 1 Accuracy | 0.940 |
-
-## Optimizations
-| Optimization | Value |
-|--------------|---------|
-| Quantization | INT8 |
-| Number of Clusters | 32 |
-| Cluster Initialization | K-Means |
-
-## Network Inputs
-
-
- Input Node Name |
- Shape |
- Description |
-
-
- input |
- (1, 490) |
- The input is a processed MFCCs of shape (1,490) |
-
-
-
-## Network Outputs
-
-
- Output Node Name |
- Shape |
- Description |
-
-
- Identity |
- (1, 12) |
- The probability on 12 keywords. |
-
-
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/ckpt/checkpoint b/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/ckpt/checkpoint
deleted file mode 100644
index be5b265..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/ckpt/checkpoint
+++ /dev/null
@@ -1,2 +0,0 @@
-model_checkpoint_path: "ds_cnn_clustered_ckpt"
-all_model_checkpoint_paths: "ds_cnn_clustered_ckpt"
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/ckpt/ds_cnn_clustered_ckpt.data-00000-of-00001 b/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/ckpt/ds_cnn_clustered_ckpt.data-00000-of-00001
deleted file mode 100644
index fbbad53..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/ckpt/ds_cnn_clustered_ckpt.data-00000-of-00001
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:77f79b7be1dec13fa39088ca249cc6ea1ab2a0e0bab595034a81a7915d0584f1
-size 1699733
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/ckpt/ds_cnn_clustered_ckpt.index b/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/ckpt/ds_cnn_clustered_ckpt.index
deleted file mode 100644
index f1630cc..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/ckpt/ds_cnn_clustered_ckpt.index
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:62b15a99efc82778286c3de5248bbf4d246a751a95007d27c5e778527929b015
-size 4396
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/definition.yaml b/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/definition.yaml
deleted file mode 100644
index 3d65144..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/definition.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-benchmark:
- SpeechCommands:
- top_1_accuracy: 0.940
-description: 'This is a clustered (32 clusters, kmeans++ centroid initialization),
- retrained (fine-tuned) and fully quantized version (INT8) of the DS-CNN Large model
- developed by Arm from the Hello Edge paper. Code for the original DS-CNN implementation
- can be found here: https://github.com/ARM-software/ML-KWS-for-MCU. The original
- model was converted to Keras, optimized using the Clustering API in TensorFlow Model
- Optimization Toolkit, and quantized using post-training quantization in the TF Lite
- Converter.'
-license:
-- Apache-2.0
-network:
- file_size_bytes: 503816
- filename: ds_cnn_clustered_int8.tflite
- framework: TensorFlow Lite
- hash:
- algorithm: sha1
- value: 2ee38794ed171c75d3313460a1633c5d6a79f530
- provenance: The original model (before clustering) is a pretrained checkpoint based
- on https://github.com/ARM-software/ML-KWS-for-MCU
-network_parameters:
- input_nodes:
- - description: The input is a processed MFCCs of shape (1,490)
- example_input:
- path: models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/testing_input/input
- name: input
- shape:
- - 1
- - 490
- type: int8
- output_nodes:
- - description: The probability on 12 keywords.
- name: Identity
- shape:
- - 1
- - 12
- test_output_path: models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/testing_output/Identity
-operators:
- TensorFlow Lite:
- - AVERAGE_POOL_2D
- - CONV_2D
- - DEPTHWISE_CONV_2D
- - FULLY_CONNECTED
- - RELU
- - RESHAPE
- - SOFTMAX
-paper: https://arxiv.org/abs/1711.07128
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/get_class_labels.sh b/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/get_class_labels.sh
deleted file mode 100755
index e59caf5..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/get_class_labels.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the License); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an AS IS BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/usr/bin/env bash
-
-wget https://raw.githubusercontent.com/ARM-software/ML-KWS-for-MCU/e9cf319e9aa2ff71d433e111477dd95329fb94cb/Pretrained_models/labels.txt
-mv labels.txt labelmappings.txt
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_int8/README.md b/models/keyword_spotting/ds_cnn_large/tflite_int8/README.md
deleted file mode 100644
index e132990..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_int8/README.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# DS-CNN Large INT8
-
-## Description
-This is a fully quantized version (asymmetrical int8) of the DS-CNN Large model developed by Arm, with training checkpoints, from the Hello Edge paper. Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
-
-## License
-[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
-
-## Related Materials
-### Class Labels
-The class labels associated with this model can be downloaded by running the script `get_class_labels.sh`.
-
-### Model Recreation Code
-Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m.
-
-## Network Information
-| Network Information | Value |
-|---------------------|------------------|
-| Framework | TensorFlow Lite |
-| SHA-1 Hash | 504f8e7bfa5c0f15c5475e5d08637b3b8aad0972 |
-| Size (Bytes) | 503816 |
-| Provenance | https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m |
-| Paper | https://arxiv.org/abs/1711.07128 |
-
-## Accuracy
-Dataset: Google Speech Commands Test Set
-
-| Metric | Value |
-|--------|-------|
-| Accuracy | 0.946 |
-
-## Performance
-| Platform | Optimized |
-|----------|:---------:|
-| Cortex-A |:heavy_check_mark: |
-| Cortex-M |:heavy_check_mark: HERO |
-| Mali GPU |:heavy_check_mark: |
-| Ethos U |:heavy_check_mark: |
-
-### Key
-* :heavy_check_mark: - Will run on this platform.
-* :heavy_multiplication_x: - Will not run on this platform.
-
-
-
-## Optimizations
-| Optimization | Value |
-|-----------------|---------|
-| Quantization | INT8 |
-
-## Network Inputs
-| Input Node Name | Shape | Description |
-|-----------------|---------|-------------|
-| input | (1, 490) | The input is a processed MFCCs of shape (1, 490) |
-
-## Network Outputs
-| Output Node Name | Shape | Description |
-|------------------|---------|-------------|
-| Identity | (1, 12) | The probability on 12 keywords. |
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_int8/definition.yaml b/models/keyword_spotting/ds_cnn_large/tflite_int8/definition.yaml
deleted file mode 100644
index 54df622..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_int8/definition.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-benchmark:
- Google Speech Commands test set:
- Accuracy: 94.58%
-description: 'This is a fully quantized version (asymmetrical int8) of the DS-CNN
- Large model developed by Arm, with training checkpoints, from the Hello Edge paper.
- Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m'
-license:
-- Apache-2.0
-network:
- file_size_bytes: 503816
- filename: ds_cnn_l_quantized.tflite
- framework: TensorFlow Lite
- hash:
- algorithm: sha1
- value: 504f8e7bfa5c0f15c5475e5d08637b3b8aad0972
- provenance: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
- quality_level: hero#CORTEX-M
-network_parameters:
- input_nodes:
- - description: The input is a processed MFCCs of shape (1, 490)
- example_input:
- path: models/keyword_spotting/ds_cnn_large/tflite_int8/testing_input/input
- name: input
- shape:
- - 1
- - 490
- output_nodes:
- - description: The probability on 12 keywords.
- name: Identity
- shape:
- - 1
- - 12
- test_output_path: models/keyword_spotting/ds_cnn_large/tflite_int8/testing_output/Identity
-operators:
- TensorFlow Lite:
- - AVERAGE_POOL_2D
- - CONV_2D
- - DEPTHWISE_CONV_2D
- - DEQUANTIZE
- - FULLY_CONNECTED
- - QUANTIZE
- - RELU
- - RESHAPE
- - SOFTMAX
-paper: https://arxiv.org/abs/1711.07128
diff --git a/models/keyword_spotting/ds_cnn_large/tflite_int8/get_class_labels.sh b/models/keyword_spotting/ds_cnn_large/tflite_int8/get_class_labels.sh
deleted file mode 100755
index e59caf5..0000000
--- a/models/keyword_spotting/ds_cnn_large/tflite_int8/get_class_labels.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the License); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an AS IS BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/usr/bin/env bash
-
-wget https://raw.githubusercontent.com/ARM-software/ML-KWS-for-MCU/e9cf319e9aa2ff71d433e111477dd95329fb94cb/Pretrained_models/labels.txt
-mv labels.txt labelmappings.txt
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/README.md b/models/keyword_spotting/ds_cnn_medium/model_package_tf/README.md
new file mode 100644
index 0000000..47e2846
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/README.md
@@ -0,0 +1,115 @@
+# DS-CNN Medium model package
+
+This folder contains code that will allow you to recreate the DS-CNN Medium keyword spotting model from
+the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf).
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Model Package Overview
+| Model | DS_CNN_Medium |
+|:---------------: |:------------------------------------------:|
+| **Format**: | Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |
+| **Feature**: | Keyword spotting for Arm Cortex-M CPUs |
+| **Architectural Delta w.r.t. Vanilla**: | None |
+| **Domain**: | Keyword spotting |
+| **Package Quality**: | Hero |
+
+## Model Recreation
+
+To recreate the model you will first need to be using ```Python3.7``` and to install the requirements in ```requirements.txt```.
+
+Once these requirements are satisfied, you can execute the recreation script contained within this folder by running:
+
+```bash
+bash ./recreate_model.sh
+```
+
+Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder
+to generate the TFLite files and evaluate them on the test set. Both an fp32 version and a quantized version will be produced;
+the quantized version is fully quantized using post-training quantization.
+
+If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:
+
+```bash
+bash ./recreate_model.sh --train
+```
+
+Training is then performed and should produce a model matching the accuracy stated in this repository.
+Note that exporting to TFLite will still use the pre-trained checkpoint files, so you will need to re-run the script
+and this time supply the path to the new checkpoint files you want to use, for example:
+
+```bash
+bash ./recreate_model.sh --ckpt
+```
+
+
+## Training
+
+To train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:
+
+```
+python train.py --model_architecture dnn --model_size_info 128 128 128
+```
+The command line argument *--model_size_info* passes the neural network layer
+dimensions (such as the number of layers and the convolution filter size/stride) as a list to models.py,
+which builds the TensorFlow graph based on the provided model architecture
+and layer dimensions. For more information on *model_size_info* for each network architecture, see
+[models.py](models.py).
+
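+To make the shape of *model_size_info* concrete, below is a minimal, hypothetical sketch of how a flat list such as
+```128 128 128``` could be turned into fully-connected layers; the actual construction for every supported architecture
+lives in [models.py](models.py):
+
+```python
+import tensorflow as tf
+
+def build_dnn(fingerprint_size, label_count, model_size_info):
+    """Hypothetical builder: each entry in model_size_info is the width of one dense layer."""
+    inputs = tf.keras.Input(shape=(fingerprint_size,))
+    x = inputs
+    for units in model_size_info:  # e.g. [128, 128, 128]
+        x = tf.keras.layers.Dense(units, activation='relu')(x)
+    outputs = tf.keras.layers.Dense(label_count)(x)
+    return tf.keras.Model(inputs, outputs)
+```
+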
+The training commands with all the hyperparameters to reproduce the models shown in the
+[paper](https://arxiv.org/pdf/1711.07128.pdf) are given [here](recreate_model.sh).
+
+## Testing
+To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:
+```
+python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint
+```
+The parameters used here should match those used in the Training step.
+
+## Optimization
+
+We introduce a new *optional* step to optimize the trained keyword spotting model for deployment.
+
+Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. 32 weight clusters and the kmeans++ cluster initialization method are used as the clustering hyperparameters.
+
+To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.
+You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.
+
+To apply the optimization and fine-tuning, run the following command:
+```
+python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint
+```
+The parameters used here should match those used in the Training step, except for the number of training steps.
+The number of training steps is reduced since the optimization step only requires fine-tuning.
+
+This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model.
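+
+For reference, below is a minimal sketch of the kind of clustering `optimisations.py` applies, assuming a trained Keras model held in a variable named `model` (an illustrative name); the script itself also handles checkpoint loading, fine-tuning and saving:
+
+```python
+import tensorflow as tf
+import tensorflow_model_optimization as tfmot
+
+clustering_params = {
+    'number_of_clusters': 32,
+    'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS,
+}
+
+# Wrap the trained model so that its weights are constrained to 32 clusters.
+clustered_model = tfmot.clustering.keras.cluster_weights(model, **clustering_params)
+
+# Re-compile and fine-tune briefly so accuracy recovers after clustering.
+clustered_model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
+                        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+                        metrics=['accuracy'])
+# clustered_model.fit(train_data, epochs=...)
+
+# Remove the clustering wrappers before export or checkpointing.
+final_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+```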
+
+## Quantization and TFLite Conversion
+
+As part of the update we now use TensorFlow's
+[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to
+make quantization of the trained models super simple.
+
+To quantize your trained model (e.g. a DNN) run:
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint [--inference_type int8|int16]
+```
+The parameters used here should match those used in the Training step.
+
+The *inference_type* parameter is *optional* and should be used if a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32.
+
+This step will produce a quantized TFLite file *dnn_quantized.tflite*.
+You can test the accuracy of this quantized model on the test set by running:
+```
+python evaluation.py --tflite_path dnn_quantized.tflite
+```
+The parameters used here should match those used in the Training step.
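+
+Under the hood, `convert_to_tflite.py` uses the standard TFLite converter flow for post-training quantization. A minimal sketch, assuming a trained Keras model in `model` and a generator `representative_dataset` that yields calibration samples (both names are illustrative):
+
+```python
+import tensorflow as tf
+
+converter = tf.lite.TFLiteConverter.from_keras_model(model)
+converter.optimizations = [tf.lite.Optimize.DEFAULT]
+# A representative dataset is needed to calibrate activation ranges.
+converter.representative_dataset = representative_dataset
+# Restrict to int8 kernels and use int8 inputs/outputs for a fully quantized model.
+converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+converter.inference_input_type = tf.int8
+converter.inference_output_type = tf.int8
+
+tflite_model = converter.convert()
+with open('dnn_quantized.tflite', 'wb') as f:
+    f.write(tflite_model)
+```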
+
+`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:
+
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint --no-quantize
+```
+
+This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/convert_to_tflite.py b/models/keyword_spotting/ds_cnn_medium/model_package_tf/convert_to_tflite.py
new file mode 100644
index 0000000..64ab8df
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/convert_to_tflite.py
@@ -0,0 +1,234 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for converting and quantizing a trained keyword spotting
+ model and saving to TFLite."""
+
+import argparse
+
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from evaluation import tflite_test
+
+NUM_REP_DATA_SAMPLES = 100 # How many samples to use for post training quantization.
+
+
+def convert(model_settings, audio_processor, checkpoint, quantize, inference_type, tflite_path):
+ """Load our trained floating point model and convert it.
+
+ TFLite conversion or post training quantization is performed and the
+ resulting model is saved as a TFLite file.
+ We use samples from the validation set to do post training quantization.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ checkpoint: Path to training checkpoint to load.
+ quantize: Whether to quantize the model or convert to fp32 TFLite model.
+ inference_type: Input/output type of the quantized model.
+ tflite_path: Output TFLite file save path.
+ """
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(checkpoint).expect_partial()
+
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+
+ def _rep_dataset():
+ """Generator function to produce representative dataset."""
+ i = 0
+ for mfcc, label in val_data:
+ if i >= NUM_REP_DATA_SAMPLES:
+ break
+ i += 1
+ yield [mfcc]
+
+ if quantize:
+ # Quantize model and save to disk.
+ tflite_model = post_training_quantize(model, inference_type, _rep_dataset)
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Quantized model saved to {tflite_path}.')
+ else:
+ converter = tf.lite.TFLiteConverter.from_keras_model(model)
+ tflite_model = converter.convert()
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Converted model saved to {tflite_path}.')
+
+
+def post_training_quantize(keras_model, inference_type, rep_dataset):
+ """Perform post training quantization and returns the TFLite model ready for saving.
+
+ See https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization for
+ more details.
+
+ Args:
+ keras_model: The trained tf Keras model used for post training quantization.
+ inference_type: Input/output type of the quantized model.
+ rep_dataset: Function to use as a representative dataset, must be callable.
+
+ Returns:
+ Quantized TFLite model ready for saving to disk.
+ """
+ converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+ converter.optimizations = [tf.lite.Optimize.DEFAULT]
+
+ # Default to the standard built-in ops so conversion with fp32 input/output still works.
+ supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS
+ if inference_type == 'int8':
+ converter.inference_input_type = tf.int8
+ converter.inference_output_type = tf.int8
+ supported_ops = tf.lite.OpsSet.TFLITE_BUILTINS_INT8
+ elif inference_type == 'int16':
+ converter.inference_input_type = tf.int16
+ converter.inference_output_type = tf.int16
+ supported_ops = tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
+
+ # Post training quantization needs a representative dataset to calibrate activations.
+ converter.representative_dataset = rep_dataset
+ converter.target_spec.supported_ops = [supported_ops]
+
+ tflite_model = converter.convert()
+
+ return tflite_model
+
+
+def main():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.quantize:
+ tflite_path = f'{FLAGS.model_architecture}_quantized.tflite'
+ else:
+ tflite_path = f'{FLAGS.model_architecture}.tflite'
+
+ # Load floating point model from checkpoint and convert it.
+ convert(model_settings, audio_processor, FLAGS.checkpoint,
+ FLAGS.quantize, FLAGS.inference_type, tflite_path)
+
+ # Test the newly converted model on the test set.
+ tflite_test(model_settings, audio_processor, tflite_path)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from.')
+ parser.add_argument(
+ '--quantize',
+ dest='quantize',
+ action="store_true",
+ default=True,
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--no-quantize',
+ dest='quantize',
+ action="store_false",
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--inference_type',
+ type=str,
+ default='fp32',
+ help='If quantize is true, whether the model input and output are fp32, int8 or int16')
+
+ FLAGS, _ = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/data_processing/__init__.py b/models/keyword_spotting/ds_cnn_medium/model_package_tf/data_processing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/data_processing/data_preprocessing.py b/models/keyword_spotting/ds_cnn_medium/model_package_tf/data_processing/data_preprocessing.py
new file mode 100644
index 0000000..05cf5ba
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/data_processing/data_preprocessing.py
@@ -0,0 +1,462 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Modifications Copyright 2023 Arm Inc. All Rights Reserved.
+# Modified to use TensorFlow 2.0 and data pipelines.
+#
+"""Functions for loading and preparing data for keyword spotting."""
+
+import os
+import re
+import sys
+import urllib
+from pathlib import Path
+import tarfile
+import hashlib
+import random
+import math
+from enum import Enum
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.ops import gen_audio_ops as audio_ops
+
+MAX_NUM_WAVS_PER_CLASS = 2**27 - 1 # ~134M
+RANDOM_SEED = 59185
+BACKGROUND_NOISE_DIR_NAME = '_background_noise_'
+SILENCE_LABEL = '_silence_'
+SILENCE_INDEX = 0
+UNKNOWN_WORD_INDEX = 1
+UNKNOWN_WORD_LABEL = '_unknown_'
+
+
+def load_wav_file(wav_filename, desired_samples):
+ """Loads and then decodes a given 16bit PCM wav file.
+
+ Decoded audio is scaled to the range [-1, 1] and padded or cropped to the desired number of samples.
+
+ Args:
+ wav_filename: 16bit PCM wav file to load.
+ desired_samples: Number of samples wanted from the audio file.
+
+ Returns:
+ Tuple consisting of the decoded audio and sample rate.
+ """
+ wav_file = tf.io.read_file(wav_filename)
+ decoded_wav = audio_ops.decode_wav(wav_file, desired_channels=1, desired_samples=desired_samples)
+
+ return decoded_wav.audio, decoded_wav.sample_rate
+
+
+def calculate_mfcc(audio_signal, audio_sample_rate, window_size, window_stride, num_mfcc):
+ """Returns Mel Frequency Cepstral Coefficients (MFCC) for a given audio signal.
+
+ Args:
+ audio_signal: Raw audio signal in range [-1, 1]
+ audio_sample_rate: Audio signal sample rate
+ window_size: Window size in samples for calculating spectrogram
+ window_stride: Window stride in samples for calculating spectrogram
+ num_mfcc: The number of MFCC features wanted.
+
+ Returns:
+ Calculated MFCC features.
+ """
+ spectrogram = audio_ops.audio_spectrogram(input=audio_signal, window_size=window_size, stride=window_stride,
+ magnitude_squared=True)
+
+ mfcc_features = audio_ops.mfcc(spectrogram, audio_sample_rate, dct_coefficient_count=num_mfcc)
+
+ return mfcc_features
+
+
+def which_set(filename, validation_percentage, testing_percentage):
+ """Determines which data partition the file should belong to.
+
+ We want to keep files in the same training, validation, or testing sets even
+ if new ones are added over time. This makes it less likely that testing
+ samples will accidentally be reused in training when long runs are restarted
+ for example. To keep this stability, a hash of the filename is taken and used
+ to determine which set it should belong to. This determination only depends on
+ the name and the set proportions, so it won't change as other files are added.
+ It's also useful to associate particular files as related (for example words
+ spoken by the same person), so anything after '_nohash_' in a filename is
+ ignored for set determination. This ensures that 'bobby_nohash_0.wav' and
+ 'bobby_nohash_1.wav' are always in the same set, for example.
+
+ Args:
+ filename: File path of the data sample.
+ validation_percentage: How much of the data set to use for validation.
+ testing_percentage: How much of the data set to use for testing.
+
+ Returns:
+ String, one of 'training', 'validation', or 'testing'.
+ """
+ base_name = os.path.basename(filename)
+ # We want to ignore anything after '_nohash_' in the file name when
+ # deciding which set to put a wav in, so the data set creator has a way of
+ # grouping wavs that are close variations of each other.
+ hash_name = re.sub(r'_nohash_.*$', '', base_name)
+ # This looks a bit magical, but we need to decide whether this file should
+ # go into the training, testing, or validation sets, and we want to keep
+ # existing files in the same set even if more files are subsequently
+ # added.
+ # To do that, we need a stable way of deciding based on just the file name
+ # itself, so we do a hash of that and then use that to generate a
+ # probability value that we use to assign it.
+ hash_name_hashed = hashlib.sha1(tf.compat.as_bytes(hash_name)).hexdigest()
+ percentage_hash = ((int(hash_name_hashed, 16) %
+ (MAX_NUM_WAVS_PER_CLASS + 1)) *
+ (100.0 / MAX_NUM_WAVS_PER_CLASS))
+ if percentage_hash < validation_percentage:
+ result = 'validation'
+ elif percentage_hash < (testing_percentage + validation_percentage):
+ result = 'testing'
+ else:
+ result = 'training'
+ return result
+
+
+def prepare_words_list(wanted_words):
+ """Prepends common tokens to the custom word list.
+
+ Args:
+ wanted_words: List of strings containing custom words to spot.
+
+ Returns:
+ List of words with silence and unknown tokens added.
+ """
+ return [SILENCE_LABEL, UNKNOWN_WORD_LABEL] + wanted_words
+
+
+class AudioProcessor:
+ """Handles loading, partitioning, and preparing audio training data."""
+
+ class Modes(Enum):
+ TRAINING = 1
+ VALIDATION = 2
+ TESTING = 3
+
+ def __init__(self, data_url, data_dir, silence_percentage, unknown_percentage,
+ wanted_words, validation_percentage, testing_percentage, model_settings):
+ self.data_dir = Path(data_dir)
+ self.model_settings = model_settings
+ self.words_list = prepare_words_list(wanted_words)
+
+ self._tf_datasets = {}
+ self.background_data = None
+ self._set_size = {'training': 0, 'validation': 0, 'testing': 0}
+
+ self._download_and_extract_data(data_url, data_dir)
+ self._prepare_datasets(silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage)
+ self._prepare_background_data()
+
+ def get_data(self, mode, background_frequency=0, background_volume_range=0, time_shift=0):
+ """Returns the train, validation or test set for KWS as a TF Dataset.
+
+ Args:
+ mode: The set to return, see AudioProcessor.Modes enumeration.
+ background_frequency: How many of the samples have background noise mixed in.
+ background_volume_range: How loud the background noise should be, between 0 and 1.
+ time_shift: Range to randomly shift the training audio by in time.
+
+ Returns:
+ TF dataset that will generate tuples containing an mfcc and corresponding label.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ dataset = self._tf_datasets['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ dataset = self._tf_datasets['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ dataset = self._tf_datasets['testing']
+ else:
+ raise ValueError("Incorrect dataset type given")
+
+ use_background = (self.background_data is not None) and (mode == AudioProcessor.Modes.TRAINING)
+ dataset = dataset.map(lambda path, label: self._process_path(path, label, self.model_settings,
+ background_frequency, background_volume_range,
+ time_shift, use_background, self.background_data),
+ num_parallel_calls=tf.data.experimental.AUTOTUNE)
+
+ return dataset
+
+ def set_size(self, mode):
+ """Get the number of samples in the requested dataset partition.
+
+ Args:
+ mode: Which partition, see AudioProcessor.Modes enumeration.
+
+ Returns:
+ Number of samples in the partition.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ return self._set_size['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ return self._set_size['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ return self._set_size['testing']
+ else:
+ raise ValueError('Incorrect dataset type given')
+
+ @staticmethod
+ def _process_path(path, label, model_settings, background_frequency, background_volume_range, time_shift_samples,
+ use_background, background_data):
+ """Load wav files and calculate mfcc features.
+
+ Random shifting of samples and adding in background noise is done within this function as well.
+ This function is meant to be mapped onto a TF Dataset by using a lambda function.
+
+ Args:
+ path: Path to the wav file to load.
+ label: Integer label for classifying the audio clip.
+ model_settings: Dictionary of settings for model being trained.
+ background_frequency: How many clips will have background noise, 0.0 to 1.0.
+ background_volume_range: How loud the background noise will be.
+ time_shift_samples: How much to randomly shift the clips by.
+ use_background: Add in background noise to audio clips or not.
+ background_data: Ragged tensor of loaded background noise samples.
+
+ Returns:
+ Tuple of calculated flattened mfcc and its class label.
+ """
+
+ desired_samples = model_settings['desired_samples']
+ audio, sample_rate = load_wav_file(path, desired_samples=desired_samples)
+
+ # Make our own silence audio data.
+ if label == SILENCE_INDEX:
+ audio = tf.multiply(audio, 0)
+
+ # Shift samples start position and pad any gaps with zeros.
+ if time_shift_samples > 0:
+ time_shift_amount = tf.random.uniform(shape=(), minval=-time_shift_samples, maxval=time_shift_samples,
+ dtype=tf.int32)
+ else:
+ time_shift_amount = 0
+ if time_shift_amount > 0:
+ time_shift_padding = [[time_shift_amount, 0], [0, 0]]
+ time_shift_offset = [0, 0]
+ else:
+ time_shift_padding = [[0, -time_shift_amount], [0, 0]]
+ time_shift_offset = [-time_shift_amount, 0]
+
+ padded_foreground = tf.pad(audio, time_shift_padding, mode='CONSTANT')
+ sliced_foreground = tf.slice(padded_foreground, time_shift_offset, [desired_samples, -1])
+
+ # Get a random section of background noise.
+ if use_background:
+ background_index = tf.random.uniform(shape=(), maxval=background_data.shape[0], dtype=tf.int32)
+ background_sample = background_data[background_index]
+ background_offset = tf.random.uniform(shape=(), maxval=len(background_sample)-desired_samples,
+ dtype=tf.int32)
+ background_clipped = background_sample[background_offset:(background_offset + desired_samples)]
+ background_reshaped = tf.reshape(background_clipped, [desired_samples, 1])
+ if tf.random.uniform(shape=(), maxval=1) < background_frequency:
+ background_volume = tf.random.uniform(shape=(), maxval=background_volume_range)
+ else:
+ background_volume = tf.constant(0, dtype='float32')
+ else:
+ background_reshaped = np.zeros([desired_samples, 1], dtype=np.float32)
+ background_volume = tf.constant(0, dtype='float32')
+
+ # Mix in background noise.
+ background_mul = tf.multiply(background_reshaped, background_volume)
+ background_add = tf.add(background_mul, sliced_foreground)
+ background_clamp = tf.clip_by_value(background_add, -1.0, 1.0)
+
+ mfcc = calculate_mfcc(background_clamp, sample_rate, model_settings['window_size_samples'],
+ model_settings['window_stride_samples'],
+ model_settings['dct_coefficient_count'])
+ mfcc = tf.reshape(mfcc, [-1])
+
+ return mfcc, label
+
+ def _download_and_extract_data(self, data_url, target_directory):
+ """Downloads and extracts file to target directory.
+
+ If the file does not already exist download it and then untar into the target directory.
+
+ Args:
+ data_url: Web link to the tarred data to download.
+ target_directory: Directory to download and extract to.
+ """
+ target_directory = Path(target_directory)
+ target_directory.mkdir(exist_ok=True)
+
+ filename = data_url.split('/')[-1]
+ filepath = target_directory / filename
+
+ if not filepath.exists():
+ def _report_hook(block_num, block_size, total_size):
+ """Function to track download progress in urllib"""
+ read_so_far = block_num * block_size
+ percent = (read_so_far / total_size) * 100.0
+
+ s = f"\rDownloading {filename} {percent:.1f}%"
+
+ sys.stdout.write(s)
+ sys.stdout.flush()
+
+ filepath, _ = urllib.request.urlretrieve(data_url, filepath, _report_hook)
+ print()
+
+ print(f'Untarring {filename}...')
+ tarfile.open(filepath, 'r:gz').extractall(target_directory)
+
+ def _prepare_datasets(self, silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage):
+ """Split the data into train, validation and testing sets.
+
+ Silence and unknown data is added, then sets are converted to TF Datasets.
+
+ Args:
+ silence_percentage: Percent of words should be silence.
+ unknown_percentage: Percent of words that should be unknown.
+ wanted_words: List of words wanted to classify.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ """
+ # Make sure the shuffling and picking of unknowns is deterministic.
+ random.seed(RANDOM_SEED)
+ wanted_words_index = {}
+
+ for index, wanted_word in enumerate(wanted_words):
+ wanted_words_index[wanted_word] = index + 2
+
+ # Find all wav files in subfolders.
+ search_path = self.data_dir / '*' / '*.wav'
+ data_index, unknown_index, all_words = self._find_and_sort_wavs(search_path, validation_percentage,
+ testing_percentage, wanted_words_index)
+
+ for index, wanted_word in enumerate(wanted_words):
+ if wanted_word not in all_words:
+ raise Exception(f'Tried to find {wanted_word} in labels but only found: {", ".join(all_words.keys())}')
+
+ word_to_index = {}
+ for word in all_words:
+ if word in wanted_words_index:
+ word_to_index[word] = wanted_words_index[word]
+ else:
+ word_to_index[word] = UNKNOWN_WORD_INDEX
+ word_to_index[SILENCE_LABEL] = SILENCE_INDEX
+
+ # We need an arbitrary file to load as the input for the silence samples.
+ # It's multiplied by zero later, so the content doesn't matter.
+ silence_wav_path = data_index['training'][0]['file']
+ for set_index in ['validation', 'testing', 'training']:
+ set_size = len(data_index[set_index]) # Size before adding silence and unknown samples.
+ silence_size = int(math.ceil(set_size * silence_percentage / 100))
+ for _ in range(silence_size):
+ data_index[set_index].append({
+ 'label': SILENCE_LABEL,
+ 'file': silence_wav_path
+ })
+ # Pick some unknowns to add to each partition of the data set.
+ random.shuffle(unknown_index[set_index])
+ unknown_size = int(math.ceil(set_size * unknown_percentage / 100))
+ data_index[set_index].extend(unknown_index[set_index][:unknown_size])
+
+ self._set_size[set_index] = len(data_index[set_index]) # Size after adding silence and unknown samples.
+
+ # Make sure the ordering is random.
+ random.shuffle(data_index[set_index])
+
+ # Transform into TF Datasets ready for easier processing later.
+ labels, paths = list(zip(*[d.values() for d in data_index[set_index]]))
+ labels = [word_to_index[label] for label in labels]
+ self._tf_datasets[set_index] = tf.data.Dataset.from_tensor_slices((list(paths), labels))
+
+ def _find_and_sort_wavs(self, search_pattern, validation_percentage, testing_percentage, wanted_words_index):
+ """Find and sort wav files into known and unknown word sets.
+
+ Known words are files containing words in the list of wanted words.
+ Any other clip goes to the unknown label set. Labels come from the folder names.
+ All clips are also assigned to train, test and validation sets.
+
+ Args:
+ search_pattern: Path pattern used by glob to find wav files.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ wanted_words_index: Dict mapping wanted words to their label index.
+
+ Returns:
+ 3-tuple of known words, unknown words and mapping of all word labels.
+ """
+ data_index = {'validation': [], 'testing': [], 'training': []}
+ unknown_index = {'validation': [], 'testing': [], 'training': []}
+ all_words = {}
+
+ for wav_path in sorted(tf.io.gfile.glob(str(search_pattern))):
+ word = Path(wav_path).parent.name.lower()
+
+ # Treat the '_background_noise_' folder as a special case, since we expect
+ # it to contain long audio samples we mix in to improve training.
+ if word == BACKGROUND_NOISE_DIR_NAME:
+ continue
+
+ all_words[word] = True
+ set_index = which_set(wav_path, validation_percentage, testing_percentage)
+ # If it's a known class, store its detail, otherwise add it to the list
+ # we'll use to train the unknown label.
+ if word in wanted_words_index:
+ data_index[set_index].append({'label': word, 'file': wav_path})
+ else:
+ unknown_index[set_index].append({'label': word, 'file': wav_path})
+ if not all_words:
+ raise Exception('No .wavs found at ' + str(search_pattern))
+
+ return data_index, unknown_index, all_words
+
+ def _prepare_background_data(self):
+ """Searches a folder for background noise audio, and loads it into memory.
+
+ It's expected that the background audio samples will be in a subdirectory
+ named '_background_noise_' inside the 'data_dir' folder, as .wavs that match
+ the sample rate of the training data, but can be much longer in duration.
+
+ If the '_background_noise_' folder doesn't exist at all, this isn't an
+ error, it's just taken to mean that no background noise augmentation should
+ be used. If the folder does exist, but it's empty, that's treated as an
+ error.
+
+ Returns:
+ Ragged tensor of raw PCM-encoded audio samples of background noise.
+ None if the '_background_noise_' folder doesn't exist.
+
+ Raises:
+ Exception: If files aren't found in the folder.
+ """
+ background_data = []
+ background_dir = Path(self.data_dir / BACKGROUND_NOISE_DIR_NAME)
+ if not background_dir.exists():
+ self.background_data = None
+ return
+
+ search_path = Path(background_dir / '*.wav')
+ for wav_path in tf.io.gfile.glob(str(search_path)):
+ wav_data, _ = load_wav_file(wav_path, desired_samples=-1)
+ background_data.append(tf.reshape(wav_data, [-1]))
+
+ if not background_data:
+ raise Exception('No background wav files were found in ' + str(search_path))
+
+ # Ragged tensor as we can't use lists in tf dataset map functions.
+ self.background_data = tf.ragged.stack(background_data)
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/ds_cnn_m_inference_keras.py b/models/keyword_spotting/ds_cnn_medium/model_package_tf/ds_cnn_m_inference_keras.py
new file mode 100644
index 0000000..db7694a
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/ds_cnn_m_inference_keras.py
@@ -0,0 +1,76 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import argparse
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+ with open(filename, "r") as f:
+ return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+
+ model = tf.keras.models.load_model(FLAGS.keras_file_path)
+ predictions = model.predict(x)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+      help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--keras_file_path',
+ type=str,
+ default='',
+ help='Path to the .h5 Keras model file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/ds_cnn_m_inference_tflite.py b/models/keyword_spotting/ds_cnn_medium/model_package_tf/ds_cnn_m_inference_tflite.py
new file mode 100644
index 0000000..9f79d99
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/ds_cnn_m_inference_tflite.py
@@ -0,0 +1,120 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import numpy as np
+import argparse
+
+
+def tflite_inference(input_data, tflite_path):
+    """Runs a forward pass of the TFLite file and returns the result.
+
+ Args:
+ input_data: Input data to use on forward pass.
+ tflite_path: Path to TFLite file to run.
+
+ Returns:
+ Output from inference.
+ """
+ supported_quant_dtypes = (np.int8, np.int16)
+ interpreter = tf.lite.Interpreter(model_path=tflite_path)
+ interpreter.allocate_tensors()
+
+ input_details = interpreter.get_input_details()
+ output_details = interpreter.get_output_details()
+
+ input_dtype = input_details[0]["dtype"]
+ output_dtype = output_details[0]["dtype"]
+
+ # Check if the input/output type is quantized,
+ # set scale and zero-point accordingly
+ if input_dtype in supported_quant_dtypes:
+ input_scale, input_zero_point = input_details[0]["quantization"]
+ else:
+ input_scale, input_zero_point = 1, 0
+
+ input_data = input_data / input_scale + input_zero_point
+ input_data = np.round(input_data) if input_dtype in supported_quant_dtypes else input_data
+
+ if output_dtype in supported_quant_dtypes:
+ output_scale, output_zero_point = output_details[0]["quantization"]
+ else:
+ output_scale, output_zero_point = 1, 0
+
+ interpreter.set_tensor(input_details[0]['index'], tf.cast(input_data, input_dtype))
+ interpreter.invoke()
+
+ output_data = interpreter.get_tensor(output_details[0]['index'])
+
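+    # Dequantize the output back to float using the output scale and zero point (a no-op for float models).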
+ output_data = output_scale * (output_data.astype(np.float32) - output_zero_point)
+
+ return output_data
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+    with open(filename, "r") as f:
+        return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+ predictions = tflite_inference(x, FLAGS.tflite_path)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ default='',
+ help='Path to TFLite file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/evaluation.py b/models/keyword_spotting/ds_cnn_medium/model_package_tf/evaluation.py
new file mode 100644
index 0000000..f1ea40a
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/evaluation.py
@@ -0,0 +1,250 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for testing trained keyword spotting models from checkpoint files and TFLite files."""
+
+import argparse
+
+import numpy as np
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from ds_cnn_m_inference_tflite import tflite_inference
+
+
+def tflite_test(model_settings, audio_processor, tflite_path):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A TFLite model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ tflite_path: Path to TFLite file to use for inference.
+ """
+ # Evaluate on validation set.
+ print("Running TFLite evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+ expected_indices = np.concatenate([y for x, y in val_data])
+ predicted_indices = []
+
+ for mfcc, label in val_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+          f' (N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TFLite evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(1)
+ expected_indices = np.concatenate([y for x, y in test_data])
+ predicted_indices = []
+
+ for mfcc, label in test_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+          f' (N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def keras_test(model_settings, audio_processor, model):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A loaded keras model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ model: Loaded keras model.
+ """
+ # Evaluate on validation set.
+ print("Running TF evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in val_data])
+
+ predictions = model.predict(val_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+          f' (N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TF evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in test_data])
+
+ predictions = model.predict(test_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+          f' (N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def calculate_accuracy(predicted_indices, expected_indices):
+ """Calculates and returns accuracy.
+
+ Args:
+ predicted_indices: List of predicted integer indices.
+ expected_indices: List of expected integer indices.
+
+ Returns:
+ Accuracy value between 0 and 1.
+ """
+ correct_prediction = tf.equal(predicted_indices, expected_indices)
+ accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+ return accuracy
+
+
+def evaluate():
+ """Calculate accuracy and confusion matrices on validation and test sets.
+
+ Model is created and weights loaded from supplied command line arguments.
+ """
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.tflite_path:
+ tflite_test(model_settings, audio_processor, FLAGS.tflite_path)
+
+ if FLAGS.checkpoint:
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
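+        # expect_partial() silences warnings about checkpoint values (e.g. optimizer state) that are not needed for inference.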
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+ keras_test(model_settings, audio_processor, model)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from')
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ help='Path to TFLite file to use for evaluation')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ evaluate()
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/how_to_guidance.ipynb b/models/keyword_spotting/ds_cnn_medium/model_package_tf/how_to_guidance.ipynb
new file mode 100644
index 0000000..fea007f
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/how_to_guidance.ipynb
@@ -0,0 +1,428 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.\n",
+ "#\n",
+ "# SPDX-License-Identifier: Apache-2.0\n",
+ "#\n",
+ "# Licensed under the Apache License, Version 2.0 (the License); you may\n",
+ "# not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an AS IS BASIS, WITHOUT\n",
+ "# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# DS_CNN_Medium - Hero\n",
+ "\n",
+ "Here we reproduce the models with our established codebase and ModelPackage approach for your convenience.\n",
+ "\n",
+ "## Model-Package Overview:\n",
+ "\n",
+ "| Model \t| DS_CNN_Medium \t|\n",
+ "|:---------------:\t|:---------------------------------------------------------------:\t|\n",
+ "| **Format**: \t| Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |\n",
+ "| **Feature**: \t| Keyword spotting for Arm Cortex-M CPUs |\n",
+ "| **Architectural Delta w.r.t. Vanilla**: | None |\n",
+ "| **Domain**: \t| Keyword spotting |\n",
+ "| **Package Quality**: \t| Hero |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Table of contents \n",
+ "\n",
+    "This how-to guide presents the key steps needed to reproduce everything in this package. The contents are organised as below, with internal navigation links so you can jump straight to each section.\n",
+ "\n",
+ " \n",
+ "* [1.0 Model recreation](#model_recreation)\n",
+ "\n",
+ "* [2.0 Training](#training)\n",
+ "\n",
+ "* [3.0 Testing](#testing)\n",
+ "\n",
+ "* [4.0 Optimization](#optimization)\n",
+ "\n",
+ "* [5.0 Quantization and TFLite conversion](#tflite_conversion)\n",
+ "\n",
+    "* [6.0 Single inference of the TFLite model files](#tflite_inference)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1.0 Model Recreation\n",
+ "\n",
+    "In order to recreate the model you will first need to be using ```Python3.7``` and to install the requirements in ```requirements.txt```.\n",
+ "\n",
+    "Once you have these requirements satisfied you can execute the recreation script contained within this folder by running:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 11:54:08.485801: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 11:54:58.475678: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 11:54:58.516721: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:54:58.516765: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 11:54:58.537249: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 11:54:58.537321: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 11:54:58.540057: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 11:54:58.540315: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 11:54:58.540872: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 11:54:58.541591: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 11:54:58.541745: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 11:54:58.542218: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:54:58.542511: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 11:54:58.543331: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:54:58.543822: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:54:58.543872: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 11:54:58.966709: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 11:54:58.966747: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 11:54:58.966761: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 11:54:58.967266: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11002 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 11:55:01.322474: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 11:55:03.039244: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 11:55:03.039493: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 11:55:03.039987: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:55:03.040276: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:55:03.040309: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 11:55:03.040317: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 11:55:03.040325: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 11:55:03.040640: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11002 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 11:55:03.059483: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 11:55:03.063108: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.01ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.001ms.\n",
+ "\n",
+ "2023-01-31 11:55:03.313219: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 11:55:03.313256: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 11:55:03.318616: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 11:55:03.321473: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:55:03.321732: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:55:03.321763: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 11:55:03.321773: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 11:55:03.321780: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 11:55:03.322065: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11002 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "Converted model saved to ds_cnn.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "2023-01-31 11:55:03.376097: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 332 3 5 1 5 6 3 8 3 2 3]\n",
+ " [ 0 4 386 1 0 0 4 0 0 0 0 2]\n",
+ " [ 0 5 2 378 2 3 3 0 0 1 1 11]\n",
+ " [ 0 1 2 0 324 1 0 0 1 16 4 1]\n",
+ " [ 0 3 0 8 1 360 0 0 1 1 1 2]\n",
+ " [ 1 0 8 1 1 0 338 3 0 0 0 0]\n",
+ " [ 0 2 1 1 0 0 1 356 0 1 1 0]\n",
+ " [ 1 5 0 2 4 0 0 0 341 10 0 0]\n",
+ " [ 0 2 0 0 16 0 3 0 4 345 2 1]\n",
+ " [ 1 1 0 0 12 1 0 1 0 1 332 1]\n",
+ " [ 0 4 0 13 2 4 1 0 1 1 1 345]]\n",
+ "Validation accuracy = 94.67%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 362 2 8 5 2 8 3 5 1 2 10]\n",
+ " [ 0 7 402 2 0 0 7 0 0 1 0 0]\n",
+ " [ 0 4 1 389 0 4 1 0 0 0 0 6]\n",
+ " [ 0 6 0 0 397 1 0 0 4 12 5 0]\n",
+ " [ 0 8 1 14 0 374 3 1 1 0 1 3]\n",
+ " [ 0 8 5 1 0 0 396 2 0 0 0 0]\n",
+ " [ 0 6 0 0 0 1 4 383 0 1 1 0]\n",
+ " [ 0 4 0 0 7 3 1 0 368 13 0 0]\n",
+ " [ 0 5 0 2 11 0 1 0 5 375 0 3]\n",
+ " [ 0 3 0 0 8 2 1 1 0 0 394 2]\n",
+ " [ 0 5 1 27 3 1 1 1 0 1 0 362]]\n",
+ "Test accuracy = 94.27%(N=4890)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 11:55:32.290813: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 11:56:25.228757: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 11:56:25.264869: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:56:25.264908: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 11:56:25.285323: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 11:56:25.285388: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 11:56:25.288128: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 11:56:25.288385: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 11:56:25.288944: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 11:56:25.289667: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 11:56:25.289820: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 11:56:25.292002: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:56:25.292281: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 11:56:25.293162: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:56:25.293718: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:56:25.293799: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 11:56:25.736053: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 11:56:25.736092: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 11:56:25.736100: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 11:56:25.736608: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11002 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 11:56:28.038374: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 11:56:29.838652: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 11:56:29.838886: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 11:56:29.839342: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:56:29.839606: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:56:29.839637: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 11:56:29.839648: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 11:56:29.839655: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 11:56:29.839941: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11002 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 11:56:29.859427: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 11:56:29.863763: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.013ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.002ms.\n",
+ "\n",
+ "2023-01-31 11:56:30.003088: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 11:56:30.003122: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 11:56:30.008047: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 11:56:30.010836: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 11:56:30.011085: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 11:56:30.011115: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 11:56:30.011125: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 11:56:30.011131: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 11:56:30.011421: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11002 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 11:56:30.051239: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "fully_quantize: 0, inference_type: 6, input_inference_type: 9, output_inference_type: 9\n",
+ "Quantized model saved to ds_cnn_quantized.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 328 2 5 1 8 5 3 8 4 2 5]\n",
+ " [ 0 6 375 1 0 1 10 0 0 0 1 3]\n",
+ " [ 0 9 2 368 1 3 6 0 1 0 4 12]\n",
+ " [ 0 3 1 0 319 1 1 0 2 13 9 1]\n",
+ " [ 0 3 2 9 0 350 1 0 3 1 2 6]\n",
+ " [ 1 3 8 1 1 0 334 3 0 0 0 1]\n",
+ " [ 0 4 1 0 1 0 1 351 0 1 2 2]\n",
+ " [ 1 6 0 1 4 0 0 0 343 7 0 1]\n",
+ " [ 0 5 0 0 21 0 3 1 4 333 3 3]\n",
+ " [ 1 2 0 0 11 0 0 1 1 2 331 1]\n",
+ " [ 0 7 0 15 2 4 1 0 0 1 3 339]]\n",
+ "Validation accuracy = 93.18%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 363 1 7 2 1 11 3 4 1 3 12]\n",
+ " [ 0 7 399 2 0 0 8 0 1 0 0 2]\n",
+ " [ 0 5 1 380 0 4 4 0 0 0 2 9]\n",
+ " [ 0 8 0 0 390 1 1 2 4 10 8 1]\n",
+ " [ 0 8 1 14 2 370 2 1 0 0 4 4]\n",
+ " [ 0 9 4 1 1 0 395 2 0 0 0 0]\n",
+ " [ 0 8 2 0 2 1 8 372 0 1 2 0]\n",
+ " [ 0 9 0 0 9 3 1 0 358 12 1 3]\n",
+ " [ 0 7 0 2 15 0 1 0 4 362 4 7]\n",
+ " [ 0 3 0 0 7 4 1 2 0 1 391 2]\n",
+ " [ 0 9 2 26 3 4 0 0 2 1 4 351]]\n",
+ "Test accuracy = 92.82%(N=4890)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!bash ./recreate_model.sh"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder to generate the TFLite files and perform evaluation on the test set. Both an fp32 version and a quantized version will be produced. The quantized version will use post-training quantization to fully quantize it.\n",
+ "\n",
+ "If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --train\n",
+ "```\n",
+ "\n",
+    "Training is then performed and should produce a model that reaches the accuracy stated in this repository. Note that the TFLite export will still use the baseline pre-trained checkpoint files, so you will need to re-run the script and this time supply the path to the new checkpoint files you want to use, for example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --ckpt \n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2.0 Training\n",
+ "\n",
+    "The training scripts can be used to recreate any of the models from the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf), provided the right hyperparameters are used. The training commands with all the hyperparameters to reproduce the model in this repository are given [here](recreate_model.sh). The model in this part of the repository represents just one variation of the models from the paper; other variants are covered in other parts of the repository.\n",
+ "\n",
+ "\n",
+ "As a general example of how to train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:\n",
+ "```\n",
+ "python train.py --model_architecture dnn --model_size_info 128 128 128\n",
+ "```\n",
+ "\n",
+    "The command line argument *--model_size_info* is used to pass the neural network layer\n",
+    "dimensions (such as the number of layers and the convolution filter size/stride) as a list to models.py,\n",
+ "which builds the TensorFlow graph based on the provided model architecture\n",
+ "and layer dimensions. For more info on *model_size_info* for each network architecture see\n",
+ "[models.py](model_core_utils/models.py).\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3.0 Testing\n",
+ "To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:\n",
+ "```\n",
+ "python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters passed to this script should match those used in the Training step.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4.0 Optimization\n",
+ "\n",
+ "We introduce an *optional* step to optimize the trained keyword spotting model for deployment.\n",
+ "\n",
+    "Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. The clustering hyperparameters are 32 weight clusters and the kmeans++ cluster initialization method.\n",
+ "\n",
+    "To optimize your trained model (e.g. a DNN), you need a trained model checkpoint on which to run clustering and fine-tuning.\n",
+ "You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.\n",
+ "\n",
+ "To apply the optimization and fine-tuning, run the following command:\n",
+ "```\n",
+ "python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step, except for the number of training steps.\n",
+ "The number of training steps is reduced since the optimization step only requires fine-tuning.**\n",
+ "\n",
+    "This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model.\n",
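+    "\n",
+    "A minimal sketch of this clustering step is shown below, assuming a trained Keras `model` restored from a checkpoint and the TensorFlow Model Optimization Toolkit; `optimisations.py` remains the authoritative implementation:\n",
+    "\n",
+    "```python\n",
+    "import tensorflow_model_optimization as tfmot\n",
+    "\n",
+    "cluster_weights = tfmot.clustering.keras.cluster_weights\n",
+    "CentroidInitialization = tfmot.clustering.keras.CentroidInitialization\n",
+    "\n",
+    "clustering_params = {\n",
+    "    'number_of_clusters': 32,\n",
+    "    'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS,\n",
+    "}\n",
+    "\n",
+    "# Wrap the trained model with clustering, then fine-tune for a reduced number of steps.\n",
+    "clustered_model = cluster_weights(model, **clustering_params)\n",
+    "clustered_model.compile(optimizer='adam',\n",
+    "                        loss='sparse_categorical_crossentropy',\n",
+    "                        metrics=['accuracy'])\n",
+    "\n",
+    "# Strip the clustering wrappers before checkpointing or exporting the final model.\n",
+    "final_model = tfmot.clustering.keras.strip_clustering(clustered_model)\n",
+    "```"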
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5.0 Quantization and TFLite Conversion\n",
+ "\n",
+ "You can now use TensorFlow's\n",
+ "[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to\n",
+    "quantize the trained models with minimal effort.\n",
+ "\n",
+ "To quantize your trained model (e.g. a DNN) run:\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint [--inference_type int8|int16]\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+    "The ```inference_type``` parameter is *optional* and should be used when a fully quantized model with int8 or int16 inputs and outputs is needed. It defaults to fp32.\n",
+ "\n",
+    "In this example, this step will produce a quantized TFLite file *dnn_quantized.tflite*.\n",
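+    "\n",
+    "For reference, a minimal sketch of full integer post-training quantization with ```tf.lite.TFLiteConverter``` is shown below; ```convert_to_tflite.py``` is the authoritative script, and ```model``` and ```representative_dataset``` (a generator yielding example MFCC inputs) are assumptions:\n",
+    "\n",
+    "```python\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n",
+    "converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
+    "# representative_dataset is assumed to yield a few hundred example MFCC inputs for calibration.\n",
+    "converter.representative_dataset = representative_dataset\n",
+    "converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]\n",
+    "converter.inference_input_type = tf.int8\n",
+    "converter.inference_output_type = tf.int8\n",
+    "\n",
+    "tflite_model = converter.convert()\n",
+    "with open('dnn_quantized.tflite', 'wb') as f:\n",
+    "    f.write(tflite_model)\n",
+    "```"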
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can test the accuracy of this quantized model on the test set by running:\n",
+ "```\n",
+ "python evaluation.py --tflite_path dnn_quantized.tflite\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:\n",
+ "\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint --no-quantize\n",
+ "```\n",
+ "\n",
+ "This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 6.0 Single inference of the TFLite model files \n",
+ "\n",
+    "You can run single-sample TFLite inference on the fp32 and int8 model files by using the following command: \n",
+ "\n",
+ "```python ds_cnn_m_inference_tflite.py --labels validation_utils/labels.txt --wav --tflite_path ```\n",
+ "\n",
+ "**The feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/README.md b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
new file mode 100644
index 0000000..ae2c70e
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32
+
+## Description
+This is a floating point fp32 version of the DS-CNN Medium model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | fp32 |
+| SHA-1 Hash | 620951417ca52a1640bb25490ca7b34507fe8881 |
+| Size (Bytes) | 548468 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 94.27% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: HERO |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_multiplication_x: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Hero |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_multiplication_x: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 490) | fp32 | models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input | fp32 | [1, 490] | The input is a set of processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | fp32 | models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity | fp32 | [1, 12] | The probabilities of the 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
new file mode 100644
index 0000000..2277065
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
@@ -0,0 +1,66 @@
+benchmark:
+ benchmark_metrics:
+ accuracy: 94.27%
+ benchmark_name: Google Speech Commands test set
+description: This is a floating point fp32 version of the DS-CNN Medium model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: fp32
+ file_size_bytes: 548468
+ filename: ds_cnn_m.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: 620951417ca52a1640bb25490ca7b34507fe8881
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+  - description: The input is a set of processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input
+ shape:
+ - 1
+ - 490
+ type: fp32
+ use_case: Random input for model regression.
+ input_datatype: fp32
+ name: input
+ shape:
+ - 1
+ - 490
+ output_nodes:
+  - description: The probabilities of the 12 keywords.
+ example_output:
+ path: models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: fp32
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: fp32
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: false
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - AVERAGE_POOL_2D
+ - CONV_2D
+ - DEPTHWISE_CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/ds_cnn_m.tflite b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/ds_cnn_m.tflite
new file mode 100644
index 0000000..b4b2f28
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/ds_cnn_m.tflite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:405ba6ec5977ae6bd42ac153deb02f471bcd76e6c07b127352e4a0f3ca5be054
+size 548468
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
new file mode 100644
index 0000000..701fcd4
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcdf0702505989d7a0fdffca09308abde32082a1f56bad845c05fbca24e87aa4
+size 2088
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
new file mode 100644
index 0000000..f6082ba
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a107cccce62cb03a3aadc59387f87ecb46a6e4bf81ed5f67d15750fa8b78fec
+size 176
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/README.md b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/README.md
new file mode 100644
index 0000000..331b883
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/README.md
@@ -0,0 +1,63 @@
+# keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8
+
+## Description
+This is a fully quantized int8 version of the DS-CNN Medium model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | int8 |
+| SHA-1 Hash | 740d32adde16948b2ab45e1e8c856de2925a05eb |
+| Size (Bytes) | 186288 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 93.93% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: HERO |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_check_mark: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Hero |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_check_mark: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 490) | int8 | models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input | int8 | [1, 490] | The input is a set of processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | int8 | models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity | int8 | [1, 12] | The probabilities of the 12 keywords |
+
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
new file mode 100644
index 0000000..7cc5a2a
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
@@ -0,0 +1,66 @@
+benchmark:
+ benchmark_metrics:
+ Accuracy: 93.93%
+ benchmark_name: Google Speech Commands test set
+description: This is a fully quantized int8 version of the DS-CNN Medium model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: int8
+ file_size_bytes: 186288
+ filename: ds_cnn_m_quantized.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: 740d32adde16948b2ab45e1e8c856de2925a05eb
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+  - description: The input is a set of processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input
+ shape:
+ - 1
+ - 490
+ type: int8
+ use_case: Random input for model regression.
+ input_datatype: int8
+ name: input
+ shape:
+ - 1
+ - 490
+ output_nodes:
+  - description: The probabilities of the 12 keywords.
+ example_output:
+ path: models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: int8
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: int8
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Deployable
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: true
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - AVERAGE_POOL_2D
+ - CONV_2D
+ - DEPTHWISE_CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_medium/tflite_int8/ds_cnn_m_quantized.tflite b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/ds_cnn_m_quantized.tflite
similarity index 100%
rename from models/keyword_spotting/ds_cnn_medium/tflite_int8/ds_cnn_m_quantized.tflite
rename to models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/ds_cnn_m_quantized.tflite
diff --git a/models/keyword_spotting/ds_cnn_medium/tflite_int8/testing_input/input/0.npy b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
similarity index 100%
rename from models/keyword_spotting/ds_cnn_medium/tflite_int8/testing_input/input/0.npy
rename to models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
diff --git a/models/keyword_spotting/ds_cnn_medium/tflite_int8/testing_output/Identity/0.npy b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
similarity index 100%
rename from models/keyword_spotting/ds_cnn_medium/tflite_int8/testing_output/Identity/0.npy
rename to models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/saved_model/ds_cnn_medium/keras_metadata.pb b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/saved_model/ds_cnn_medium/keras_metadata.pb
new file mode 100644
index 0000000..d1cf98b
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/saved_model/ds_cnn_medium/keras_metadata.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e2c679859ef8fe55a5240076d46d21fb6058d6f5eb6789e8f66484c0eb5606c
+size 65455
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/saved_model/ds_cnn_medium/saved_model.pb b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/saved_model/ds_cnn_medium/saved_model.pb
new file mode 100644
index 0000000..edf9f9d
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/saved_model/ds_cnn_medium/saved_model.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3441fda9da39b45faa7e26c777cb8608318cb6140df5aee5470f2a94c04b5a7
+size 711776
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/saved_model/ds_cnn_medium/variables/variables.data-00000-of-00001 b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/saved_model/ds_cnn_medium/variables/variables.data-00000-of-00001
new file mode 100644
index 0000000..fa0e037
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/saved_model/ds_cnn_medium/variables/variables.data-00000-of-00001
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89a822e0c17c8dc7500805a9833fd2558ffe89da671932747c508402e60c7405
+size 583382
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/saved_model/ds_cnn_medium/variables/variables.index b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/saved_model/ds_cnn_medium/variables/variables.index
new file mode 100644
index 0000000..24cf127
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/saved_model/ds_cnn_medium/variables/variables.index
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f94d6215cd19d5651d333504aad08c2d1450afae072b86e9d6c344b8e23fd26
+size 3642
diff --git a/models/keyword_spotting/ds_cnn_medium/tflite_int8/ckpt/checkpoint b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/weights/checkpoint
similarity index 100%
rename from models/keyword_spotting/ds_cnn_medium/tflite_int8/ckpt/checkpoint
rename to models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/weights/checkpoint
diff --git a/models/keyword_spotting/ds_cnn_medium/tflite_int8/ckpt/ds_cnn_0.95_ckpt.data-00000-of-00001 b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/weights/ds_cnn_0.95_ckpt.data-00000-of-00001
similarity index 100%
rename from models/keyword_spotting/ds_cnn_medium/tflite_int8/ckpt/ds_cnn_0.95_ckpt.data-00000-of-00001
rename to models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/weights/ds_cnn_0.95_ckpt.data-00000-of-00001
diff --git a/models/keyword_spotting/ds_cnn_medium/tflite_int8/ckpt/ds_cnn_0.95_ckpt.index b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/weights/ds_cnn_0.95_ckpt.index
similarity index 100%
rename from models/keyword_spotting/ds_cnn_medium/tflite_int8/ckpt/ds_cnn_0.95_ckpt.index
rename to models/keyword_spotting/ds_cnn_medium/model_package_tf/model_archive/baseline/weights/ds_cnn_0.95_ckpt.index
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_core_utils/__init__.py b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_core_utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_core_utils/models.py b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_core_utils/models.py
new file mode 100644
index 0000000..1978136
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/model_core_utils/models.py
@@ -0,0 +1,327 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Model definitions for simple keyword spotting."""
+
+import math
+
+import tensorflow as tf
+
+
+def prepare_model_settings(label_count, sample_rate, clip_duration_ms,
+ window_size_ms, window_stride_ms,
+ dct_coefficient_count):
+ """Calculates common settings needed for all models.
+
+ Args:
+ label_count: How many classes are to be recognized.
+ sample_rate: Number of audio samples per second.
+ clip_duration_ms: Length of each audio clip to be analyzed.
+ window_size_ms: Duration of frequency analysis window.
+ window_stride_ms: How far to move in time between frequency windows.
+ dct_coefficient_count: Number of frequency bins to use for analysis.
+
+ Returns:
+ Dictionary containing common settings.
+ """
+ desired_samples = int(sample_rate * clip_duration_ms / 1000)
+ window_size_samples = int(sample_rate * window_size_ms / 1000)
+ window_stride_samples = int(sample_rate * window_stride_ms / 1000)
+ length_minus_window = (desired_samples - window_size_samples)
+ if length_minus_window < 0:
+ spectrogram_length = 0
+ else:
+ spectrogram_length = 1 + int(length_minus_window / window_stride_samples)
+ fingerprint_size = dct_coefficient_count * spectrogram_length
+
+ return {
+ 'desired_samples': desired_samples,
+ 'window_size_samples': window_size_samples,
+ 'window_stride_samples': window_stride_samples,
+ 'spectrogram_length': spectrogram_length,
+ 'dct_coefficient_count': dct_coefficient_count,
+ 'fingerprint_size': fingerprint_size,
+ 'label_count': label_count,
+ 'sample_rate': sample_rate,
+ }
+
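+# Illustrative example (not executed here): with the flags used for the DS-CNN Medium
+# model in recreate_model.sh (sample_rate=16000, clip_duration_ms=1000, window_size_ms=40,
+# window_stride_ms=20, dct_coefficient_count=10) this computes:
+#   desired_samples = 16000, window_size_samples = 640, window_stride_samples = 320,
+#   spectrogram_length = 1 + (16000 - 640) // 320 = 49,
+#   fingerprint_size = 10 * 49 = 490,
+# which matches the (1, 490) MFCC input of the exported TFLite model.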
+
+def create_model(model_settings, model_architecture, model_size_info, is_training):
+ """Builds a tf.keras model of the requested architecture compatible with the settings.
+
+ Args:
+ model_settings: Dictionary of information about the model.
+ model_architecture: String specifying which kind of model to create.
+      model_size_info: Array with specific information for the chosen architecture
+        (e.g. convolutional parameters, number of layers).
+      is_training: Whether the model is being created for training (only used by some
+        architectures, e.g. 'basic_lstm').
+
+ Returns:
+ A tf.keras Model with the requested architecture.
+
+ Raises:
+ Exception: If the architecture type isn't recognized.
+ """
+
+ if model_architecture == 'dnn':
+ return create_dnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'cnn':
+ return create_cnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'ds_cnn':
+ return create_ds_cnn_model(model_settings, model_size_info)
+ elif model_architecture == 'single_fc':
+ return create_single_fc_model(model_settings)
+ elif model_architecture == 'basic_lstm':
+ return create_basic_lstm_model(model_settings, model_size_info, is_training)
+ else:
+        raise Exception(f'model_architecture argument {model_architecture} not recognized, '
+                        f'should be one of "dnn", "cnn", "ds_cnn", "single_fc" or "basic_lstm"')
+
+
+def create_single_fc_model(model_settings):
+ """Builds a model with a single fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+
+ Returns:
+ tf.keras Model of the 'SINGLE_FC' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+ # Fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(inputs)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_basic_lstm_model(model_settings, model_size_info, is_training):
+ """Builds a model with a basic lstm layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+      model_size_info: Array whose first element gives the number of units in the LSTM layer.
+      is_training: Whether the model is being created for training; when True the LSTM is
+        left rolled (unroll=False), otherwise it is unrolled for inference.
+
+ Returns:
+ tf.keras Model of the 'Basic_LSTM' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size))
+
+ # LSTM layer, and unrolling depending on whether you are training or not
+ if is_training:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=False)(x)
+ else:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=True)(x)
+
+    # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_dnn_model(model_settings, model_size_info):
+ """Builds a model with multiple hidden fully-connected layers.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+      model_size_info: Length of the array defines the number of hidden layers and
+        each element in the array represents the number of neurons in that layer.
+
+ Returns:
+ tf.keras Model of the 'DNN' architecture.
+ """
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ # First fully connected layer.
+ x = tf.keras.layers.Dense(units=model_size_info[0], activation='relu')(inputs)
+
+ # Hidden layers with ReLU activations.
+ for i in range(1, len(model_size_info)):
+ x = tf.keras.layers.Dense(units=model_size_info[i], activation='relu')(x)
+
+ # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_cnn_model(model_settings, model_size_info):
+ """Builds a model with 2 convolution layers followed by a linear layer and a hidden fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines the first and second convolution parameters in
+ {number of conv features, conv filter height, width, stride in y,x dir.},
+ followed by linear layer size and fully-connected layer size.
+
+ Returns:
+ tf.keras Model of the 'CNN' architecture.
+ """
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ first_filter_count = model_size_info[0]
+ first_filter_height = model_size_info[1] # Time axis.
+ first_filter_width = model_size_info[2] # Frequency axis.
+ first_filter_stride_y = model_size_info[3] # Time axis.
+    first_filter_stride_x = model_size_info[4] # Frequency axis.
+
+ second_filter_count = model_size_info[5]
+ second_filter_height = model_size_info[6] # Time axis.
+ second_filter_width = model_size_info[7] # Frequency axis.
+ second_filter_stride_y = model_size_info[8] # Time axis.
+ second_filter_stride_x = model_size_info[9] # Frequency axis.
+
+ linear_layer_size = model_size_info[10]
+ fc_size = model_size_info[11]
+
+    inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=first_filter_count,
+ kernel_size=(first_filter_height, first_filter_width),
+ strides=(first_filter_stride_y, first_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Second convolution.
+ x = tf.keras.layers.Conv2D(filters=second_filter_count,
+ kernel_size=(second_filter_height, second_filter_width),
+ strides=(second_filter_stride_y, second_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Flatten for fully connected layers.
+ x = tf.keras.layers.Flatten()(x)
+
+ # Fully connected layer with no activation.
+ x = tf.keras.layers.Dense(units=linear_layer_size)(x)
+
+    # Fully connected layer followed by batch norm and ReLU activation.
+ x = tf.keras.layers.Dense(units=fc_size)(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Output fully connected.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_ds_cnn_model(model_settings, model_size_info):
+ """Builds a model with convolutional & depthwise separable convolutional layers.
+
+ For more details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines number of layers, followed by the DS-Conv layer
+ parameters in the order {number of conv features, conv filter height,
+ width and stride in y,x dir.} for each of the layers.
+
+ Returns:
+ tf.keras Model of the 'DS-CNN' architecture.
+ """
+
+ label_count = model_settings['label_count']
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ t_dim = input_time_size
+ f_dim = input_frequency_size
+
+ # Extract model dimensions from model_size_info.
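+    # Illustrative example: with the DS-CNN Medium flags from recreate_model.sh,
+    # model_size_info = [5, 172,10,4,2,1, 172,3,3,2,2, 172,3,3,1,1, 172,3,3,1,1, 172,3,3,1,1],
+    # i.e. 5 layers, each described by [features, kernel_t, kernel_f, stride_t, stride_f]:
+    # layer 0 is a standard conv (172 feats, 10x4 kernel, 2x1 stride) and layers 1-4 are
+    # depthwise separable convs (172 feats, 3x3 kernels, 2x2 then 1x1 strides).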
+ num_layers = model_size_info[0]
+ conv_feat = [None]*num_layers
+ conv_kt = [None]*num_layers
+ conv_kf = [None]*num_layers
+ conv_st = [None]*num_layers
+ conv_sf = [None]*num_layers
+
+ i = 1
+ for layer_no in range(0, num_layers):
+ conv_feat[layer_no] = model_size_info[i]
+ i += 1
+ conv_kt[layer_no] = model_size_info[i]
+ i += 1
+ conv_kf[layer_no] = model_size_info[i]
+ i += 1
+ conv_st[layer_no] = model_size_info[i]
+ i += 1
+ conv_sf[layer_no] = model_size_info[i]
+ i += 1
+
+    inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # Depthwise separable convolutions.
+ for layer_no in range(0, num_layers):
+ if layer_no == 0:
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[0],
+ kernel_size=(conv_kt[0], conv_kf[0]),
+ strides=(conv_st[0], conv_sf[0]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ else:
+ # Depthwise convolution.
+            x = tf.keras.layers.DepthwiseConv2D(kernel_size=(conv_kt[layer_no], conv_kf[layer_no]),
+                                                strides=(conv_st[layer_no], conv_sf[layer_no]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ # Pointwise convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[layer_no], kernel_size=(1, 1))(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ t_dim = math.ceil(t_dim/float(conv_st[layer_no]))
+ f_dim = math.ceil(f_dim/float(conv_sf[layer_no]))
+
+ # Global average pool.
+ x = tf.keras.layers.AveragePooling2D(pool_size=(t_dim, f_dim), strides=1)(x)
+
+ # Squeeze before passing to output fully connected layer.
+ x = tf.reshape(x, shape=(-1, conv_feat[layer_no]))
+
+    # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=label_count, activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/optimisations.py b/models/keyword_spotting/ds_cnn_medium/model_package_tf/optimisations.py
new file mode 100644
index 0000000..16b6f4c
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/optimisations.py
@@ -0,0 +1,259 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for optimizing simple keyword spotting models using clustering API."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+import tensorflow_model_optimization as tfmot
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def print_model_weight_clusters(model):
+
+ for layer in model.layers:
+ if isinstance(layer, tf.keras.layers.Wrapper):
+ weights = layer.trainable_weights
+ else:
+ weights = layer.weights
+ for weight in weights:
+ if "kernel" in weight.name:
+ unique_count = len(np.unique(weight))
+ print(
+ f"{layer.name}/{weight.name}: {unique_count} clusters "
+ )
+
+
+def optimize():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model to optimize from checkpoint.
+    model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, True)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ cluster_weights = tfmot.clustering.keras.cluster_weights
+ CentroidInitialization = tfmot.clustering.keras.CentroidInitialization
+
+ clustering_params = {
+ 'number_of_clusters': 32,
+ 'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS}
+
+ clustered_model = cluster_weights(model, **clustering_params)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Train the model with clustering applied.
+ clustered_model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data)
+
+ stripped_clustered_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+
+ print_model_weight_clusters(stripped_clustered_model)
+
+ # Save the clustered model weights
+ train_dir = Path(FLAGS.train_dir) / "optimized"
+ train_dir.mkdir(parents=True, exist_ok=True)
+
+ stripped_clustered_model.save_weights((train_dir /
+ (FLAGS.model_architecture +
+ "_clustered_ckpt")))
+
+ # Test the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ stripped_clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ test_loss, test_acc = stripped_clustered_model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='3750,750',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--save_step_interval',
+ type=int,
+ default=100,
+ help='Save model checkpoint every save_steps.')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from before fine-tuning.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ optimize()
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/recreate_model.sh b/models/keyword_spotting/ds_cnn_medium/model_package_tf/recreate_model.sh
new file mode 100644
index 0000000..278bddd
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/recreate_model.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ckpt_path=model_archive/model_source/weights/ds_cnn_0.95_ckpt
+train=false
+
+# Parse command line args
+while (( $# >= 1 )); do
+ case $1 in
+ --ckpt)
+ if [ "$2" ]; then
+ ckpt_path=$2
+ shift
+ else
+ printf 'ERROR: "--ckpt" requires a path to be supplied.\n'
+ exit 1
+ fi
+ ;;
+ --train)
+ train=true
+ break;;
+ *) shift;
+ esac;
+done
+
+
+# DS-CNN Medium training
+if [ "$train" = true ]
+then
+python train.py --model_architecture ds_cnn --model_size_info 5 172 10 4 2 1 172 3 3 2 2 172 3 3 1 1 172 3 3 1 1 172 3 3 1 1 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --learning_rate 0.0005,0.0001,0.00002 --how_many_training_steps 10000,10000,10000 --summaries_dir work/DS_CNN/DS_CNN_M/retrain_logs --train_dir work/DS_CNN/DS_CNN_M/training
+fi
+
+# Conversion to TFLite fp32
+python convert_to_tflite.py --model_architecture ds_cnn --model_size_info 5 172 10 4 2 1 172 3 3 2 2 172 3 3 1 1 172 3 3 1 1 172 3 3 1 1 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --checkpoint $ckpt_path --no-quantize
+
+# Conversion to TFLite int8
+python convert_to_tflite.py --model_architecture ds_cnn --model_size_info 5 172 10 4 2 1 172 3 3 2 2 172 3 3 1 1 172 3 3 1 1 172 3 3 1 1 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --checkpoint $ckpt_path --inference_type int8
+
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/requirements.txt b/models/keyword_spotting/ds_cnn_medium/model_package_tf/requirements.txt
new file mode 100644
index 0000000..3448cff
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/requirements.txt
@@ -0,0 +1,3 @@
+numpy == 1.19.5
+tensorflow == 2.5.0
+tensorflow-model-optimization == 0.6.0
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/train.py b/models/keyword_spotting/ds_cnn_medium/model_package_tf/train.py
new file mode 100644
index 0000000..8c488b3
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/train.py
@@ -0,0 +1,227 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for training simple keyword spotting models."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def train():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model.
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, True)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
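+    # Example with the default flags (--how_many_training_steps '15000,3000' and
+    # --learning_rate '0.001,0.0001'): boundaries = [15000] and values = [0.001, 0.0001],
+    # i.e. train at 0.001 for the first 15000 steps and at 0.0001 for the remaining 3000.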
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Callbacks.
+ train_dir = Path(FLAGS.train_dir) / "best"
+ train_dir.mkdir(parents=True, exist_ok=True)
+ model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
+ filepath=(train_dir / (FLAGS.model_architecture + "_{val_accuracy:.3f}_ckpt")),
+ save_weights_only=True,
+ monitor='val_accuracy',
+ mode='max',
+ save_best_only=True)
+ tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.summaries_dir)
+
+ # Train the model.
+ model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data,
+ callbacks=[model_checkpoint_callback, tensorboard_callback])
+
+ # Test and save the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ test_loss, test_acc = model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+ model.save(f'saved_model/{FLAGS.model_architecture}')
+ model.save(f'keras/{FLAGS.model_architecture}.h5')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='15000,3000',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--summaries_dir',
+ type=str,
+ default='/tmp/retrain_logs',
+ help='Where to save summary logs for TensorBoard.')
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ train()
diff --git a/models/keyword_spotting/ds_cnn_medium/model_package_tf/validation_utils/labels.txt b/models/keyword_spotting/ds_cnn_medium/model_package_tf/validation_utils/labels.txt
new file mode 100644
index 0000000..ba41645
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_medium/model_package_tf/validation_utils/labels.txt
@@ -0,0 +1,12 @@
+_silence_
+_unknown_
+yes
+no
+up
+down
+left
+right
+on
+off
+stop
+go
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_medium/tflite_int8/README.md b/models/keyword_spotting/ds_cnn_medium/tflite_int8/README.md
deleted file mode 100644
index c675a6f..0000000
--- a/models/keyword_spotting/ds_cnn_medium/tflite_int8/README.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# DS-CNN Medium INT8
-
-## Description
-This is a fully quantized version (asymmetrical int8) of the DS-CNN Medium model developed by Arm, with training checkpoints, from the Hello Edge paper. Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
-
-## License
-[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
-
-## Related Materials
-### Class Labels
-The class labels associated with this model can be downloaded by running the script `get_class_labels.sh`.
-
-### Model Recreation Code
-Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m.
-
-## Network Information
-| Network Information | Value |
-|---------------------|------------------|
-| Framework | TensorFlow Lite |
-| SHA-1 Hash | 740d32adde16948b2ab45e1e8c856de2925a05eb |
-| Size (Bytes) | 186288 |
-| Provenance | https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m |
-| Paper | https://arxiv.org/abs/1711.07128 |
-
-## Accuracy
-Dataset: Google Speech Commands Test Set
-
-| Metric | Value |
-|--------|-------|
-| Accuracy | 0.941 |
-
-## Performance
-| Platform | Optimized |
-|----------|:---------:|
-| Cortex-A |:heavy_check_mark: |
-| Cortex-M |:heavy_check_mark: HERO |
-| Mali GPU |:heavy_check_mark: |
-| Ethos U |:heavy_check_mark: |
-
-### Key
-* :heavy_check_mark: - Will run on this platform.
-* :heavy_multiplication_x: - Will not run on this platform.
-
-
-
-## Optimizations
-| Optimization | Value |
-|-----------------|---------|
-| Quantization | INT8 |
-
-## Network Inputs
-| Input Node Name | Shape | Description |
-|-----------------|---------|-------------|
-| input | (1, 490) | The input is a processed MFCCs of shape (1, 490) |
-
-## Network Outputs
-| Output Node Name | Shape | Description |
-|------------------|---------|-------------|
-| Identity | (1, 12) | The probability on 12 keywords. |
diff --git a/models/keyword_spotting/ds_cnn_medium/tflite_int8/definition.yaml b/models/keyword_spotting/ds_cnn_medium/tflite_int8/definition.yaml
deleted file mode 100644
index c77867c..0000000
--- a/models/keyword_spotting/ds_cnn_medium/tflite_int8/definition.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-benchmark:
- Google Speech Commands test set:
- Accuracy: 94.13%
-description: 'This is a fully quantized version (asymmetrical int8) of the DS-CNN
- Medium model developed by Arm, with training checkpoints, from the Hello Edge paper.
- Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m'
-license:
-- Apache-2.0
-network:
- file_size_bytes: 186288
- filename: ds_cnn_m_quantized.tflite
- framework: TensorFlow Lite
- hash:
- algorithm: sha1
- value: 740d32adde16948b2ab45e1e8c856de2925a05eb
- provenance: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
- quality_level: hero#CORTEX-M
-network_parameters:
- input_nodes:
- - description: The input is a processed MFCCs of shape (1, 490)
- example_input:
- path: models/keyword_spotting/ds_cnn_medium/tflite_int8/testing_input/input
- name: input
- shape:
- - 1
- - 490
- output_nodes:
- - description: The probability on 12 keywords.
- name: Identity
- shape:
- - 1
- - 12
- test_output_path: models/keyword_spotting/ds_cnn_medium/tflite_int8/testing_output/Identity
-operators:
- TensorFlow Lite:
- - AVERAGE_POOL_2D
- - CONV_2D
- - DEPTHWISE_CONV_2D
- - DEQUANTIZE
- - FULLY_CONNECTED
- - QUANTIZE
- - RELU
- - RESHAPE
- - SOFTMAX
-paper: https://arxiv.org/abs/1711.07128
diff --git a/models/keyword_spotting/ds_cnn_medium/tflite_int8/get_class_labels.sh b/models/keyword_spotting/ds_cnn_medium/tflite_int8/get_class_labels.sh
deleted file mode 100755
index e59caf5..0000000
--- a/models/keyword_spotting/ds_cnn_medium/tflite_int8/get_class_labels.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the License); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an AS IS BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/usr/bin/env bash
-
-wget https://raw.githubusercontent.com/ARM-software/ML-KWS-for-MCU/e9cf319e9aa2ff71d433e111477dd95329fb94cb/Pretrained_models/labels.txt
-mv labels.txt labelmappings.txt
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/README.md b/models/keyword_spotting/ds_cnn_small/model_package_tf/README.md
new file mode 100644
index 0000000..077f31c
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/README.md
@@ -0,0 +1,115 @@
+# DS-CNN Small model package
+
+This folder contains code that will allow you to recreate the DS-CNN Small keyword spotting model from
+the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf).
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Model Package Overview
+| Model | DS_CNN_Small |
+|:---------------: |:------------------------------------------:|
+| **Format**: | Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |
+| **Feature**: | Keyword spotting for Arm Cortex-M CPUs |
+| **Architectural Delta w.r.t. Vanilla**: | None |
+| **Domain**: | Keyword spotting |
+| **Package Quality**: | Hero |
+
+## Model Recreation
+
+To recreate the model you will first need to be using ```Python 3.7``` and to install the requirements in ```requirements.txt```.
+
+Once these requirements are satisfied, you can execute the recreation script contained in this folder by running:
+
+```bash
+bash ./recreate_model.sh
+```
+
+Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder
+to generate the TFLite files and perform evaluation on the test sets. Both an fp32 version and a quantized version will be produced;
+the quantized version is fully quantized using post-training quantization.
+
+If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:
+
+```bash
+bash ./recreate_model.sh --train
+```
+
+Training is then performed and should produce a model with the accuracy stated in this repository.
+Note that exporting to TFLite will still be done with the pre-trained checkpoint files, so you will need to re-run the script
+and this time supply the path to the newly trained checkpoint files you want to use, for example:
+
+```bash
+bash ./recreate_model.sh --ckpt <path_to_checkpoint>
+```
+
+
+## Training
+
+To train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:
+
+```
+python train.py --model_architecture dnn --model_size_info 128 128 128
+```
+The command line argument *--model_size_info* is used to pass the neural network layer
+dimensions (such as the number of layers and convolution filter size/stride) as a list to models.py,
+which builds the TensorFlow graph based on the provided model architecture
+and layer dimensions. For more info on *model_size_info* for each network architecture see
+[models.py](model_core_utils/models.py).
+
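+For illustration, here is a minimal sketch (not part of the package; it assumes you run it from this
+folder so that the `model_core_utils` and `data_processing` modules are importable) of how the
+`--model_size_info 128 128 128` example above maps onto the model-building API:
+
+```python
+from model_core_utils import models
+from data_processing import data_preprocessing
+
+# Same defaults as train.py: 16 kHz audio, 1000 ms clips, 30 ms windows, 10 ms stride, 40 MFCCs.
+wanted_words = 'yes,no,up,down,left,right,on,off,stop,go'.split(',')
+model_settings = models.prepare_model_settings(
+    len(data_preprocessing.prepare_words_list(wanted_words)),
+    sample_rate=16000, clip_duration_ms=1000,
+    window_size_ms=30.0, window_stride_ms=10.0, dct_coefficient_count=40)
+
+# model_size_info [128, 128, 128] -> a DNN with three 128-neuron hidden layers.
+model = models.create_model(model_settings, 'dnn', [128, 128, 128], is_training=True)
+model.summary()
+```
+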
+The training commands with all the hyperparameters to reproduce the models shown in the
+[paper](https://arxiv.org/pdf/1711.07128.pdf) are given [here](recreate_model.sh).
+
+## Testing
+To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:
+```
+python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <path_to_checkpoint>
+```
+The parameters used here should match those used in the Training step.
+
+## Optimization
+
+We introduce a new *optional* step to optimize the trained keyword spotting model for deployment.
+
+Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. 32 weight clusters and the kmeans++ cluster initialization method are used as the clustering hyperparameters.
+
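+As a rough sketch, the clustering step in `optimisations.py` boils down to the following use of the
+TensorFlow Model Optimization API (illustrative only; the stand-in `model` below replaces the trained
+KWS model that the real script loads from a checkpoint and then fine-tunes and evaluates):
+
+```python
+import tensorflow as tf
+import tensorflow_model_optimization as tfmot
+
+# Stand-in for the trained KWS model (in optimisations.py this comes from a checkpoint).
+model = tf.keras.Sequential([tf.keras.layers.Dense(12, input_shape=(490,))])
+
+clustering_params = {
+    'number_of_clusters': 32,
+    'cluster_centroids_init': tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS,
+}
+
+# Wrap the model so its kernels are constrained to 32 shared weight values.
+clustered_model = tfmot.clustering.keras.cluster_weights(model, **clustering_params)
+
+# ...fine-tune clustered_model as usual, then strip the wrappers before export:
+final_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+```
+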
+To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.
+You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.
+
+To apply the optimization and fine-tuning, run the following command:
+```
+python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <path_to_checkpoint>
+```
+The parameters used here should match those used in the Training step, except for the number of training steps.
+The number of training steps is reduced since the optimization step only requires fine-tuning.
+
+This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model.
+
+## Quantization and TFLite Conversion
+
+As part of this update we now use TensorFlow's
+[post-training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to
+make quantizing the trained models straightforward.
+
+To quantize your trained model (e.g. a DNN) run:
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <path_to_checkpoint> [--inference_type int8|int16]
+```
+The parameters used here should match those used in the Training step.
+
+The inference_type parameter is *optional* and should be used if a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32.
+
+This step will produce a quantized TFLite file *dnn_quantized.tflite*.
+You can test the accuracy of this quantized model on the test set by running:
+```
+python evaluation.py --tflite_path dnn_quantized.tflite
+```
+The parameters used here should match those used in the Training step.
+
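+If you want to sanity-check the generated file, a quick illustrative way (assuming TensorFlow is
+installed, as per requirements.txt) is to inspect its input and output types with the TFLite interpreter:
+
+```python
+import tensorflow as tf
+
+interpreter = tf.lite.Interpreter(model_path='dnn_quantized.tflite')
+interpreter.allocate_tensors()
+
+# For an int8 conversion both of these should report int8; for the default they stay float32.
+print(interpreter.get_input_details()[0]['dtype'])
+print(interpreter.get_output_details()[0]['dtype'])
+```
+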
+`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:
+
+```
+python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint <path_to_checkpoint> --no-quantize
+```
+
+This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/convert_to_tflite.py b/models/keyword_spotting/ds_cnn_small/model_package_tf/convert_to_tflite.py
new file mode 100644
index 0000000..64ab8df
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/convert_to_tflite.py
@@ -0,0 +1,234 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for converting and quantizing a trained keyword spotting
+ model and saving to TFLite."""
+
+import argparse
+
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from evaluation import tflite_test
+
+NUM_REP_DATA_SAMPLES = 100 # How many samples to use for post training quantization.
+
+
+def convert(model_settings, audio_processor, checkpoint, quantize, inference_type, tflite_path):
+ """Load our trained floating point model and convert it.
+
+ TFLite conversion or post training quantization is performed and the
+ resulting model is saved as a TFLite file.
+ We use samples from the validation set to do post training quantization.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ checkpoint: Path to training checkpoint to load.
+ quantize: Whether to quantize the model or convert to fp32 TFLite model.
+ inference_type: Input/output type of the quantized model.
+ tflite_path: Output TFLite file save path.
+ """
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(checkpoint).expect_partial()
+
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+
+ def _rep_dataset():
+ """Generator function to produce representative dataset."""
+ i = 0
+ for mfcc, label in val_data:
+ if i > NUM_REP_DATA_SAMPLES:
+ break
+ i += 1
+ yield [mfcc]
+
+ if quantize:
+ # Quantize model and save to disk.
+ tflite_model = post_training_quantize(model, inference_type, _rep_dataset)
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Quantized model saved to {tflite_path}.')
+ else:
+ converter = tf.lite.TFLiteConverter.from_keras_model(model)
+ tflite_model = converter.convert()
+ with open(tflite_path, 'wb') as f:
+ f.write(tflite_model)
+ print(f'Converted model saved to {tflite_path}.')
+
+
+def post_training_quantize(keras_model, inference_type, rep_dataset):
+ """Perform post training quantization and returns the TFLite model ready for saving.
+
+ See https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization for
+ more details.
+
+ Args:
+ keras_model: The trained tf Keras model used for post training quantization.
+ inference_type: Input/output type of the quantized model.
+ rep_dataset: Function to use as a representative dataset, must be callable.
+
+ Returns:
+ Quantized TFLite model ready for saving to disk.
+ """
+ converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+ converter.optimizations = [tf.lite.Optimize.DEFAULT]
+
+    if inference_type == 'int8':
+        converter.inference_input_type = tf.int8
+        converter.inference_output_type = tf.int8
+        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+    elif inference_type == 'int16':
+        converter.inference_input_type = tf.int16
+        converter.inference_output_type = tf.int16
+        converter.target_spec.supported_ops = [
+            tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]
+
+    # Full integer post-training quantization needs a representative dataset.
+    converter.representative_dataset = rep_dataset
+
+ tflite_model = converter.convert()
+
+ return tflite_model
+
+
+def main():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.quantize:
+ tflite_path = f'{FLAGS.model_architecture}_quantized.tflite'
+ else:
+ tflite_path = f'{FLAGS.model_architecture}.tflite'
+
+ # Load floating point model from checkpoint and convert it.
+ convert(model_settings, audio_processor, FLAGS.checkpoint,
+ FLAGS.quantize, FLAGS.inference_type, tflite_path)
+
+ # Test the newly converted model on the test set.
+ tflite_test(model_settings, audio_processor, tflite_path)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+        help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from.')
+ parser.add_argument(
+ '--quantize',
+ dest='quantize',
+ action="store_true",
+ default=True,
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--no-quantize',
+ dest='quantize',
+ action="store_false",
+ help='Whether to quantize the model or convert to fp32 TFLite model. Defaults to True.')
+ parser.add_argument(
+ '--inference_type',
+ type=str,
+ default='fp32',
+ help='If quantize is true, whether the model input and output is float32, int8 or int16')
+
+ FLAGS, _ = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/data_processing/__init__.py b/models/keyword_spotting/ds_cnn_small/model_package_tf/data_processing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/data_processing/data_preprocessing.py b/models/keyword_spotting/ds_cnn_small/model_package_tf/data_processing/data_preprocessing.py
new file mode 100644
index 0000000..05cf5ba
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/data_processing/data_preprocessing.py
@@ -0,0 +1,462 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Modifications Copyright 2023 Arm Inc. All Rights Reserved.
+# Modified to use TensorFlow 2.0 and data pipelines.
+#
+"""Functions for loading and preparing data for keyword spotting."""
+
+import os
+import re
+import sys
+import urllib
+from pathlib import Path
+import tarfile
+import hashlib
+import random
+import math
+from enum import Enum
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.ops import gen_audio_ops as audio_ops
+
+MAX_NUM_WAVS_PER_CLASS = 2**27 - 1 # ~134M
+RANDOM_SEED = 59185
+BACKGROUND_NOISE_DIR_NAME = '_background_noise_'
+SILENCE_LABEL = '_silence_'
+SILENCE_INDEX = 0
+UNKNOWN_WORD_INDEX = 1
+UNKNOWN_WORD_LABEL = '_unknown_'
+
+
+def load_wav_file(wav_filename, desired_samples):
+ """Loads and then decodes a given 16bit PCM wav file.
+
+ Decoded audio is scaled to the range [-1, 1] and padded or cropped to the desired number of samples.
+
+ Args:
+ wav_filename: 16bit PCM wav file to load.
+ desired_samples: Number of samples wanted from the audio file.
+
+ Returns:
+ Tuple consisting of the decoded audio and sample rate.
+ """
+ wav_file = tf.io.read_file(wav_filename)
+ decoded_wav = audio_ops.decode_wav(wav_file, desired_channels=1, desired_samples=desired_samples)
+
+ return decoded_wav.audio, decoded_wav.sample_rate
+
+
+def calculate_mfcc(audio_signal, audio_sample_rate, window_size, window_stride, num_mfcc):
+ """Returns Mel Frequency Cepstral Coefficients (MFCC) for a given audio signal.
+
+ Args:
+ audio_signal: Raw audio signal in range [-1, 1]
+ audio_sample_rate: Audio signal sample rate
+ window_size: Window size in samples for calculating spectrogram
+ window_stride: Window stride in samples for calculating spectrogram
+ num_mfcc: The number of MFCC features wanted.
+
+ Returns:
+      Calculated MFCC features.
+ """
+ spectrogram = audio_ops.audio_spectrogram(input=audio_signal, window_size=window_size, stride=window_stride,
+ magnitude_squared=True)
+
+ mfcc_features = audio_ops.mfcc(spectrogram, audio_sample_rate, dct_coefficient_count=num_mfcc)
+
+ return mfcc_features
+
+
+def which_set(filename, validation_percentage, testing_percentage):
+ """Determines which data partition the file should belong to.
+
+ We want to keep files in the same training, validation, or testing sets even
+ if new ones are added over time. This makes it less likely that testing
+ samples will accidentally be reused in training when long runs are restarted
+ for example. To keep this stability, a hash of the filename is taken and used
+ to determine which set it should belong to. This determination only depends on
+ the name and the set proportions, so it won't change as other files are added.
+ It's also useful to associate particular files as related (for example words
+ spoken by the same person), so anything after '_nohash_' in a filename is
+ ignored for set determination. This ensures that 'bobby_nohash_0.wav' and
+ 'bobby_nohash_1.wav' are always in the same set, for example.
+
+ Args:
+ filename: File path of the data sample.
+ validation_percentage: How much of the data set to use for validation.
+ testing_percentage: How much of the data set to use for testing.
+
+ Returns:
+ String, one of 'training', 'validation', or 'testing'.
+ """
+ base_name = os.path.basename(filename)
+ # We want to ignore anything after '_nohash_' in the file name when
+ # deciding which set to put a wav in, so the data set creator has a way of
+ # grouping wavs that are close variations of each other.
+ hash_name = re.sub(r'_nohash_.*$', '', base_name)
+ # This looks a bit magical, but we need to decide whether this file should
+ # go into the training, testing, or validation sets, and we want to keep
+ # existing files in the same set even if more files are subsequently
+ # added.
+ # To do that, we need a stable way of deciding based on just the file name
+ # itself, so we do a hash of that and then use that to generate a
+ # probability value that we use to assign it.
+ hash_name_hashed = hashlib.sha1(tf.compat.as_bytes(hash_name)).hexdigest()
+ percentage_hash = ((int(hash_name_hashed, 16) %
+ (MAX_NUM_WAVS_PER_CLASS + 1)) *
+ (100.0 / MAX_NUM_WAVS_PER_CLASS))
+ if percentage_hash < validation_percentage:
+ result = 'validation'
+ elif percentage_hash < (testing_percentage + validation_percentage):
+ result = 'testing'
+ else:
+ result = 'training'
+ return result
+
+
+def prepare_words_list(wanted_words):
+ """Prepends common tokens to the custom word list.
+
+ Args:
+ wanted_words: List of strings containing custom words to spot.
+
+ Returns:
+ List of words with silence and unknown tokens added.
+ """
+ return [SILENCE_LABEL, UNKNOWN_WORD_LABEL] + wanted_words
+
+
+class AudioProcessor:
+ """Handles loading, partitioning, and preparing audio training data."""
+
+ class Modes(Enum):
+ TRAINING = 1
+ VALIDATION = 2
+ TESTING = 3
+
+ def __init__(self, data_url, data_dir, silence_percentage, unknown_percentage,
+ wanted_words, validation_percentage, testing_percentage, model_settings):
+ self.data_dir = Path(data_dir)
+ self.model_settings = model_settings
+ self.words_list = prepare_words_list(wanted_words)
+
+ self._tf_datasets = {}
+ self.background_data = None
+ self._set_size = {'training': 0, 'validation': 0, 'testing': 0}
+
+ self._download_and_extract_data(data_url, data_dir)
+ self._prepare_datasets(silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage)
+ self._prepare_background_data()
+
+ def get_data(self, mode, background_frequency=0, background_volume_range=0, time_shift=0):
+ """Returns the train, validation or test set for KWS as a TF Dataset.
+
+ Args:
+ mode: The set to return, see AudioProcessor.Modes enumeration.
+ background_frequency: What proportion of the samples have background noise mixed in, 0.0 to 1.0.
+ background_volume_range: How loud the background noise should be, between 0 and 1.
+ time_shift: Range to randomly shift the training audio by in time.
+
+ Returns:
+ TF dataset that will generate tuples containing an mfcc and corresponding label.
+
+ Raises:
+ ValueError: If mode is not recognised.
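+
+ Example (illustrative only; 'processor' and the parameter values are arbitrary):
+ train_data = processor.get_data(AudioProcessor.Modes.TRAINING,
+                                 background_frequency=0.8,
+                                 background_volume_range=0.1,
+                                 time_shift=1600).batch(100)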
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ dataset = self._tf_datasets['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ dataset = self._tf_datasets['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ dataset = self._tf_datasets['testing']
+ else:
+ ValueError("Incorrect dataset type given")
+
+ use_background = (self.background_data is not None) and (mode == AudioProcessor.Modes.TRAINING)
+ dataset = dataset.map(lambda path, label: self._process_path(path, label, self.model_settings,
+ background_frequency, background_volume_range,
+ time_shift, use_background, self.background_data),
+ num_parallel_calls=tf.data.experimental.AUTOTUNE)
+
+ return dataset
+
+ def set_size(self, mode):
+ """Get the number of samples in the requested dataset partition.
+
+ Args:
+ mode: Which partition, see AudioProcessor.Modes enumeration.
+
+ Returns:
+ Number of samples in the partition.
+
+ Raises:
+ ValueError: If mode is not recognised.
+ """
+ if mode == AudioProcessor.Modes.TRAINING:
+ return self._set_size['training']
+ elif mode == AudioProcessor.Modes.VALIDATION:
+ return self._set_size['validation']
+ elif mode == AudioProcessor.Modes.TESTING:
+ return self._set_size['testing']
+ else:
+ raise ValueError('Incorrect dataset type given')
+
+ @staticmethod
+ def _process_path(path, label, model_settings, background_frequency, background_volume_range, time_shift_samples,
+ use_background, background_data):
+ """Load wav files and calculate mfcc features.
+
+ Random shifting of samples and adding in background noise is done within this function as well.
+ This function is meant to be mapped onto a TF Dataset by using a lambda function.
+
+ Args:
+ path: Path to the wav file to load.
+ label: Integer label for classifying the audio clip.
+ model_settings: Dictionary of settings for model being trained.
+ background_frequency: What proportion of clips will have background noise added, 0.0 to 1.0.
+ background_volume_range: How loud the background noise will be.
+ time_shift_samples: How much to randomly shift the clips by.
+ use_background: Add in background noise to audio clips or not.
+ background_data: Ragged tensor of loaded background noise samples.
+
+ Returns:
+ Tuple of calculated flattened mfcc and its class label.
+ """
+
+ desired_samples = model_settings['desired_samples']
+ audio, sample_rate = load_wav_file(path, desired_samples=desired_samples)
+
+ # Make our own silence audio data.
+ if label == SILENCE_INDEX:
+ audio = tf.multiply(audio, 0)
+
+ # Shift samples start position and pad any gaps with zeros.
+ if time_shift_samples > 0:
+ time_shift_amount = tf.random.uniform(shape=(), minval=-time_shift_samples, maxval=time_shift_samples,
+ dtype=tf.int32)
+ else:
+ time_shift_amount = 0
+ if time_shift_amount > 0:
+ time_shift_padding = [[time_shift_amount, 0], [0, 0]]
+ time_shift_offset = [0, 0]
+ else:
+ time_shift_padding = [[0, -time_shift_amount], [0, 0]]
+ time_shift_offset = [-time_shift_amount, 0]
+
+ padded_foreground = tf.pad(audio, time_shift_padding, mode='CONSTANT')
+ sliced_foreground = tf.slice(padded_foreground, time_shift_offset, [desired_samples, -1])
+
+ # Get a random section of background noise.
+ if use_background:
+ background_index = tf.random.uniform(shape=(), maxval=background_data.shape[0], dtype=tf.int32)
+ background_sample = background_data[background_index]
+ background_offset = tf.random.uniform(shape=(), maxval=len(background_sample)-desired_samples,
+ dtype=tf.int32)
+ background_clipped = background_sample[background_offset:(background_offset + desired_samples)]
+ background_reshaped = tf.reshape(background_clipped, [desired_samples, 1])
+ if tf.random.uniform(shape=(), maxval=1) < background_frequency:
+ background_volume = tf.random.uniform(shape=(), maxval=background_volume_range)
+ else:
+ background_volume = tf.constant(0, dtype='float32')
+ else:
+ background_reshaped = np.zeros([desired_samples, 1], dtype=np.float32)
+ background_volume = tf.constant(0, dtype='float32')
+
+ # Mix in background noise.
+ background_mul = tf.multiply(background_reshaped, background_volume)
+ background_add = tf.add(background_mul, sliced_foreground)
+ background_clamp = tf.clip_by_value(background_add, -1.0, 1.0)
+
+ mfcc = calculate_mfcc(background_clamp, sample_rate, model_settings['window_size_samples'],
+ model_settings['window_stride_samples'],
+ model_settings['dct_coefficient_count'])
+ mfcc = tf.reshape(mfcc, [-1])
+
+ return mfcc, label
+
+ def _download_and_extract_data(self, data_url, target_directory):
+ """Downloads and extracts file to target directory.
+
+ If the file does not already exist download it and then untar into the target directory.
+
+ Args:
+ data_url: Web link to the tarred data to download.
+ target_directory: Directory to download and extract to.
+ """
+ target_directory = Path(target_directory)
+ target_directory.mkdir(exist_ok=True)
+
+ filename = data_url.split('/')[-1]
+ filepath = target_directory / filename
+
+ if not filepath.exists():
+ def _report_hook(block_num, block_size, total_size):
+ """Function to track download progress in urllib"""
+ read_so_far = block_num * block_size
+ percent = (read_so_far / total_size) * 100.0
+
+ s = f"\rDownloading {filename} {percent:.1f}%"
+
+ sys.stdout.write(s)
+ sys.stdout.flush()
+
+ filepath, _ = urllib.request.urlretrieve(data_url, filepath, _report_hook)
+ print()
+
+ print(f'Untarring {filename}...')
+ tarfile.open(filepath, 'r:gz').extractall(target_directory)
+
+ def _prepare_datasets(self, silence_percentage, unknown_percentage, wanted_words,
+ validation_percentage, testing_percentage):
+ """Split the data into train, validation and testing sets.
+
+ Silence and unknown data is added, then sets are converted to TF Datasets.
+
+ Args:
+ silence_percentage: Percent of words that should be silence.
+ unknown_percentage: Percent of words that should be unknown.
+ wanted_words: List of words we want to classify.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ """
+ # Make sure the shuffling and picking of unknowns is deterministic.
+ random.seed(RANDOM_SEED)
+ wanted_words_index = {}
+
+ for index, wanted_word in enumerate(wanted_words):
+ wanted_words_index[wanted_word] = index + 2
+
+ # Find all wav files in subfolders.
+ search_path = self.data_dir / '*' / '*.wav'
+ data_index, unknown_index, all_words = self._find_and_sort_wavs(search_path, validation_percentage,
+ testing_percentage, wanted_words_index)
+
+ for index, wanted_word in enumerate(wanted_words):
+ if wanted_word not in all_words:
+ raise Exception(f'Tried to find {wanted_word} in labels but only found: {", ".join(all_words.keys())}')
+
+ word_to_index = {}
+ for word in all_words:
+ if word in wanted_words_index:
+ word_to_index[word] = wanted_words_index[word]
+ else:
+ word_to_index[word] = UNKNOWN_WORD_INDEX
+ word_to_index[SILENCE_LABEL] = SILENCE_INDEX
+
+ # We need an arbitrary file to load as the input for the silence samples.
+ # It's multiplied by zero later, so the content doesn't matter.
+ silence_wav_path = data_index['training'][0]['file']
+ for set_index in ['validation', 'testing', 'training']:
+ set_size = len(data_index[set_index]) # Size before adding silence and unknown samples.
+ silence_size = int(math.ceil(set_size * silence_percentage / 100))
+ for _ in range(silence_size):
+ data_index[set_index].append({
+ 'label': SILENCE_LABEL,
+ 'file': silence_wav_path
+ })
+ # Pick some unknowns to add to each partition of the data set.
+ random.shuffle(unknown_index[set_index])
+ unknown_size = int(math.ceil(set_size * unknown_percentage / 100))
+ data_index[set_index].extend(unknown_index[set_index][:unknown_size])
+
+ self._set_size[set_index] = len(data_index[set_index]) # Size after adding silence and unknown samples.
+
+ # Make sure the ordering is random.
+ random.shuffle(data_index[set_index])
+
+ # Transform into TF Datasets ready for easier processing later.
+ labels, paths = list(zip(*[d.values() for d in data_index[set_index]]))
+ labels = [word_to_index[label] for label in labels]
+ self._tf_datasets[set_index] = tf.data.Dataset.from_tensor_slices((list(paths), labels))
+
+ def _find_and_sort_wavs(self, search_pattern, validation_percentage, testing_percentage, wanted_words_index):
+ """Find and sort wav files into known and unknown word sets.
+
+ Known words are files containing words in the list of wanted words.
+ Any other clip goes to the unknown label set. Labels come from the folder names.
+ All clips are also assigned to train, test and validation sets.
+
+ Args:
+ search_pattern: Path pattern used by glob to find wav files.
+ validation_percentage: Percent to split off for validation.
+ testing_percentage: Percent to split off for testing.
+ wanted_words_index: Dict mapping wanted words to their label index.
+
+ Returns:
+ 3-tuple of known words, unknown words and mapping of all word labels.
+ """
+ data_index = {'validation': [], 'testing': [], 'training': []}
+ unknown_index = {'validation': [], 'testing': [], 'training': []}
+ all_words = {}
+
+ for wav_path in sorted(tf.io.gfile.glob(str(search_pattern))):
+ word = Path(wav_path).parent.name.lower()
+
+ # Treat the '_background_noise_' folder as a special case, since we expect
+ # it to contain long audio samples we mix in to improve training.
+ if word == BACKGROUND_NOISE_DIR_NAME:
+ continue
+
+ all_words[word] = True
+ set_index = which_set(wav_path, validation_percentage, testing_percentage)
+ # If it's a known class, store its detail, otherwise add it to the list
+ # we'll use to train the unknown label.
+ if word in wanted_words_index:
+ data_index[set_index].append({'label': word, 'file': wav_path})
+ else:
+ unknown_index[set_index].append({'label': word, 'file': wav_path})
+ if not all_words:
+ raise Exception('No .wavs found at ' + str(search_pattern))
+
+ return data_index, unknown_index, all_words
+
+ def _prepare_background_data(self):
+ """Searches a folder for background noise audio, and loads it into memory.
+
+ It's expected that the background audio samples will be in a subdirectory
+ named '_background_noise_' inside the 'data_dir' folder, as .wavs that match
+ the sample rate of the training data, but can be much longer in duration.
+
+ If the '_background_noise_' folder doesn't exist at all, this isn't an
+ error, it's just taken to mean that no background noise augmentation should
+ be used. If the folder does exist, but it's empty, that's treated as an
+ error.
+
+ Returns:
+ Ragged tensor of raw PCM-encoded audio samples of background noise.
+ None if the '_background_noise_' folder doesn't exist.
+
+ Raises:
+ Exception: If files aren't found in the folder.
+ """
+ background_data = []
+ background_dir = Path(self.data_dir / BACKGROUND_NOISE_DIR_NAME)
+ if not background_dir.exists():
+ self.background_data = None
+ return
+
+ search_path = Path(background_dir / '*.wav')
+ for wav_path in tf.io.gfile.glob(str(search_path)):
+ wav_data, _ = load_wav_file(wav_path, desired_samples=-1)
+ background_data.append(tf.reshape(wav_data, [-1]))
+
+ if not background_data:
+ raise Exception('No background wav files were found in ' + str(search_path))
+
+ # Ragged tensor as we can't use lists in tf dataset map functions.
+ self.background_data = tf.ragged.stack(background_data)
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/ds_cnn_s_inference_keras.py b/models/keyword_spotting/ds_cnn_small/model_package_tf/ds_cnn_s_inference_keras.py
new file mode 100644
index 0000000..db7694a
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/ds_cnn_s_inference_keras.py
@@ -0,0 +1,76 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import argparse
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+ with open(filename, "r") as f:
+     return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+
+ model = tf.keras.models.load_model(FLAGS.keras_file_path)
+ predictions = model.predict(x)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--keras_file_path',
+ type=str,
+ default='',
+ help='Path to the .h5 Keras model file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/ds_cnn_s_inference_tflite.py b/models/keyword_spotting/ds_cnn_small/model_package_tf/ds_cnn_s_inference_tflite.py
new file mode 100644
index 0000000..9f79d99
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/ds_cnn_s_inference_tflite.py
@@ -0,0 +1,120 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from data_processing.data_preprocessing import load_wav_file, calculate_mfcc
+
+import tensorflow as tf
+import numpy as np
+import argparse
+
+
+def tflite_inference(input_data, tflite_path):
+ """Call forwards pass of TFLite file and returns the result.
+
+ Args:
+ input_data: Input data to use on forward pass.
+ tflite_path: Path to TFLite file to run.
+
+ Returns:
+ Output from inference.
+ """
+ supported_quant_dtypes = (np.int8, np.int16)
+ interpreter = tf.lite.Interpreter(model_path=tflite_path)
+ interpreter.allocate_tensors()
+
+ input_details = interpreter.get_input_details()
+ output_details = interpreter.get_output_details()
+
+ input_dtype = input_details[0]["dtype"]
+ output_dtype = output_details[0]["dtype"]
+
+ # Check if the input/output type is quantized,
+ # set scale and zero-point accordingly
+ if input_dtype in supported_quant_dtypes:
+ input_scale, input_zero_point = input_details[0]["quantization"]
+ else:
+ input_scale, input_zero_point = 1, 0
+
+ input_data = input_data / input_scale + input_zero_point
+ input_data = np.round(input_data) if input_dtype in supported_quant_dtypes else input_data
+
+ if output_dtype in supported_quant_dtypes:
+ output_scale, output_zero_point = output_details[0]["quantization"]
+ else:
+ output_scale, output_zero_point = 1, 0
+
+ interpreter.set_tensor(input_details[0]['index'], tf.cast(input_data, input_dtype))
+ interpreter.invoke()
+
+ output_data = interpreter.get_tensor(output_details[0]['index'])
+
+ output_data = output_scale * (output_data.astype(np.float32) - output_zero_point)
+
+ return output_data
+
+
+def load_labels(filename):
+ """Read in labels, one label per line."""
+ with open(filename, "r") as f:
+     return f.read().splitlines()
+
+
+def main():
+ window_size_samples = int(FLAGS.sample_rate * FLAGS.window_size_ms / 1000)
+ window_stride_samples = int(FLAGS.sample_rate * FLAGS.window_stride_ms / 1000)
+ decoded, sample = load_wav_file(FLAGS.wav, FLAGS.sample_rate)
+ x = calculate_mfcc(decoded, sample, window_size_samples, window_stride_samples, FLAGS.dct_coefficient_count)
+ x = tf.reshape(x, [1, -1])
+ predictions = tflite_inference(x, FLAGS.tflite_path)
+
+ # Sort to show labels in order of confidence
+ top_k = predictions[0].argsort()[-1:][::-1]
+ for node_id in top_k:
+ human_string = load_labels(FLAGS.labels)[int(node_id)]
+ score = predictions[0,node_id]
+ print(f'model predicted: {human_string} with score {score:.5f}')
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--wav', type=str, default='', help='Audio file to be identified.')
+ parser.add_argument(
+ '--labels', type=str, default='', help='Path to file containing labels.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs', )
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is', )
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices', )
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint', )
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ default='',
+ help='Path to TFLite file to use for testing.')
+ FLAGS, unparsed = parser.parse_known_args()
+ main()
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/evaluation.py b/models/keyword_spotting/ds_cnn_small/model_package_tf/evaluation.py
new file mode 100644
index 0000000..9488d35
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/evaluation.py
@@ -0,0 +1,250 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for testing trained keyword spotting models from checkpoint files and TFLite files."""
+
+import argparse
+
+import numpy as np
+import tensorflow as tf
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+from ds_cnn_s_inference_tflite import tflite_inference
+
+
+def tflite_test(model_settings, audio_processor, tflite_path):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A TFLite model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ tflite_path: Path to TFLite file to use for inference.
+ """
+ # Evaluate on validation set.
+ print("Running TFLite evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(1)
+ expected_indices = np.concatenate([y for x, y in val_data])
+ predicted_indices = []
+
+ for mfcc, label in val_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TFLite evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(1)
+ expected_indices = np.concatenate([y for x, y in test_data])
+ predicted_indices = []
+
+ for mfcc, label in test_data:
+ prediction = tflite_inference(mfcc, tflite_path)
+ predicted_indices.append(np.squeeze(tf.argmax(prediction, axis=1)))
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def keras_test(model_settings, audio_processor, model):
+ """Calculate accuracy and confusion matrices on the validation and test sets.
+
+ A loaded keras model is used for doing testing.
+
+ Args:
+ model_settings: Dictionary of common model settings.
+ audio_processor: Audio processor class object.
+ model: Loaded keras model.
+ """
+ # Evaluate on validation set.
+ print("Running TF evaluation on validation set...")
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in val_data])
+
+ predictions = model.predict(val_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ val_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Validation accuracy = {val_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.VALIDATION)})')
+
+ # Evaluate on testing set.
+ print("Running TF evaluation on test set...")
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING).batch(FLAGS.batch_size)
+ expected_indices = np.concatenate([y for x, y in test_data])
+
+ predictions = model.predict(test_data)
+ predicted_indices = tf.argmax(predictions, axis=1)
+
+ test_accuracy = calculate_accuracy(predicted_indices, expected_indices)
+ confusion_matrix = tf.math.confusion_matrix(expected_indices, predicted_indices,
+ num_classes=model_settings['label_count'])
+ print(confusion_matrix.numpy())
+ print(f'Test accuracy = {test_accuracy * 100:.2f}%'
+ f'(N={audio_processor.set_size(audio_processor.Modes.TESTING)})')
+
+
+def calculate_accuracy(predicted_indices, expected_indices):
+ """Calculates and returns accuracy.
+
+ Args:
+ predicted_indices: List of predicted integer indices.
+ expected_indices: List of expected integer indices.
+
+ Returns:
+ Accuracy value between 0 and 1.
+ """
+ correct_prediction = tf.equal(predicted_indices, expected_indices)
+ accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+ return accuracy
+
+
+def evaluate():
+ """Calculate accuracy and confusion matrices on validation and test sets.
+
+ Model is created and weights loaded from supplied command line arguments.
+ """
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ if FLAGS.tflite_path:
+ tflite_test(model_settings, audio_processor, FLAGS.tflite_path)
+
+ if FLAGS.checkpoint:
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, False)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+ keras_test(model_settings, audio_processor, model)
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from')
+ parser.add_argument(
+ '--tflite_path',
+ type=str,
+ help='Path to TFLite file to use for evaluation')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ evaluate()
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/how_to_guidance.ipynb b/models/keyword_spotting/ds_cnn_small/model_package_tf/how_to_guidance.ipynb
new file mode 100644
index 0000000..1391914
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/how_to_guidance.ipynb
@@ -0,0 +1,428 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.\n",
+ "#\n",
+ "# SPDX-License-Identifier: Apache-2.0\n",
+ "#\n",
+ "# Licensed under the Apache License, Version 2.0 (the License); you may\n",
+ "# not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an AS IS BASIS, WITHOUT\n",
+ "# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# DS_CNN_Small - Hero\n",
+ "\n",
+ "Here we reproduce the models with our established codebase and ModelPackage approach for your convenience.\n",
+ "\n",
+ "## Model-Package Overview:\n",
+ "\n",
+ "| Model \t| DS_CNN_Small \t|\n",
+ "|:---------------:\t|:---------------------------------------------------------------:\t|\n",
+ "| **Format**: \t| Keras, Saved Model, TensorFlow Lite int8, TensorFlow Lite fp32 |\n",
+ "| **Feature**: \t| Keyword spotting for Arm Cortex-M CPUs |\n",
+ "| **Architectural Delta w.r.t. Vanilla**: | None |\n",
+ "| **Domain**: \t| Keyword spotting |\n",
+ "| **Package Quality**: \t| Hero |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Table of contents \n",
+ "\n",
+ "This how-to guidance presents the key steps to reproduce everything in this package. The contents are organised as below. We provided the internal navigation links for users to easy-jump among different sections. \n",
+ "\n",
+ " \n",
+ "* [1.0 Model recreation](#model_recreation)\n",
+ "\n",
+ "* [2.0 Training](#training)\n",
+ "\n",
+ "* [3.0 Testing](#testing)\n",
+ "\n",
+ "* [4.0 Optimization](#optimization)\n",
+ "\n",
+ "* [5.0 Quantization and TFLite conversion](#tflite_conversion)\n",
+ "\n",
+ "* [6.0 Inference the TFLite model files](#tflite_inference)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 1.0 Model Recreation\n",
+ "\n",
+ "In order to recreate the model you will first need to be using ```Python3.7``` and install the requirements in ```requirements.txt```.\n",
+ "\n",
+ "Once you have these requirements satisfied you can execute the recreation script contained within this folder, just run:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 12:04:29.102214: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 12:05:19.918303: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 12:05:19.952173: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:05:19.952211: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 12:05:19.971851: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 12:05:19.971921: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 12:05:19.974596: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 12:05:19.974884: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 12:05:19.975441: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 12:05:19.976147: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 12:05:19.976295: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 12:05:19.976755: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:05:19.977035: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 12:05:19.977720: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:05:19.978052: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:05:19.978106: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 12:05:20.390120: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:05:20.390158: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:05:20.390167: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:05:20.390683: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11007 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 12:05:22.730373: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 12:05:24.433377: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 12:05:24.433576: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 12:05:24.434021: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:05:24.434280: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:05:24.434312: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:05:24.434324: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:05:24.434333: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:05:24.434616: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11007 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 12:05:24.451559: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 12:05:24.458087: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.014ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.001ms.\n",
+ "\n",
+ "2023-01-31 12:05:24.730913: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 12:05:24.730951: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 12:05:24.736446: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 12:05:24.739564: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:05:24.739849: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:05:24.739885: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:05:24.739895: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:05:24.739902: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:05:24.740218: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11007 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "Converted model saved to ds_cnn.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "2023-01-31 12:05:24.804992: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 311 1 9 2 9 8 4 11 3 4 9]\n",
+ " [ 0 5 387 1 0 0 3 0 0 0 0 1]\n",
+ " [ 0 11 5 372 1 6 5 0 0 0 0 6]\n",
+ " [ 0 4 0 0 327 0 2 0 1 10 6 0]\n",
+ " [ 0 2 2 6 0 360 0 1 1 0 1 4]\n",
+ " [ 0 1 7 0 3 1 333 5 0 0 0 2]\n",
+ " [ 0 5 0 1 0 0 5 350 1 0 0 1]\n",
+ " [ 1 5 0 1 4 1 0 1 343 7 0 0]\n",
+ " [ 0 1 1 1 16 0 2 1 5 343 1 2]\n",
+ " [ 1 2 0 0 9 1 0 0 0 3 334 0]\n",
+ " [ 0 15 0 14 1 6 0 0 0 2 3 331]]\n",
+ "Validation accuracy = 93.63%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 354 5 8 4 2 7 8 6 0 5 9]\n",
+ " [ 0 5 404 1 0 0 9 0 0 0 0 0]\n",
+ " [ 0 4 1 380 0 7 3 0 0 0 0 10]\n",
+ " [ 0 4 0 0 396 1 1 0 2 14 4 3]\n",
+ " [ 0 12 1 9 0 376 2 0 1 0 1 4]\n",
+ " [ 0 2 7 1 1 0 399 1 0 0 1 0]\n",
+ " [ 0 10 0 0 1 1 6 376 0 0 2 0]\n",
+ " [ 0 7 1 0 4 0 0 0 364 16 1 3]\n",
+ " [ 1 5 1 3 12 0 1 0 2 369 1 7]\n",
+ " [ 0 1 0 1 4 2 1 0 1 1 397 3]\n",
+ " [ 0 3 2 18 1 5 1 0 0 2 2 368]]\n",
+ "Test accuracy = 93.89%(N=4890)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2023-01-31 12:05:46.655980: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "Untarring speech_commands_v0.02.tar.gz...\n",
+ "2023-01-31 12:06:37.310206: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
+ "2023-01-31 12:06:37.346033: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:06:37.346068: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 12:06:37.365782: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11\n",
+ "2023-01-31 12:06:37.365855: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11\n",
+ "2023-01-31 12:06:37.368622: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcufft.so.10\n",
+ "2023-01-31 12:06:37.368939: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcurand.so.10\n",
+ "2023-01-31 12:06:37.369500: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusolver.so.11\n",
+ "2023-01-31 12:06:37.370276: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcusparse.so.11\n",
+ "2023-01-31 12:06:37.370427: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8\n",
+ "2023-01-31 12:06:37.370808: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:06:37.371101: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2023-01-31 12:06:37.371913: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:06:37.372648: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:06:37.372708: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n",
+ "2023-01-31 12:06:37.810221: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:06:37.810261: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:06:37.810269: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:06:37.810782: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11007 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.\n",
+ "2023-01-31 12:06:40.113450: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "2023-01-31 12:06:41.895930: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1\n",
+ "2023-01-31 12:06:41.896029: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session\n",
+ "2023-01-31 12:06:41.896600: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:06:41.896861: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:06:41.896892: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:06:41.896901: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:06:41.896909: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:06:41.897198: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11007 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 12:06:41.915523: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3492140000 Hz\n",
+ "2023-01-31 12:06:41.922229: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1144] Optimization results for grappler item: graph_to_optimize\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.019ms.\n",
+ " function_optimizer: function_optimizer did nothing. time = 0.003ms.\n",
+ "\n",
+ "2023-01-31 12:06:42.074632: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:345] Ignored output_format.\n",
+ "2023-01-31 12:06:42.074672: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:348] Ignored drop_control_dependency.\n",
+ "2023-01-31 12:06:42.079631: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+ "2023-01-31 12:06:42.082664: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: \n",
+ "pciBusID: 0000:03:00.0 name: NVIDIA TITAN Xp computeCapability: 6.1\n",
+ "coreClock: 1.582GHz coreCount: 30 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 510.07GiB/s\n",
+ "2023-01-31 12:06:42.082962: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1871] Adding visible gpu devices: 0\n",
+ "2023-01-31 12:06:42.083001: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
+ "2023-01-31 12:06:42.083013: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264] 0 \n",
+ "2023-01-31 12:06:42.083021: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0: N \n",
+ "2023-01-31 12:06:42.083360: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1418] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11007 MB memory) -> physical GPU (device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:03:00.0, compute capability: 6.1)\n",
+ "2023-01-31 12:06:42.114217: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
+ "fully_quantize: 0, inference_type: 6, input_inference_type: 9, output_inference_type: 9\n",
+ "Quantized model saved to ds_cnn_quantized.tflite.\n",
+ "Running TFLite evaluation on validation set...\n",
+ "[[371 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 308 4 6 1 11 5 6 11 4 5 10]\n",
+ " [ 0 5 379 0 0 1 9 0 0 1 0 2]\n",
+ " [ 0 10 3 369 3 6 6 0 1 2 0 6]\n",
+ " [ 0 5 0 1 315 0 3 0 2 13 10 1]\n",
+ " [ 0 5 2 9 0 347 0 1 3 1 5 4]\n",
+ " [ 0 2 5 1 2 1 335 4 0 1 1 0]\n",
+ " [ 0 7 0 1 2 0 7 342 1 1 1 1]\n",
+ " [ 1 4 0 1 6 1 0 0 343 6 1 0]\n",
+ " [ 0 2 0 1 22 0 1 0 6 336 2 3]\n",
+ " [ 1 4 0 0 14 0 0 0 0 1 328 2]\n",
+ " [ 0 12 0 16 2 9 0 0 1 2 4 326]]\n",
+ "Validation accuracy = 92.22%(N=4445)\n",
+ "Running TFLite evaluation on test set...\n",
+ "[[408 0 0 0 0 0 0 0 0 0 0 0]\n",
+ " [ 0 347 3 8 5 5 8 8 9 1 6 8]\n",
+ " [ 0 7 399 2 0 1 8 0 0 1 1 0]\n",
+ " [ 0 4 1 377 4 7 2 0 0 0 1 9]\n",
+ " [ 0 5 1 0 390 1 1 1 2 14 6 4]\n",
+ " [ 0 15 0 12 2 361 4 0 2 1 1 8]\n",
+ " [ 0 6 5 2 4 0 393 2 0 0 0 0]\n",
+ " [ 0 9 0 0 5 0 10 365 1 1 2 3]\n",
+ " [ 0 9 0 1 6 1 3 1 357 15 1 2]\n",
+ " [ 0 4 1 2 15 0 1 0 2 369 1 7]\n",
+ " [ 0 1 0 2 4 3 2 0 1 3 393 2]\n",
+ " [ 0 5 2 21 3 7 2 0 0 3 1 358]]\n",
+ "Test accuracy = 92.37%(N=4890)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!bash ./recreate_model.sh"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Running this script will use the pre-trained checkpoint files supplied in the ```./model_archive/model_source/weights``` folder to generate the TFLite files and perform evaluation on the test set. Both an fp32 version and a quantized version will be produced. The quantized version will use post-training quantization to fully quantize it.\n",
+ "\n",
+ "If you want to run training from scratch you can do this by supplying ```--train``` when running the script. For example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --train\n",
+ "```\n",
+ "\n",
+ "Training is then performed and should produce a model to the stated accuracy in this repository. Note that exporting to TFLite will still happen with the baseline pre-trained checkpoint files, so you will need to re-run the script and this time supply the path to the new checkpoint files you want to use, for example:\n",
+ "\n",
+ "```bash\n",
+ "bash ./recreate_model.sh --ckpt \n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2.0 Training\n",
+ "\n",
+ "The training scripts can be used to recreate any of the models from the [Hello Edge paper](https://arxiv.org/pdf/1711.07128.pdf) provided the right hyperparameters are used. The training commands with all the hyperparameters to reproduce the model in this repository are given [here](recreate_model.sh). The model in this part of the repository represents just one variation of the models from the paper, other varieties are covered in other parts of the repository.\n",
+ "\n",
+ "\n",
+ "As a general example of how to train a DNN with 3 fully-connected layers with 128 neurons in each layer, run:\n",
+ "```\n",
+ "python train.py --model_architecture dnn --model_size_info 128 128 128\n",
+ "```\n",
+ "\n",
+ "The command line argument *--model_size_info* is used to pass the neural network layer\n",
+ "dimensions such as number of layers, convolution filter size/stride as a list to models.py,\n",
+ "which builds the TensorFlow graph based on the provided model architecture\n",
+ "and layer dimensions. For more info on *model_size_info* for each network architecture see\n",
+ "[models.py](model_core_utils/models.py).\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 3.0 Testing\n",
+ "To run inference on the trained model from a checkpoint and get accuracy on validation and test sets, run:\n",
+ "```\n",
+ "python evaluation.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters passed to this script should match those used in the Training step.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4.0 Optimization\n",
+ "\n",
+ "We introduce an *optional* step to optimize the trained keyword spotting model for deployment.\n",
+ "\n",
+ "Here we use TensorFlow's [weight clustering API](https://www.tensorflow.org/model_optimization/guide/clustering) to reduce the compressed model size and optimize inference on supported hardware. 32 weight clusters and kmeans++ cluster intialization method are used as the clustering hyperparameters.\n",
+ "\n",
+ "To optimize your trained model (e.g. a DNN), a trained model checkpoint is needed to run clustering and fine-tuning on.\n",
+ "You can use the pre-trained checkpoints provided, or train your own model and use the resulting checkpoint.\n",
+ "\n",
+ "To apply the optimization and fine-tuning, run the following command:\n",
+ "```\n",
+ "python optimisations.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint \n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step, except for the number of training steps.\n",
+ "The number of training steps is reduced since the optimization step only requires fine-tuning.**\n",
+ "\n",
+ "This will generate a clustered model checkpoint that can be used in the quantization step to generate a quantized and clustered TFLite model."
+ ]
+ },
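+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a rough sketch of what the clustering step does (not the exact `optimisations.py` implementation), applying weight clustering with the TensorFlow Model Optimization toolkit looks roughly like the snippet below. It assumes `tensorflow_model_optimization` is installed and that `model`, `train_data` and `val_data` already exist; the optimizer, learning rate and epoch count are illustrative only:\n",
+ "\n",
+ "```python\n",
+ "import tensorflow as tf\n",
+ "import tensorflow_model_optimization as tfmot\n",
+ "\n",
+ "cluster_weights = tfmot.clustering.keras.cluster_weights\n",
+ "CentroidInitialization = tfmot.clustering.keras.CentroidInitialization\n",
+ "\n",
+ "# 32 clusters with kmeans++ initialization, matching the hyperparameters described above.\n",
+ "clustering_params = {\n",
+ "    'number_of_clusters': 32,\n",
+ "    'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS,\n",
+ "}\n",
+ "\n",
+ "# Wrap the trained model so each layer's weights are constrained to the cluster centroids.\n",
+ "clustered_model = cluster_weights(model, **clustering_params)\n",
+ "\n",
+ "# Fine-tune briefly at a low learning rate, then strip the clustering wrappers before export.\n",
+ "clustered_model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),\n",
+ "                        loss='sparse_categorical_crossentropy',\n",
+ "                        metrics=['accuracy'])\n",
+ "clustered_model.fit(train_data, validation_data=val_data, epochs=3)\n",
+ "final_model = tfmot.clustering.keras.strip_clustering(clustered_model)\n",
+ "```"
+ ]
+ },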
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5.0 Quantization and TFLite Conversion\n",
+ "\n",
+ "You can now use TensorFlow's\n",
+ "[post training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization) to\n",
+ "make quantization of the trained models super simple.\n",
+ "\n",
+ "To quantize your trained model (e.g. a DNN) run:\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint [--inference_type int8|int16]\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "The ```inference_type``` parameter is *optional* and to be used if a fully quantized model with inputs and outputs of type int8 or int16 is needed. It defaults to fp32.\n",
+ "\n",
+ "In this example, this step will produce a quantized TFLite file *dnn_quantized.tflite*."
+ ]
+ },
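+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For reference, a minimal sketch of the post-training quantization flow that such a conversion performs is shown below. It is not the literal `convert_to_tflite.py` code; it assumes `model` is the trained Keras model and `audio_processor` is an `AudioProcessor` instance used to supply calibration samples:\n",
+ "\n",
+ "```python\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "def representative_dataset():\n",
+ "    # A few hundred training MFCCs are enough to calibrate the quantization ranges.\n",
+ "    for mfcc, _ in audio_processor.get_data(audio_processor.Modes.TRAINING).batch(1).take(100):\n",
+ "        yield [tf.cast(mfcc, tf.float32)]\n",
+ "\n",
+ "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n",
+ "converter.optimizations = [tf.lite.Optimize.DEFAULT]\n",
+ "converter.representative_dataset = representative_dataset\n",
+ "\n",
+ "# For a fully integer model (int8 inputs and outputs), also restrict the ops and IO types.\n",
+ "converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]\n",
+ "converter.inference_input_type = tf.int8\n",
+ "converter.inference_output_type = tf.int8\n",
+ "\n",
+ "tflite_model = converter.convert()\n",
+ "with open('dnn_quantized.tflite', 'wb') as f:\n",
+ "    f.write(tflite_model)\n",
+ "```"
+ ]
+ },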
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can test the accuracy of this quantized model on the test set by running:\n",
+ "```\n",
+ "python evaluation.py --tflite_path dnn_quantized.tflite\n",
+ "```\n",
+ "**The model and feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "`convert_to_tflite.py` uses post-training quantization to generate a quantized model by default. If you wish to convert to a floating point TFLite model, use the command below:\n",
+ "\n",
+ "```\n",
+ "python convert_to_tflite.py --model_architecture dnn --model_size_info 128 128 128 --checkpoint --no-quantize\n",
+ "```\n",
+ "\n",
+ "This will produce a floating point TFLite file *dnn.tflite*. You can test the accuracy of this floating point model using `evaluation.py` as above.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 6.0 Single inference of the TFLite model files \n",
+ "\n",
+ "You can conduct TFLite inference for .fp32 and .int8 model files by using the following command: \n",
+ "\n",
+ "```python ds_cnn_s_inference_tflite.py --labels validation_utils/labels.txt --wav --tflite_path ```\n",
+ "\n",
+ "**The feature extraction parameters used here should match those used in the Training step.**\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/README.md b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
new file mode 100644
index 0000000..b8fbdcb
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32
+
+## Description
+This is a floating point fp32 version of the DS-CNN Small model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | fp32 |
+| SHA-1 Hash | 8aadd5126bc0d3371c1b834d027c853e794423c1 |
+| Size (Bytes) | 98756 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 93.89% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: HERO |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_multiplication_x: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Hero |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_multiplication_x: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 490) | fp32 | models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input | fp32 | [1, 490] | The input is processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | fp32 | models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity | fp32 | [1, 12] | The probabilities of the 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
new file mode 100644
index 0000000..71aa3f6
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/definition.yaml
@@ -0,0 +1,66 @@
+benchmark:
+ benchmark_metrics:
+ accuracy: 93.89%
+ benchmark_name: Google Speech Commands test set
+description: This is a floating point fp32 version of the DS-CNN Small model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: fp32
+ file_size_bytes: 98756
+ filename: ds_cnn_s.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: 8aadd5126bc0d3371c1b834d027c853e794423c1
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+  - description: The input is processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input
+ shape:
+ - 1
+ - 490
+ type: fp32
+ use_case: Random input for model regression.
+ input_datatype: fp32
+ name: input
+ shape:
+ - 1
+ - 490
+ output_nodes:
+  - description: The probabilities of the 12 keywords.
+ example_output:
+ path: models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: fp32
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: fp32
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Hero
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: false
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - AVERAGE_POOL_2D
+ - CONV_2D
+ - DEPTHWISE_CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/ds_cnn_s.tflite b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/ds_cnn_s.tflite
new file mode 100644
index 0000000..3fb7602
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/ds_cnn_s.tflite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d302f1f2c53c1344edcde850e28130c0877b60e1567db977292239a9391f59b
+size 98756
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
new file mode 100644
index 0000000..27d44a7
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_input/input/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ee7676110faaf59275371c1d6b27097d657f049967840cbd214d62a272fa543
+size 2088
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
new file mode 100644
index 0000000..38660ee
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_fp32/testing_output/Identity/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fad0cf24907c9eeb36f99fb498f09667e129f1cdbcca9b50cd826e9322b145d1
+size 176
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/README.md b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/README.md
new file mode 100644
index 0000000..b025116
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16
+
+## Description
+This is a fully quantized int16 version of the DS-CNN Small model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | int16 |
+| SHA-1 Hash | e82c7d645bec3dec580a096de0a297c6dd9a6463 |
+| Size (Bytes) | 55392 |
+| Provenance | https://github.com/ARM-software/ML-examples/tree/main/tflu-kws-cortex-m |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 93.39% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: HERO |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_check_mark: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Hero |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_check_mark: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| serving_default_input:0 | (1, 490) | int16 | models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/testing_input | int16 | [1, 490] | The input is processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| StatefulPartitionedCall:0 | (1, 12) | int16 | models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/testing_output | int16 | [1, 12] | The probabilities of the 12 keywords |
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/definition.yaml b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/definition.yaml
new file mode 100644
index 0000000..730a6cc
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/definition.yaml
@@ -0,0 +1,66 @@
+benchmark:
+ benchmark_metrics:
+ Accuracy: 93.39%
+ benchmark_name: Google Speech Commands test set
+description: This is a fully quantized int16 version of the DS-CNN Small model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: int16
+ file_size_bytes: 55392
+ filename: ds_cnn_s_quantized_int16.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: e82c7d645bec3dec580a096de0a297c6dd9a6463
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+  - description: The input is processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/testing_input
+ shape:
+ - 1
+ - 490
+ type: int16
+ use_case: Random input for model regression.
+ input_datatype: int16
+ name: serving_default_input:0
+ shape:
+ - 1
+ - 490
+ output_nodes:
+  - description: The probabilities of the 12 keywords.
+ example_output:
+ path: models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/testing_output
+ shape:
+ - 1
+ - 12
+ type: int16
+ use_case: output for model regression.
+ name: StatefulPartitionedCall:0
+ output_datatype: int16
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Hero
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: true
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - AVERAGE_POOL_2D
+ - CONV_2D
+ - DEPTHWISE_CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/ds_cnn_s_quantized_int16.tflite b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/ds_cnn_s_quantized_int16.tflite
new file mode 100644
index 0000000..d3d56fe
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/ds_cnn_s_quantized_int16.tflite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e80b231d6848e6de69d70d36a17f9bb64022ae46d9957b1f6972b6527f943186
+size 55392
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/testing_input/0.npy b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/testing_input/0.npy
new file mode 100644
index 0000000..797c2b0
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/testing_input/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e24c5c602a9c74776927198465769dc6e80645663bf7604ae45aed0586a066a
+size 1108
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/testing_output/0.npy b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/testing_output/0.npy
new file mode 100644
index 0000000..4e37127
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int16/testing_output/0.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:397aff56a28c4e81818c117ae49b216ad8ae501c3612b7abac2cdf9f45ccbf44
+size 152
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/README.md b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/README.md
new file mode 100644
index 0000000..3e9a6cc
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/README.md
@@ -0,0 +1,62 @@
+# keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8
+
+## Description
+This is a fully quantized int8 version of the DS-CNN Small model developed by Arm, from the Hello Edge paper.
+
+## License
+[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
+
+## Network Information
+| Network Information | Value |
+|---------------------|-------|
+| Framework | TensorFlow Lite |
+| Datatype | int8 |
+| SHA-1 Hash | cf24429e86a9647b1632c382894bc68d26d34039 |
+| Size (Bytes) | 47616 |
+| Provenance | https://arxiv.org/abs/1711.07128 |
+| Training | Trained by Arm |
+| Paper | https://arxiv.org/abs/1711.07128 |
+
+## DataSet
+| Dataset Information | Value |
+|--------|-------|
+| Name | Google Speech Commands test set |
+
+## Accuracy
+
+| Metric | Value |
+|--------|-------|
+| Accuracy | 93.11% |
+
+## HW Support
+| HW Support | Value |
+|--------------|-------|
+| Cortex-A |:heavy_check_mark: |
+| Cortex-M |:heavy_check_mark: HERO |
+| Mali GPU |:heavy_check_mark: |
+| Ethos U |:heavy_check_mark: |
+
+### Key
+* :heavy_check_mark: - Will run on this platform.
+* :heavy_multiplication_x: - Will not run on this platform.
+
+## Network Quality
+| Network Quality | Value |
+|-------------------------|-------|
+| Recreate | :heavy_check_mark: |
+| Quality level | Hero |
+| Vanilla | :heavy_check_mark: |
+| Clustered | :heavy_multiplication_x: |
+| Pruned | :heavy_multiplication_x: |
+| Quantization - default | :heavy_multiplication_x: |
+| Quantization - full | :heavy_check_mark: |
+
+## Network Inputs
+| Input Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| input | (1, 490) | int8 | models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input | int8 | [1, 490] | The input is processed MFCCs |
+
+## Network Outputs
+| Output Node Name | Shape | Type | Example Path | Example Type | Example Shape | Example Use Case |
+|-----------------|-------|-------|--------------|-------|-------|-----------------|
+| Identity | (1, 12) | int8 | models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity | int8 | [1, 12] | The probabilities of the 12 keywords |
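+
+## Example Usage
+The example tensors above can be used for a quick regression check of this model. A minimal sketch, assuming TensorFlow and NumPy are installed and paths are relative to the repository root:
+```
+import numpy as np
+import tensorflow as tf
+
+base = 'models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/'
+interpreter = tf.lite.Interpreter(model_path=base + 'ds_cnn_s_quantized.tflite')
+interpreter.allocate_tensors()
+inp = interpreter.get_input_details()[0]
+out = interpreter.get_output_details()[0]
+
+x = np.load(base + 'testing_input/input/0.npy')           # int8, shape (1, 490)
+interpreter.set_tensor(inp['index'], x)
+interpreter.invoke()
+y = interpreter.get_tensor(out['index'])
+y_ref = np.load(base + 'testing_output/Identity/0.npy')   # int8, shape (1, 12)
+print('match:', np.array_equal(y, y_ref))
+```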
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
new file mode 100644
index 0000000..6d2f978
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/definition.yaml
@@ -0,0 +1,66 @@
+benchmark:
+ benchmark_metrics:
+ Accuracy: 93.11%
+ benchmark_name: Google Speech Commands test set
+description: This is a fully quantized int8 version of the DS-CNN Small model developed
+ by Arm, from the Hello Edge paper.
+license:
+- Apache-2.0
+network:
+ datatype: int8
+ file_size_bytes: 47616
+ filename: ds_cnn_s_quantized.tflite
+ framework: TensorFlow Lite
+ hash:
+ algorithm: sha1
+ value: cf24429e86a9647b1632c382894bc68d26d34039
+ provenance: https://arxiv.org/abs/1711.07128
+ training: Trained by Arm
+network_parameters:
+ input_nodes:
+  - description: The input is processed MFCCs of shape (1, 490)
+ example_input:
+ path: models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input
+ shape:
+ - 1
+ - 490
+ type: int8
+ use_case: Random input for model regression.
+ input_datatype: int8
+ name: input
+ shape:
+ - 1
+ - 490
+ output_nodes:
+  - description: The probabilities of the 12 keywords.
+ example_output:
+ path: models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity
+ shape:
+ - 1
+ - 12
+ type: int8
+ use_case: output for model regression.
+ name: Identity
+ output_datatype: int8
+ shape:
+ - 1
+ - 12
+network_quality:
+ clustered: false
+ is_vanilla: true
+ pruned: false
+ quality_level: Hero
+ quality_level_hero_hw: cortex_m
+ quantization_default: false
+ quantization_full: true
+ recreate: true
+operators:
+ TensorFlow Lite:
+ - AVERAGE_POOL_2D
+ - CONV_2D
+ - DEPTHWISE_CONV_2D
+ - FULLY_CONNECTED
+ - RELU
+ - RESHAPE
+ - SOFTMAX
+paper: https://arxiv.org/abs/1711.07128
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int8/ds_cnn_s_quantized.tflite b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/ds_cnn_s_quantized.tflite
similarity index 100%
rename from models/keyword_spotting/ds_cnn_small/tflite_int8/ds_cnn_s_quantized.tflite
rename to models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/ds_cnn_s_quantized.tflite
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int8/testing_input/input/0.npy b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
similarity index 100%
rename from models/keyword_spotting/ds_cnn_small/tflite_int8/testing_input/input/0.npy
rename to models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_input/input/0.npy
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int8/testing_output/Identity/0.npy b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
similarity index 100%
rename from models/keyword_spotting/ds_cnn_small/tflite_int8/testing_output/Identity/0.npy
rename to models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/TFLite/tflite_int8/testing_output/Identity/0.npy
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/saved_model/ds_cnn_small/keras_metadata.pb b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/saved_model/ds_cnn_small/keras_metadata.pb
new file mode 100644
index 0000000..a265c82
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/saved_model/ds_cnn_small/keras_metadata.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edda8ec1a48de025c96dfcef1163b343f69616f516a6fec12279e71c5a58b4d2
+size 65399
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/saved_model/ds_cnn_small/saved_model.pb b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/saved_model/ds_cnn_small/saved_model.pb
new file mode 100644
index 0000000..3fd736c
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/saved_model/ds_cnn_small/saved_model.pb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ef43701d6901c7fa2452cf5390d2198b7ba14a3e5f41d10385ec152f0631349
+size 708163
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/saved_model/ds_cnn_small/variables/variables.data-00000-of-00001 b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/saved_model/ds_cnn_small/variables/variables.data-00000-of-00001
new file mode 100644
index 0000000..4217bf8
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/saved_model/ds_cnn_small/variables/variables.data-00000-of-00001
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5cee02f3a1e371e6de9e2192600842bd92be832739233b8bdeaf6f3f3b9f1e73
+size 118118
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/saved_model/ds_cnn_small/variables/variables.index b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/saved_model/ds_cnn_small/variables/variables.index
new file mode 100644
index 0000000..364f025
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/saved_model/ds_cnn_small/variables/variables.index
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e77718d9698810a79c1ce8989db07245c69ae8d0277c5337703e3f32c6a863f5
+size 3570
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int8/ckpt/checkpoint b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/weights/checkpoint
similarity index 100%
rename from models/keyword_spotting/ds_cnn_small/tflite_int8/ckpt/checkpoint
rename to models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/weights/checkpoint
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int8/ckpt/ds_cnn_0.94_ckpt.data-00000-of-00001 b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/weights/ds_cnn_0.94_ckpt.data-00000-of-00001
similarity index 100%
rename from models/keyword_spotting/ds_cnn_small/tflite_int8/ckpt/ds_cnn_0.94_ckpt.data-00000-of-00001
rename to models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/weights/ds_cnn_0.94_ckpt.data-00000-of-00001
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int8/ckpt/ds_cnn_0.94_ckpt.index b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/weights/ds_cnn_0.94_ckpt.index
similarity index 100%
rename from models/keyword_spotting/ds_cnn_small/tflite_int8/ckpt/ds_cnn_0.94_ckpt.index
rename to models/keyword_spotting/ds_cnn_small/model_package_tf/model_archive/model_source/weights/ds_cnn_0.94_ckpt.index
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_core_utils/__init__.py b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_core_utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/model_core_utils/models.py b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_core_utils/models.py
new file mode 100644
index 0000000..1978136
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/model_core_utils/models.py
@@ -0,0 +1,327 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Model definitions for simple keyword spotting."""
+
+import math
+
+import tensorflow as tf
+
+
+def prepare_model_settings(label_count, sample_rate, clip_duration_ms,
+ window_size_ms, window_stride_ms,
+ dct_coefficient_count):
+ """Calculates common settings needed for all models.
+
+ Args:
+ label_count: How many classes are to be recognized.
+ sample_rate: Number of audio samples per second.
+ clip_duration_ms: Length of each audio clip to be analyzed.
+ window_size_ms: Duration of frequency analysis window.
+ window_stride_ms: How far to move in time between frequency windows.
+ dct_coefficient_count: Number of frequency bins to use for analysis.
+
+ Returns:
+ Dictionary containing common settings.
+ """
+ desired_samples = int(sample_rate * clip_duration_ms / 1000)
+ window_size_samples = int(sample_rate * window_size_ms / 1000)
+ window_stride_samples = int(sample_rate * window_stride_ms / 1000)
+ length_minus_window = (desired_samples - window_size_samples)
+ if length_minus_window < 0:
+ spectrogram_length = 0
+ else:
+ spectrogram_length = 1 + int(length_minus_window / window_stride_samples)
+ fingerprint_size = dct_coefficient_count * spectrogram_length
+
+ return {
+ 'desired_samples': desired_samples,
+ 'window_size_samples': window_size_samples,
+ 'window_stride_samples': window_stride_samples,
+ 'spectrogram_length': spectrogram_length,
+ 'dct_coefficient_count': dct_coefficient_count,
+ 'fingerprint_size': fingerprint_size,
+ 'label_count': label_count,
+ 'sample_rate': sample_rate,
+ }
+
+
+def create_model(model_settings, model_architecture, model_size_info, is_training):
+ """Builds a tf.keras model of the requested architecture compatible with the settings.
+
+ Args:
+ model_settings: Dictionary of information about the model.
+ model_architecture: String specifying which kind of model to create.
+ model_size_info: Array with specific information for the chosen architecture
+ (e.g. convolutional parameters, number of layers).
+ is_training: Whether the model is being built for training (affects LSTM unrolling).
+
+ Returns:
+ A tf.keras Model with the requested architecture.
+
+ Raises:
+ Exception: If the architecture type isn't recognized.
+ """
+
+ if model_architecture == 'dnn':
+ return create_dnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'cnn':
+ return create_cnn_model(model_settings, model_size_info)
+
+ elif model_architecture == 'ds_cnn':
+ return create_ds_cnn_model(model_settings, model_size_info)
+ elif model_architecture == 'single_fc':
+ return create_single_fc_model(model_settings)
+ elif model_architecture == 'basic_lstm':
+ return create_basic_lstm_model(model_settings, model_size_info, is_training)
+ else:
+ raise Exception(f'model_architecture argument {model_architecture} not recognized, '
+ f'should be one of "dnn", "cnn", "ds_cnn", "single_fc", "basic_lstm"')
+
+
+def create_single_fc_model(model_settings):
+ """Builds a model with a single fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+
+ Returns:
+ tf.keras Model of the 'SINGLE_FC' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+ # Fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(inputs)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_basic_lstm_model(model_settings, model_size_info, is_training):
+ """Builds a model with a basic lstm layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Length of the array defines the number of hidden layers and
+ each element in the array represents the number of neurons in that layer.
+ is_training: Whether the model is being built for training (controls LSTM unrolling).
+
+ Returns:
+ tf.keras Model of the 'Basic_LSTM' architecture.
+ """
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size))
+
+ # LSTM layer, and unrolling depending on whether you are training or not
+ if is_training:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=False)(x)
+ else:
+ x = tf.keras.layers.LSTM(units=model_size_info[0], time_major=False, unroll=True)(x)
+
+ # Outputs a fully connected layer
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_dnn_model(model_settings, model_size_info):
+ """Builds a model with multiple hidden fully-connected layers.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Length of the array defines the number of hidden layers and
+ each element in the array represents the number of neurons in that layer.
+
+ Returns:
+ tf.keras Model of the 'DNN' architecture.
+ """
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'], ), name='input')
+
+ # First fully connected layer.
+ x = tf.keras.layers.Dense(units=model_size_info[0], activation='relu')(inputs)
+
+ # Hidden layers with ReLU activations.
+ for i in range(1, len(model_size_info)):
+ x = tf.keras.layers.Dense(units=model_size_info[i], activation='relu')(x)
+
+ # Output fully connected layer.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_cnn_model(model_settings, model_size_info):
+ """Builds a model with 2 convolution layers followed by a linear layer and a hidden fully-connected layer.
+
+ For details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines the first and second convolution parameters in
+ {number of conv features, conv filter height, width, stride in y,x dir.},
+ followed by linear layer size and fully-connected layer size.
+
+ Returns:
+ tf.keras Model of the 'CNN' architecture.
+ """
+
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ first_filter_count = model_size_info[0]
+ first_filter_height = model_size_info[1] # Time axis.
+ first_filter_width = model_size_info[2] # Frequency axis.
+ first_filter_stride_y = model_size_info[3] # Time axis.
+ first_filter_stride_x = model_size_info[4] # Frequency axis.
+
+ second_filter_count = model_size_info[5]
+ second_filter_height = model_size_info[6] # Time axis.
+ second_filter_width = model_size_info[7] # Frequency axis.
+ second_filter_stride_y = model_size_info[8] # Time axis.
+ second_filter_stride_x = model_size_info[9] # Frequency axis.
+
+ linear_layer_size = model_size_info[10]
+ fc_size = model_size_info[11]
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=first_filter_count,
+ kernel_size=(first_filter_height, first_filter_width),
+ strides=(first_filter_stride_y, first_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Second convolution.
+ x = tf.keras.layers.Conv2D(filters=second_filter_count,
+ kernel_size=(second_filter_height, second_filter_width),
+ strides=(second_filter_stride_y, second_filter_stride_x),
+ padding='VALID')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Flatten for fully connected layers.
+ x = tf.keras.layers.Flatten()(x)
+
+ # Fully connected layer with no activation.
+ x = tf.keras.layers.Dense(units=linear_layer_size)(x)
+
+ # Fully connected layer with ReLU activation.
+ x = tf.keras.layers.Dense(units=fc_size)(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ x = tf.keras.layers.Dropout(rate=0)(x)
+
+ # Output fully connected.
+ output = tf.keras.layers.Dense(units=model_settings['label_count'], activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
+
+
+def create_ds_cnn_model(model_settings, model_size_info):
+ """Builds a model with convolutional & depthwise separable convolutional layers.
+
+ For more details see https://arxiv.org/abs/1711.07128.
+
+ Args:
+ model_settings: Dict of different settings for model training.
+ model_size_info: Defines number of layers, followed by the DS-Conv layer
+ parameters in the order {number of conv features, conv filter height,
+ width and stride in y,x dir.} for each of the layers.
+
+ Returns:
+ tf.keras Model of the 'DS-CNN' architecture.
+ """
+
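+ # Example layout (DS-CNN Small, as used in recreate_model.sh):
+ #   model_size_info = [5, 64, 10, 4, 2, 2, 64, 3, 3, 1, 1, 64, 3, 3, 1, 1, 64, 3, 3, 1, 1, 64, 3, 3, 1, 1]
+ #   i.e. 5 layers, each described by {features, kernel height (time), kernel width (freq), stride (time), stride (freq)}.
+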
+ label_count = model_settings['label_count']
+ input_frequency_size = model_settings['dct_coefficient_count']
+ input_time_size = model_settings['spectrogram_length']
+
+ t_dim = input_time_size
+ f_dim = input_frequency_size
+
+ # Extract model dimensions from model_size_info.
+ num_layers = model_size_info[0]
+ conv_feat = [None]*num_layers
+ conv_kt = [None]*num_layers
+ conv_kf = [None]*num_layers
+ conv_st = [None]*num_layers
+ conv_sf = [None]*num_layers
+
+ i = 1
+ for layer_no in range(0, num_layers):
+ conv_feat[layer_no] = model_size_info[i]
+ i += 1
+ conv_kt[layer_no] = model_size_info[i]
+ i += 1
+ conv_kf[layer_no] = model_size_info[i]
+ i += 1
+ conv_st[layer_no] = model_size_info[i]
+ i += 1
+ conv_sf[layer_no] = model_size_info[i]
+ i += 1
+
+ inputs = tf.keras.Input(shape=(model_settings['fingerprint_size'],), name='input')
+
+ # Reshape the flattened input.
+ x = tf.reshape(inputs, shape=(-1, input_time_size, input_frequency_size, 1))
+
+ # Depthwise separable convolutions.
+ for layer_no in range(0, num_layers):
+ if layer_no == 0:
+ # First convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[0],
+ kernel_size=(conv_kt[0], conv_kf[0]),
+ strides=(conv_st[0], conv_sf[0]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+ else:
+ # Depthwise convolution.
+ x = tf.keras.layers.DepthwiseConv2D(kernel_size=(conv_kt[layer_no], conv_kf[layer_no]),
+ strides=(conv_sf[layer_no], conv_st[layer_no]),
+ padding='SAME')(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ # Pointwise convolution.
+ x = tf.keras.layers.Conv2D(filters=conv_feat[layer_no], kernel_size=(1, 1))(x)
+ x = tf.keras.layers.BatchNormalization()(x)
+ x = tf.keras.layers.ReLU()(x)
+
+ t_dim = math.ceil(t_dim/float(conv_st[layer_no]))
+ f_dim = math.ceil(f_dim/float(conv_sf[layer_no]))
+
+ # Global average pool.
+ x = tf.keras.layers.AveragePooling2D(pool_size=(t_dim, f_dim), strides=1)(x)
+
+ # Squeeze before passing to output fully connected layer.
+ x = tf.reshape(x, shape=(-1, conv_feat[layer_no]))
+
+ # Output connected layer.
+ output = tf.keras.layers.Dense(units=label_count, activation='softmax')(x)
+
+ return tf.keras.Model(inputs, output)
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/optimisations.py b/models/keyword_spotting/ds_cnn_small/model_package_tf/optimisations.py
new file mode 100644
index 0000000..16b6f4c
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/optimisations.py
@@ -0,0 +1,259 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for optimizing simple keyword spotting models using clustering API."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+import tensorflow_model_optimization as tfmot
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def print_model_weight_clusters(model):
+
+ for layer in model.layers:
+ if isinstance(layer, tf.keras.layers.Wrapper):
+ weights = layer.trainable_weights
+ else:
+ weights = layer.weights
+ for weight in weights:
+ if "kernel" in weight.name:
+ unique_count = len(np.unique(weight))
+ print(
+ f"{layer.name}/{weight.name}: {unique_count} clusters "
+ )
+
+
+def optimize():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model to optimize from checkpoint.
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, is_training=True)
+ model.load_weights(FLAGS.checkpoint).expect_partial()
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ cluster_weights = tfmot.clustering.keras.cluster_weights
+ CentroidInitialization = tfmot.clustering.keras.CentroidInitialization
+
+ clustering_params = {
+ 'number_of_clusters': 32,
+ 'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS}
+
+ clustered_model = cluster_weights(model, **clustering_params)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Train the model with clustering applied.
+ clustered_model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data)
+
+ stripped_clustered_model = tfmot.clustering.keras.strip_clustering(clustered_model)
+
+ print_model_weight_clusters(stripped_clustered_model)
+
+ # Save the clustered model weights
+ train_dir = Path(FLAGS.train_dir) / "optimized"
+ train_dir.mkdir(parents=True, exist_ok=True)
+
+ stripped_clustered_model.save_weights((train_dir /
+ (FLAGS.model_architecture +
+ "_clustered_ckpt")))
+
+ # Test the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ stripped_clustered_model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ test_loss, test_acc = stripped_clustered_model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='3750,750',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--save_step_interval',
+ type=int,
+ default=100,
+ help='Save model checkpoint every save_steps.')
+ parser.add_argument(
+ '--checkpoint',
+ type=str,
+ help='Checkpoint to load the weights from before fine-tuning.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ optimize()
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/recreate_model.sh b/models/keyword_spotting/ds_cnn_small/model_package_tf/recreate_model.sh
new file mode 100644
index 0000000..a081905
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/recreate_model.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# Copyright (C) 2023 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ckpt_path=model_archive/model_source/weights/ds_cnn_0.94_ckpt
+train=false
+
+# Parse command line args
+while (( $# >= 1 )); do
+ case $1 in
+ --ckpt)
+ if [ "$2" ]; then
+ ckpt_path=$2
+ shift
+ else
+ printf 'ERROR: "--ckpt" requires a path to be supplied.\n'
+ exit 1
+ fi
+ ;;
+ --train)
+ train=true
+ break;;
+ *) shift;
+ esac;
+done
+
+
+# DS-CNN Small training
+if [ "$train" = true ]
+then
+python train.py --model_architecture ds_cnn --model_size_info 5 64 10 4 2 2 64 3 3 1 1 64 3 3 1 1 64 3 3 1 1 64 3 3 1 1 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --learning_rate 0.0005,0.0001,0.00002 --how_many_training_steps 10000,10000,10000 --summaries_dir work/DS_CNN/DS_CNN_S/retrain_logs --train_dir work/DS_CNN/DS_CNN_S/training
+fi
+
+# Conversion to TFLite fp32
+python convert_to_tflite.py --model_architecture ds_cnn --model_size_info 5 64 10 4 2 2 64 3 3 1 1 64 3 3 1 1 64 3 3 1 1 64 3 3 1 1 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --checkpoint $ckpt_path --no-quantize
+
+# Conversion to TFLite int8
+python convert_to_tflite.py --model_architecture ds_cnn --model_size_info 5 64 10 4 2 2 64 3 3 1 1 64 3 3 1 1 64 3 3 1 1 64 3 3 1 1 --dct_coefficient_count 10 --window_size_ms 40 --window_stride_ms 20 --checkpoint $ckpt_path --inference_type int8
+
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/requirements.txt b/models/keyword_spotting/ds_cnn_small/model_package_tf/requirements.txt
new file mode 100644
index 0000000..3448cff
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/requirements.txt
@@ -0,0 +1,3 @@
+numpy == 1.19.5
+tensorflow == 2.5.0
+tensorflow-model-optimization == 0.6.0
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/train.py b/models/keyword_spotting/ds_cnn_small/model_package_tf/train.py
new file mode 100644
index 0000000..8c488b3
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/train.py
@@ -0,0 +1,227 @@
+# Copyright © 2023 Arm Ltd. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Functions for training simple keyword spotting models."""
+
+import argparse
+from pathlib import Path
+
+import tensorflow as tf
+import numpy as np
+
+from data_processing import data_preprocessing
+from model_core_utils import models
+
+
+def train():
+ model_settings = models.prepare_model_settings(
+ len(data_preprocessing.prepare_words_list(FLAGS.wanted_words.split(','))),
+ FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
+ FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
+
+ # Create the model.
+ model = models.create_model(model_settings, FLAGS.model_architecture, FLAGS.model_size_info, True)
+
+ audio_processor = data_preprocessing.AudioProcessor(data_url=FLAGS.data_url,
+ data_dir=FLAGS.data_dir,
+ silence_percentage=FLAGS.silence_percentage,
+ unknown_percentage=FLAGS.unknown_percentage,
+ wanted_words=FLAGS.wanted_words.split(','),
+ validation_percentage=FLAGS.validation_percentage,
+ testing_percentage=FLAGS.testing_percentage,
+ model_settings=model_settings)
+
+ # We decay learning rate in a constant piecewise way to help learning.
+ training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
+ learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
+ lr_boundary_list = training_steps_list[:-1] # Only need the values at which to change lr.
+ lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries=lr_boundary_list,
+ values=learning_rates_list)
+
+ # Specify the optimizer configurations.
+ optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
+ model.compile(optimizer=optimizer,
+ loss=tf.keras.losses.SparseCategoricalCrossentropy(),
+ metrics=['accuracy'])
+
+ train_data = audio_processor.get_data(audio_processor.Modes.TRAINING,
+ FLAGS.background_frequency, FLAGS.background_volume,
+ int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000))
+ train_data = train_data.repeat().batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+ val_data = audio_processor.get_data(audio_processor.Modes.VALIDATION)
+ val_data = val_data.batch(FLAGS.batch_size).prefetch(tf.data.AUTOTUNE)
+
+ # We train for a max number of iterations so need to calculate how many 'epochs' this will be.
+ training_steps_max = np.sum(training_steps_list)
+ training_epoch_max = int(np.ceil(training_steps_max / FLAGS.eval_step_interval))
+
+ # Callbacks.
+ train_dir = Path(FLAGS.train_dir) / "best"
+ train_dir.mkdir(parents=True, exist_ok=True)
+ model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
+ filepath=(train_dir / (FLAGS.model_architecture + "_{val_accuracy:.3f}_ckpt")),
+ save_weights_only=True,
+ monitor='val_accuracy',
+ mode='max',
+ save_best_only=True)
+ tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=FLAGS.summaries_dir)
+
+ # Train the model.
+ model.fit(x=train_data,
+ steps_per_epoch=FLAGS.eval_step_interval,
+ epochs=training_epoch_max,
+ validation_data=val_data,
+ callbacks=[model_checkpoint_callback, tensorboard_callback])
+
+ # Test and save the model.
+ test_data = audio_processor.get_data(audio_processor.Modes.TESTING)
+ test_data = test_data.batch(FLAGS.batch_size)
+
+ test_loss, test_acc = model.evaluate(x=test_data)
+ print(f'Final test accuracy: {test_acc*100:.2f}%')
+ model.save(f'saved_model/{FLAGS.model_architecture}')
+ model.save(f'keras/{FLAGS.model_architecture}.h5')
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--data_url',
+ type=str,
+ default='http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz',
+ help='Location of speech training data archive on the web.')
+ parser.add_argument(
+ '--data_dir',
+ type=str,
+ default='/tmp/speech_dataset/',
+ help="""\
+ Where to download the speech training data to.
+ """)
+ parser.add_argument(
+ '--background_volume',
+ type=float,
+ default=0.1,
+ help="""\
+ How loud the background noise should be, between 0 and 1.
+ """)
+ parser.add_argument(
+ '--background_frequency',
+ type=float,
+ default=0.8,
+ help="""\
+ How many of the training samples have background noise mixed in.
+ """)
+ parser.add_argument(
+ '--silence_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be silence.
+ """)
+ parser.add_argument(
+ '--unknown_percentage',
+ type=float,
+ default=10.0,
+ help="""\
+ How much of the training data should be unknown words.
+ """)
+ parser.add_argument(
+ '--time_shift_ms',
+ type=float,
+ default=100.0,
+ help="""\
+ Range to randomly shift the training audio by in time.
+ """)
+ parser.add_argument(
+ '--testing_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a test set.')
+ parser.add_argument(
+ '--validation_percentage',
+ type=int,
+ default=10,
+ help='What percentage of wavs to use as a validation set.')
+ parser.add_argument(
+ '--sample_rate',
+ type=int,
+ default=16000,
+ help='Expected sample rate of the wavs',)
+ parser.add_argument(
+ '--clip_duration_ms',
+ type=int,
+ default=1000,
+ help='Expected duration in milliseconds of the wavs',)
+ parser.add_argument(
+ '--window_size_ms',
+ type=float,
+ default=30.0,
+ help='How long each spectrogram timeslice is',)
+ parser.add_argument(
+ '--window_stride_ms',
+ type=float,
+ default=10.0,
+ help='How far to move in time between spectrogram timeslices',)
+ parser.add_argument(
+ '--dct_coefficient_count',
+ type=int,
+ default=40,
+ help='How many bins to use for the MFCC fingerprint',)
+ parser.add_argument(
+ '--how_many_training_steps',
+ type=str,
+ default='15000,3000',
+ help='How many training loops to run',)
+ parser.add_argument(
+ '--eval_step_interval',
+ type=int,
+ default=400,
+ help='How often to evaluate the training results.')
+ parser.add_argument(
+ '--learning_rate',
+ type=str,
+ default='0.001,0.0001',
+ help='How large a learning rate to use when training.')
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=100,
+ help='How many items to train with at once',)
+ parser.add_argument(
+ '--summaries_dir',
+ type=str,
+ default='/tmp/retrain_logs',
+ help='Where to save summary logs for TensorBoard.')
+ parser.add_argument(
+ '--wanted_words',
+ type=str,
+ default='yes,no,up,down,left,right,on,off,stop,go',
+ help='Words to use (others will be added to an unknown label)',)
+ parser.add_argument(
+ '--train_dir',
+ type=str,
+ default='/tmp/speech_commands_train',
+ help='Directory to write event logs and checkpoint.')
+ parser.add_argument(
+ '--model_architecture',
+ type=str,
+ default='dnn',
+ help='What model architecture to use')
+ parser.add_argument(
+ '--model_size_info',
+ type=int,
+ nargs="+",
+ default=[128, 128, 128],
+ help='Model dimensions - different for various models')
+
+ FLAGS, _ = parser.parse_known_args()
+ train()
diff --git a/models/keyword_spotting/ds_cnn_small/model_package_tf/validation_utils/labels.txt b/models/keyword_spotting/ds_cnn_small/model_package_tf/validation_utils/labels.txt
new file mode 100644
index 0000000..ba41645
--- /dev/null
+++ b/models/keyword_spotting/ds_cnn_small/model_package_tf/validation_utils/labels.txt
@@ -0,0 +1,12 @@
+_silence_
+_unknown_
+yes
+no
+up
+down
+left
+right
+on
+off
+stop
+go
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int16/README.md b/models/keyword_spotting/ds_cnn_small/tflite_int16/README.md
deleted file mode 100644
index 26be0bf..0000000
--- a/models/keyword_spotting/ds_cnn_small/tflite_int16/README.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# DS-CNN Small INT16
-
-## Description
-This is a fully quantized version (asymmetrical int16) of the DS-CNN Small model developed by Arm, with training checkpoints, from the Hello Edge paper. Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
-
-## License
-[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
-
-## Related Materials
-### Class Labels
-The class labels associated with this model can be downloaded by running the script `get_class_labels.sh`.
-
-### Model Recreation Code
-Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m.
-
-## Network Information
-| Network Information | Value |
-|---------------------|------------------|
-| Framework | TensorFlow Lite |
-| SHA-1 Hash | e82c7d645bec3dec580a096de0a297c6dd9a6463 |
-| Size (Bytes) | 55392 |
-| Provenance | https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m |
-| Paper | https://arxiv.org/abs/1711.07128 |
-
-## Accuracy
-Dataset: Google Speech Commands Test Set
-
-| Metric | Value |
-|--------|-------|
-| Accuracy | 0.933 |
-
-## Performance
-| Platform | Optimized |
-|----------|:---------:|
-| Cortex-A |:heavy_check_mark: |
-| Cortex-M |:heavy_check_mark: HERO |
-| Mali GPU |:heavy_check_mark: |
-| Ethos U |:heavy_check_mark: |
-
-### Key
-* :heavy_check_mark: - Will run on this platform.
-* :heavy_multiplication_x: - Will not run on this platform.
-
-
-
-## Optimizations
-| Optimization | Value |
-|-----------------|---------|
-| Quantization | INT16 |
-
-## Network Inputs
-| Input Node Name | Shape | Description |
-|-----------------|---------|-------------|
-| input | (1, 490) | The input is processed MFCCs of shape (1, 490) |
-
-## Network Outputs
-| Output Node Name | Shape | Description |
-|------------------|---------|-------------|
-| Identity | (1, 12) | The probabilities for the 12 keywords. |
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int16/ckpt/checkpoint b/models/keyword_spotting/ds_cnn_small/tflite_int16/ckpt/checkpoint
deleted file mode 100644
index 7415b78..0000000
--- a/models/keyword_spotting/ds_cnn_small/tflite_int16/ckpt/checkpoint
+++ /dev/null
@@ -1,2 +0,0 @@
-model_checkpoint_path: "ds_cnn_0.939_ckpt"
-all_model_checkpoint_paths: "ds_cnn_0.939_ckpt"
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int16/ckpt/ds_cnn_0.939_ckpt.data-00000-of-00001 b/models/keyword_spotting/ds_cnn_small/tflite_int16/ckpt/ds_cnn_0.939_ckpt.data-00000-of-00001
deleted file mode 100644
index d850952..0000000
Binary files a/models/keyword_spotting/ds_cnn_small/tflite_int16/ckpt/ds_cnn_0.939_ckpt.data-00000-of-00001 and /dev/null differ
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int16/ckpt/ds_cnn_0.939_ckpt.index b/models/keyword_spotting/ds_cnn_small/tflite_int16/ckpt/ds_cnn_0.939_ckpt.index
deleted file mode 100644
index 75f70e3..0000000
Binary files a/models/keyword_spotting/ds_cnn_small/tflite_int16/ckpt/ds_cnn_0.939_ckpt.index and /dev/null differ
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int16/definition.yaml b/models/keyword_spotting/ds_cnn_small/tflite_int16/definition.yaml
deleted file mode 100644
index 59c1dc7..0000000
--- a/models/keyword_spotting/ds_cnn_small/tflite_int16/definition.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-benchmark:
- Google Speech Commands test set:
- Accuracy: 93.39%
-description: 'This is a fully quantized version (asymmetrical int16) of the DS-CNN
- Small model developed by Arm, with training checkpoints, from the Hello Edge paper.
- Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m'
-license:
-- Apache-2.0
-network:
- file_size_bytes: 55392
- filename: ds_cnn_quantized.tflite
- framework: TensorFlow Lite
- hash:
- algorithm: sha1
- value: e82c7d645bec3dec580a096de0a297c6dd9a6463
- provenance: https://github.com/ARM-software/ML-examples/tree/main/tflu-kws-cortex-m
-network_parameters:
- input_nodes:
- - description: The input is a processed MFCCs of shape (1, 490)
- example_input:
- path: models/keyword_spotting/ds_cnn/tflite_int16/testing_input/serving_default_input:0
- shape:
- - 1
- - 490
- type: int16
- use_case: Random input for model regression.
- input_datatype: int16
- name: serving_default_input:0
- shape:
- - 1
- - 490
- output_nodes:
- - description: The probability on 12 keywords.
- name: StatefulPartitionedCall:0
- output_datatype: int16
- shape:
- - 1
- - 12
-operators:
- TensorFlow Lite:
- - AVERAGE_POOL_2D
- - CONV_2D
- - DEPTHWISE_CONV_2D
- - FULLY_CONNECTED
- - RELU
- - RESHAPE
- - SOFTMAX
-paper: https://arxiv.org/abs/1711.07128
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int16/ds_cnn_quantized.tflite b/models/keyword_spotting/ds_cnn_small/tflite_int16/ds_cnn_quantized.tflite
deleted file mode 100644
index b19b478..0000000
Binary files a/models/keyword_spotting/ds_cnn_small/tflite_int16/ds_cnn_quantized.tflite and /dev/null differ
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int16/get_class_labels.sh b/models/keyword_spotting/ds_cnn_small/tflite_int16/get_class_labels.sh
deleted file mode 100755
index e59caf5..0000000
--- a/models/keyword_spotting/ds_cnn_small/tflite_int16/get_class_labels.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the License); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an AS IS BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/usr/bin/env bash
-
-wget https://raw.githubusercontent.com/ARM-software/ML-KWS-for-MCU/e9cf319e9aa2ff71d433e111477dd95329fb94cb/Pretrained_models/labels.txt
-mv labels.txt labelmappings.txt
\ No newline at end of file
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int16/testing_input/serving_default_input:0/0.npy b/models/keyword_spotting/ds_cnn_small/tflite_int16/testing_input/serving_default_input:0/0.npy
deleted file mode 100644
index 75a2851..0000000
Binary files a/models/keyword_spotting/ds_cnn_small/tflite_int16/testing_input/serving_default_input:0/0.npy and /dev/null differ
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int16/testing_output/StatefulPartitionedCall:0/0.npy b/models/keyword_spotting/ds_cnn_small/tflite_int16/testing_output/StatefulPartitionedCall:0/0.npy
deleted file mode 100644
index b4c71a3..0000000
Binary files a/models/keyword_spotting/ds_cnn_small/tflite_int16/testing_output/StatefulPartitionedCall:0/0.npy and /dev/null differ
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int8/README.md b/models/keyword_spotting/ds_cnn_small/tflite_int8/README.md
deleted file mode 100644
index 230a02f..0000000
--- a/models/keyword_spotting/ds_cnn_small/tflite_int8/README.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# DS-CNN Small INT8
-
-## Description
-This is a fully quantized version (asymmetrical int8) of the DS-CNN Small model developed by Arm, with training checkpoints, from the Hello Edge paper. Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
-
-## License
-[Apache-2.0](https://spdx.org/licenses/Apache-2.0.html)
-
-## Related Materials
-### Class Labels
-The class labels associated with this model can be downloaded by running the script `get_class_labels.sh`.
-
-### Model Recreation Code
-Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m.
-
-## Network Information
-| Network Information | Value |
-|---------------------|------------------|
-| Framework | TensorFlow Lite |
-| SHA-1 Hash | cf24429e86a9647b1632c382894bc68d26d34039 |
-| Size (Bytes) | 47616 |
-| Provenance | https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m |
-| Paper | https://arxiv.org/abs/1711.07128 |
-
-## Accuracy
-Dataset: Google Speech Commands Test Set
-
-| Metric | Value |
-|--------|-------|
-| Accuracy | 0.935 |
-
-## Performance
-| Platform | Optimized |
-|----------|:---------:|
-| Cortex-A |:heavy_check_mark: |
-| Cortex-M |:heavy_check_mark: HERO |
-| Mali GPU |:heavy_check_mark: |
-| Ethos U |:heavy_check_mark: |
-
-### Key
-* :heavy_check_mark: - Will run on this platform.
-* :heavy_multiplication_x: - Will not run on this platform.
-
-
-
-## Optimizations
-| Optimization | Value |
-|-----------------|---------|
-| Quantization | INT8 |
-
-## Network Inputs
-| Input Node Name | Shape | Description |
-|-----------------|---------|-------------|
-| input | (1, 490) | The input is processed MFCCs of shape (1, 490) |
-
-## Network Outputs
-| Output Node Name | Shape | Description |
-|------------------|---------|-------------|
-| Identity | (1, 12) | The probabilities for the 12 keywords. |
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int8/definition.yaml b/models/keyword_spotting/ds_cnn_small/tflite_int8/definition.yaml
deleted file mode 100644
index 5e507b4..0000000
--- a/models/keyword_spotting/ds_cnn_small/tflite_int8/definition.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-benchmark:
- Google Speech Commands test set:
- Accuracy: 93.56%
-description: 'This is a fully quantized version (asymmetrical int8) of the DS-CNN
- Small model developed by Arm, with training checkpoints, from the Hello Edge paper.
- Code to recreate this model can be found here: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m'
-license:
-- Apache-2.0
-network:
- file_size_bytes: 47616
- filename: ds_cnn_s_quantized.tflite
- framework: TensorFlow Lite
- hash:
- algorithm: sha1
- value: cf24429e86a9647b1632c382894bc68d26d34039
- provenance: https://github.com/ARM-software/ML-examples/tree/master/tflu-kws-cortex-m
- quality_level: hero#CORTEX-M
-network_parameters:
- input_nodes:
- - description: The input is a processed MFCCs of shape (1, 490)
- example_input:
- path: models/keyword_spotting/ds_cnn_small/tflite_int8/testing_input/input
- name: input
- shape:
- - 1
- - 490
- output_nodes:
- - description: The probability on 12 keywords.
- name: Identity
- shape:
- - 1
- - 12
- test_output_path: models/keyword_spotting/ds_cnn_small/tflite_int8/testing_output/Identity
-operators:
- TensorFlow Lite:
- - AVERAGE_POOL_2D
- - CONV_2D
- - DEPTHWISE_CONV_2D
- - DEQUANTIZE
- - FULLY_CONNECTED
- - QUANTIZE
- - RELU
- - RESHAPE
- - SOFTMAX
-paper: https://arxiv.org/abs/1711.07128
diff --git a/models/keyword_spotting/ds_cnn_small/tflite_int8/get_class_labels.sh b/models/keyword_spotting/ds_cnn_small/tflite_int8/get_class_labels.sh
deleted file mode 100755
index e59caf5..0000000
--- a/models/keyword_spotting/ds_cnn_small/tflite_int8/get_class_labels.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the License); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an AS IS BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/usr/bin/env bash
-
-wget https://raw.githubusercontent.com/ARM-software/ML-KWS-for-MCU/e9cf319e9aa2ff71d433e111477dd95329fb94cb/Pretrained_models/labels.txt
-mv labels.txt labelmappings.txt
\ No newline at end of file