From 120c938086cad2e6b2108611c04ed17862f476e5 Mon Sep 17 00:00:00 2001
From: Yongjoo Ahn
Date: Tue, 29 Oct 2024 16:51:04 +0900
Subject: [PATCH 1/3] [api] Add a new nnfw `EXECUTORCH_LLAMA`

- Add an enum for the new nnfw `EXECUTORCH_LLAMA`

Signed-off-by: Yongjoo Ahn
---
 c/include/ml-api-common.h       | 1 +
 c/src/ml-api-inference-single.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/c/include/ml-api-common.h b/c/include/ml-api-common.h
index cd9a6150..0dca611c 100644
--- a/c/include/ml-api-common.h
+++ b/c/include/ml-api-common.h
@@ -75,6 +75,7 @@ typedef enum {
   ML_NNFW_TYPE_NCNN = 18, /**< Tencent ncnn (Since 9.0) */
   ML_NNFW_TYPE_TENSORRT = 19, /**< NVidia Tensor-RT (Since 9.0) */
   ML_NNFW_TYPE_QNN = 20, /**< Qualcomm QNN (Qualcomm® AI Engine Direct) (Since 9.0) */
+  ML_NNFW_TYPE_EXECUTORCH_LLAMA = 21, /**< ExecuTorch Llama runner */
   ML_NNFW_TYPE_SNAP = 0x2001, /**< SNAP (Samsung Neural Acceleration Platform), only for Android. (Since 6.0) */
 } ml_nnfw_type_e;

diff --git a/c/src/ml-api-inference-single.c b/c/src/ml-api-inference-single.c
index b0736a1d..7ef11498 100644
--- a/c/src/ml-api-inference-single.c
+++ b/c/src/ml-api-inference-single.c
@@ -112,6 +112,7 @@ static const char *ml_nnfw_subplugin_name[] = {
   [ML_NNFW_TYPE_NCNN] = "ncnn",
   [ML_NNFW_TYPE_TENSORRT] = "tensorrt",
   [ML_NNFW_TYPE_QNN] = "qnn",
+  [ML_NNFW_TYPE_EXECUTORCH_LLAMA] = "executorch-llama",
   NULL
 };

@@ -1957,6 +1958,7 @@ _ml_validate_model_file (const char *const *model,
     case ML_NNFW_TYPE_ONNX_RUNTIME:
     case ML_NNFW_TYPE_NCNN:
     case ML_NNFW_TYPE_TENSORRT:
+    case ML_NNFW_TYPE_EXECUTORCH_LLAMA:
     case ML_NNFW_TYPE_QNN:
       /**
        * We cannot check the file ext with NNFW.

From 7485a33bd356d26fd82b4110791b38e6ef4a6ce2 Mon Sep 17 00:00:00 2001
From: Yongjoo Ahn
Date: Tue, 29 Oct 2024 16:51:53 +0900
Subject: [PATCH 2/3] [api] Let single handle flexible filter

- Let the single API handle flexible filters (executorch-llama)

Signed-off-by: Yongjoo Ahn
---
 c/src/ml-api-inference-single.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/c/src/ml-api-inference-single.c b/c/src/ml-api-inference-single.c
index 7ef11498..7a8d1239 100644
--- a/c/src/ml-api-inference-single.c
+++ b/c/src/ml-api-inference-single.c
@@ -138,6 +138,7 @@ typedef struct
   gboolean invoking; /**< invoke running flag */
   ml_tensors_data_h in_tensors; /**< input tensor wrapper for processing */
   ml_tensors_data_h out_tensors; /**< output tensor wrapper for processing */
+  gboolean is_flexible; /**< true if tensor filter handles flexible input/output */
   GList *destroy_data_list; /**< data to be freed by filter */
 } ml_single;

@@ -779,6 +780,11 @@ ml_single_set_info_in_handle (ml_single_h single, gboolean is_input,
   ml_tensors_info_h info = NULL;

   ml_single_get_gst_info (single_h, is_input, &gst_info);

+  if (single_h->is_flexible) {
+    gst_info.format = _NNS_TENSOR_FORMAT_FLEXIBLE;
+    gst_info.num_tensors = 1U; /* TODO: Consider filters with multiple input tensors. */
+  }
+
   _ml_tensors_info_create_from_gst (&info, &gst_info);
   gst_tensors_info_free (&gst_info);
@@ -847,6 +853,7 @@ ml_single_create_handle (ml_nnfw_type_e nnfw)
   single_h->output = NULL;
   single_h->destroy_data_list = NULL;
   single_h->invoking = FALSE;
+  single_h->is_flexible = FALSE;

   gst_tensors_info_init (&single_h->in_info);
   gst_tensors_info_init (&single_h->out_info);
@@ -1083,6 +1090,11 @@ ml_single_open_custom (ml_single_h * single, ml_single_preset * info)
     status = ML_ERROR_STREAMS_PIPE;
     goto error;
   }

+  /* handle flexible tensor filters */
+  if (info->nnfw == ML_NNFW_TYPE_EXECUTORCH_LLAMA) {
+    single_h->is_flexible = TRUE;
+    g_object_set (filter_obj, "invoke-dynamic", TRUE, NULL);
+  }
   if (nnfw == ML_NNFW_TYPE_NNTR_INF) {
     if (!in_tensors_info || !out_tensors_info) {
@@ -1319,6 +1331,11 @@ _ml_single_invoke_validate_data (ml_single_h single,
         "The %d-th input tensor is not valid. There is no valid dimension metadata for this tensor.",
         i);

+    if (single_h->is_flexible) {
+      /* Skip the data size check for flexible filters. */
+      continue;
+    }
+
     raw_size = _model->tensors[i].size;
     if (G_UNLIKELY (_data->tensors[i].size != raw_size))
       _ml_error_report_return (ML_ERROR_INVALID_PARAMETER,

From ed9a1434e0a28259122bf84d27f1aa8653581054 Mon Sep 17 00:00:00 2001
From: Yongjoo Ahn
Date: Tue, 29 Oct 2024 16:53:31 +0900
Subject: [PATCH 3/3] [test] Add a disabled test for executorch-llama

- Add a simple test to show how to use executorch-llama with the single API

Signed-off-by: Yongjoo Ahn
---
 tests/capi/unittest_capi_inference_single.cc | 46 ++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/tests/capi/unittest_capi_inference_single.cc b/tests/capi/unittest_capi_inference_single.cc
index 84ad6d6d..8500fe1b 100644
--- a/tests/capi/unittest_capi_inference_single.cc
+++ b/tests/capi/unittest_capi_inference_single.cc
@@ -3180,6 +3180,52 @@ TEST (nnstreamer_capi_singleshot, invoke_ncnn)
 }
 #endif /* ENABLE_NCNN */

+/**
+ * @brief DISABLED test to show executorch-llama filter usage.
+ */
+TEST (nnstreamer_capi_singleshot, DISABLED_executorch_llama)
+{
+  int status;
+  ml_single_h single;
+
+  status = ml_single_open (&single, "/path/to/pte,/path/to/tokenizer", NULL,
+      NULL, ML_NNFW_TYPE_EXECUTORCH_LLAMA, ML_NNFW_HW_ANY);
+  ASSERT_EQ (status, ML_ERROR_NONE);
+
+  /* prepare input data */
+  std::string prompt ("Once upon a time");
+  ml_tensors_info_h in_info;
+  ml_tensors_data_h in_data;
+  ml_tensor_dimension dim = { (unsigned int) prompt.size () + 1, 0 };
+
+  ml_tensors_info_create (&in_info);
+  ml_tensors_info_set_count (in_info, 1);
+  ml_tensors_info_set_tensor_type (in_info, 0, ML_TENSOR_TYPE_UINT8);
+  ml_tensors_info_set_tensor_dimension (in_info, 0, dim);
+
+  ml_tensors_data_create (in_info, &in_data);
+  ml_tensors_data_set_tensor_data (in_data, 0, prompt.c_str (), prompt.size () + 1);
+
+  /* invoke */
+  ml_tensors_data_h out_data;
+  status = ml_single_invoke (single, in_data, &out_data);
+  EXPECT_EQ (ML_ERROR_NONE, status);
+
+  char *result;
+  size_t result_size;
+  status = ml_tensors_data_get_tensor_data (out_data, 0U, (void **) &result, &result_size);
+  EXPECT_EQ (ML_ERROR_NONE, status);
+
+  g_info ("result: %s", result);
+  EXPECT_EQ (0, strncmp (result, prompt.c_str (), prompt.size ()));
+
+  /* free data */
+  ml_tensors_data_destroy (out_data);
+  ml_tensors_data_destroy (in_data);
+  ml_tensors_info_destroy (in_info);
+  ml_single_close (single);
+}
+
 /**
  * @brief Test NNStreamer single shot (custom filter)
  * @detail Run pipeline with custom filter with allocate in invoke, handle multi tensors.
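
Note for reviewers, not part of the patch series: the sketch below is a minimal
illustration of what the is_flexible bypass in _ml_single_invoke_validate_data
(patch 2) enables. One single handle is invoked with two prompts of different
sizes; with a fixed-format filter, the second invoke would fail the raw-size
check. The model/tokenizer paths, the prompts, and the function name are
placeholders, the included header is assumed to be the public single-shot API
header (nnstreamer-single.h), and error handling is trimmed for brevity.

/* flexible_invoke_sketch.c: hedged usage sketch, not part of these patches. */
#include <string.h>
#include <nnstreamer-single.h>

static void
invoke_two_prompts (void)
{
  ml_single_h single;
  ml_tensors_info_h in_info;
  const char *prompts[] = { "Hi", "Once upon a time" };
  unsigned int p;

  /* Placeholder model/tokenizer paths, as in the disabled test above. */
  if (ml_single_open (&single, "/path/to/pte,/path/to/tokenizer", NULL, NULL,
          ML_NNFW_TYPE_EXECUTORCH_LLAMA, ML_NNFW_HW_ANY) != ML_ERROR_NONE)
    return;

  ml_tensors_info_create (&in_info);
  ml_tensors_info_set_count (in_info, 1);
  ml_tensors_info_set_tensor_type (in_info, 0, ML_TENSOR_TYPE_UINT8);

  for (p = 0; p < 2; p++) {
    size_t len = strlen (prompts[p]) + 1;
    ml_tensor_dimension dim = { (unsigned int) len, 0 };
    ml_tensors_data_h in_data, out_data;

    /* Each prompt declares its own input size; the size check skipped by
     * patch 2 is what lets both invocations go through one handle. */
    ml_tensors_info_set_tensor_dimension (in_info, 0, dim);
    ml_tensors_data_create (in_info, &in_data);
    ml_tensors_data_set_tensor_data (in_data, 0, prompts[p], len);

    if (ml_single_invoke (single, in_data, &out_data) == ML_ERROR_NONE)
      ml_tensors_data_destroy (out_data);
    ml_tensors_data_destroy (in_data);
  }

  ml_tensors_info_destroy (in_info);
  ml_single_close (single);
}

As in the disabled test, generated text can then be read back with
ml_tensors_data_get_tensor_data; the returned size comes from the filter, so
callers should not assume it matches the input size.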