diff --git a/c/include/ml-api-common.h b/c/include/ml-api-common.h
index cd9a6150..0dca611c 100644
--- a/c/include/ml-api-common.h
+++ b/c/include/ml-api-common.h
@@ -75,6 +75,7 @@ typedef enum {
   ML_NNFW_TYPE_NCNN = 18,              /**< Tencent ncnn (Since 9.0) */
   ML_NNFW_TYPE_TENSORRT = 19,          /**< NVidia Tensor-RT (Since 9.0) */
   ML_NNFW_TYPE_QNN = 20,               /**< Qualcomm QNN (Qualcomm® AI Engine Direct) (Since 9.0) */
+  ML_NNFW_TYPE_EXECUTORCH_LLAMA = 21,  /**< ExecuTorch Llama runner */
   ML_NNFW_TYPE_SNAP = 0x2001,          /**< SNAP (Samsung Neural Acceleration Platform), only for Android. (Since 6.0) */
 } ml_nnfw_type_e;
 
diff --git a/c/src/ml-api-inference-single.c b/c/src/ml-api-inference-single.c
index b0736a1d..7a8d1239 100644
--- a/c/src/ml-api-inference-single.c
+++ b/c/src/ml-api-inference-single.c
@@ -112,6 +112,7 @@ static const char *ml_nnfw_subplugin_name[] = {
   [ML_NNFW_TYPE_NCNN] = "ncnn",
   [ML_NNFW_TYPE_TENSORRT] = "tensorrt",
   [ML_NNFW_TYPE_QNN] = "qnn",
+  [ML_NNFW_TYPE_EXECUTORCH_LLAMA] = "executorch-llama",
   NULL
 };
 
@@ -137,6 +138,7 @@ typedef struct
   gboolean invoking;            /**< invoke running flag */
   ml_tensors_data_h in_tensors; /**< input tensor wrapper for processing */
   ml_tensors_data_h out_tensors;        /**< output tensor wrapper for processing */
+  gboolean is_flexible;         /**< true if tensor filter handles flexible input/output */
   GList *destroy_data_list;     /**< data to be freed by filter */
 } ml_single;
 
@@ -778,6 +780,11 @@ ml_single_set_info_in_handle (ml_single_h single, gboolean is_input,
   ml_tensors_info_h info = NULL;
 
   ml_single_get_gst_info (single_h, is_input, &gst_info);
+
+  if (single_h->is_flexible) {
+    gst_info.format = _NNS_TENSOR_FORMAT_FLEXIBLE;
+    gst_info.num_tensors = 1U; /* TODO: Consider multiple input tensors filter */
+  }
+
   _ml_tensors_info_create_from_gst (&info, &gst_info);
   gst_tensors_info_free (&gst_info);
 
@@ -846,6 +853,7 @@ ml_single_create_handle (ml_nnfw_type_e nnfw)
   single_h->output = NULL;
   single_h->destroy_data_list = NULL;
   single_h->invoking = FALSE;
+  single_h->is_flexible = FALSE;
 
   gst_tensors_info_init (&single_h->in_info);
   gst_tensors_info_init (&single_h->out_info);
@@ -1082,6 +1090,11 @@ ml_single_open_custom (ml_single_h * single, ml_single_preset * info)
       status = ML_ERROR_STREAMS_PIPE;
       goto error;
     }
+    /* handle flexible single */
+    if (info->nnfw == ML_NNFW_TYPE_EXECUTORCH_LLAMA) {
+      single_h->is_flexible = TRUE;
+      g_object_set (filter_obj, "invoke-dynamic", TRUE, NULL);
+    }
 
     if (nnfw == ML_NNFW_TYPE_NNTR_INF) {
       if (!in_tensors_info || !out_tensors_info) {
@@ -1318,6 +1331,11 @@ _ml_single_invoke_validate_data (ml_single_h single,
           "The %d-th input tensor is not valid. There is no valid dimension metadata for this tensor.",
           i);
 
+    if (single_h->is_flexible) {
+      /* Skip data size check for flexible */
+      continue;
+    }
+
     raw_size = _model->tensors[i].size;
     if (G_UNLIKELY (_data->tensors[i].size != raw_size))
       _ml_error_report_return (ML_ERROR_INVALID_PARAMETER,
@@ -1957,6 +1975,7 @@ _ml_validate_model_file (const char *const *model,
     case ML_NNFW_TYPE_ONNX_RUNTIME:
     case ML_NNFW_TYPE_NCNN:
     case ML_NNFW_TYPE_TENSORRT:
+    case ML_NNFW_TYPE_EXECUTORCH_LLAMA:
     case ML_NNFW_TYPE_QNN:
       /**
        * We cannot check the file ext with NNFW.
diff --git a/tests/capi/unittest_capi_inference_single.cc b/tests/capi/unittest_capi_inference_single.cc
index 84ad6d6d..8500fe1b 100644
--- a/tests/capi/unittest_capi_inference_single.cc
+++ b/tests/capi/unittest_capi_inference_single.cc
@@ -3180,6 +3180,52 @@ TEST (nnstreamer_capi_singleshot, invoke_ncnn)
 }
 #endif /* ENABLE_NCNN */
 
+/**
+ * @brief DISABLED Test to show executorch-llama filter usage
+ */
+TEST (nnstreamer_capi_singleshot, DISABLED_executorch_llama)
+{
+  int status;
+  ml_single_h single;
+
+  status = ml_single_open (&single, "/path/to/pte,/path/to/tokenizer", NULL,
+      NULL, ML_NNFW_TYPE_EXECUTORCH_LLAMA, ML_NNFW_HW_ANY);
+  ASSERT_EQ (status, ML_ERROR_NONE);
+
+  /* prepare input data */
+  std::string prompt ("Once upon a time");
+  ml_tensors_info_h in_info;
+  ml_tensors_data_h in_data;
+  ml_tensor_dimension dim = { (unsigned int) prompt.size () + 1, 0 };
+
+  ml_tensors_info_create (&in_info);
+  ml_tensors_info_set_count (in_info, 1);
+  ml_tensors_info_set_tensor_type (in_info, 0, ML_TENSOR_TYPE_UINT8);
+  ml_tensors_info_set_tensor_dimension (in_info, 0, dim);
+
+  ml_tensors_data_create (in_info, &in_data);
+  ml_tensors_data_set_tensor_data (in_data, 0, prompt.c_str (), prompt.size () + 1);
+
+  /* invoke */
+  ml_tensors_data_h out_data;
+  status = ml_single_invoke (single, in_data, &out_data);
+  EXPECT_EQ (ML_ERROR_NONE, status);
+
+  char *result;
+  size_t result_size;
+  status = ml_tensors_data_get_tensor_data (out_data, 0U, (void **) &result, &result_size);
+  EXPECT_EQ (ML_ERROR_NONE, status);
+
+  g_info ("result: %s", result);
+  EXPECT_EQ (0, strncmp (result, prompt.c_str (), prompt.size ()));
+
+  /* free data */
+  ml_tensors_data_destroy (out_data);
+  ml_tensors_data_destroy (in_data);
+  ml_tensors_info_destroy (in_info);
+  ml_single_close (single);
+}
+
 /**
  * @brief Test NNStreamer single shot (custom filter)
  * @detail Run pipeline with custom filter with allocate in invoke, handle multi tensors.
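Beyond the disabled unit test above, a typical application caller will likely want to raise the single-shot timeout before invoking, since autoregressive Llama generation can run far longer than an ordinary inference. Below is a minimal usage sketch against the public single-shot C API, not part of this diff: the "model.pte,tokenizer.bin" paths are hypothetical placeholders, run_llama_prompt is an illustrative helper name, and the 60-second timeout is an arbitrary example value.

#include <string.h>
#include <glib.h>
#include <ml-api-common.h>
#include <nnstreamer-single.h>

/* Sketch: run one prompt through the ExecuTorch Llama runner.
 * "model.pte" and "tokenizer.bin" are placeholder paths. */
static int
run_llama_prompt (const char *prompt)
{
  ml_single_h single = NULL;
  ml_tensors_info_h in_info = NULL;
  ml_tensors_data_h in_data = NULL, out_data = NULL;
  ml_tensor_dimension dim = { 0 };
  int status;

  status = ml_single_open (&single, "model.pte,tokenizer.bin", NULL, NULL,
      ML_NNFW_TYPE_EXECUTORCH_LLAMA, ML_NNFW_HW_ANY);
  if (status != ML_ERROR_NONE)
    return status;

  /* Token-by-token generation may exceed the default invoke timeout;
   * allow up to 60 s here (example value). */
  ml_single_set_timeout (single, 60000U);

  /* The filter is flexible, so the input size follows the prompt length
   * rather than a fixed model dimension (the size check is skipped). */
  dim[0] = (unsigned int) strlen (prompt) + 1;
  ml_tensors_info_create (&in_info);
  ml_tensors_info_set_count (in_info, 1);
  ml_tensors_info_set_tensor_type (in_info, 0, ML_TENSOR_TYPE_UINT8);
  ml_tensors_info_set_tensor_dimension (in_info, 0, dim);
  ml_tensors_data_create (in_info, &in_data);
  ml_tensors_data_set_tensor_data (in_data, 0, prompt, strlen (prompt) + 1);

  status = ml_single_invoke (single, in_data, &out_data);

  if (status == ML_ERROR_NONE) {
    char *text = NULL;
    size_t text_size = 0U;

    if (ml_tensors_data_get_tensor_data (out_data, 0U, (void **) &text,
            &text_size) == ML_ERROR_NONE)
      g_info ("generated: %s", text);
  }

  if (out_data)
    ml_tensors_data_destroy (out_data);
  ml_tensors_data_destroy (in_data);
  ml_tensors_info_destroy (in_info);
  ml_single_close (single);
  return status;
}

The flow mirrors the unit test; the only additions are the timeout call and output handling, which matter in practice because the flexible output size is only known after the invoke completes.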