From 03aed27ba07c797f1ec57146d94800aa06c0d86d Mon Sep 17 00:00:00 2001 From: Yishuang Pang Date: Mon, 13 Jan 2025 11:51:41 -0800 Subject: [PATCH] No public description PiperOrigin-RevId: 715054197 --- mediapipe/tasks/cc/genai/inference/c/llm_inference_engine.h | 3 +++ .../tasks/ios/genai/inference/sources/LlmInference.swift | 6 +++++- .../tasks/java/com/google/mediapipe/tasks/core/jni/llm.cc | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/mediapipe/tasks/cc/genai/inference/c/llm_inference_engine.h b/mediapipe/tasks/cc/genai/inference/c/llm_inference_engine.h index 7642704a75..6eb87576a3 100644 --- a/mediapipe/tasks/cc/genai/inference/c/llm_inference_engine.h +++ b/mediapipe/tasks/cc/genai/inference/c/llm_inference_engine.h @@ -107,6 +107,9 @@ typedef struct { // initialization may finish before weights have finished uploading which // might push some of the weight upload time into input processing. bool wait_for_weight_uploads; + + // Whether the submodel should be used if available. + bool use_submodel; } LlmModelSettings; // LlmSessionConfig configures how to execute the model. diff --git a/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift b/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift index f5d7cd940c..ef796173a9 100644 --- a/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift +++ b/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift @@ -76,7 +76,8 @@ import MediaPipeTasksGenAIC max_top_k: options.maxTopk, llm_activation_data_type: kLlmActivationDataTypeDefault, num_draft_tokens: 0, - wait_for_weight_uploads: options.waitForWeightUploads) + wait_for_weight_uploads: options.waitForWeightUploads, + use_submodel: options.useSubmodel) return try LlmTaskRunner(modelSettings: modelSetting) } } @@ -240,6 +241,9 @@ extension LlmInference { /// time into input processing. @objc public var waitForWeightUploads: Bool = false + /// Whether to use the submodel if available.
+ @objc public var useSubmodel: Bool = false + /// Creates a new instance of `Options` with the given `modelPath` and default values of /// `maxTokens`, `maxTopk`, `supportedLoraRanks`. /// This function is only intended to be used from Objective C. diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/core/jni/llm.cc b/mediapipe/tasks/java/com/google/mediapipe/tasks/core/jni/llm.cc index bf30df1cb4..09b805da00 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/core/jni/llm.cc +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/core/jni/llm.cc @@ -77,6 +77,7 @@ LlmModelSettings ParseModelSettings(void* bytes, int size) { output.llm_activation_data_type = kLlmActivationDataTypeDefault; output.num_draft_tokens = 0; output.wait_for_weight_uploads = false; + output.use_submodel = false; return output; }