From 03aed27ba07c797f1ec57146d94800aa06c0d86d Mon Sep 17 00:00:00 2001 From: Yishuang Pang Date: Mon, 13 Jan 2025 11:51:41 -0800 Subject: [PATCH] No public description PiperOrigin-RevId: 715054197 --- mediapipe/tasks/cc/genai/inference/c/llm_inference_engine.h | 3 +++ .../tasks/ios/genai/inference/sources/LlmInference.swift | 6 +++++- .../tasks/java/com/google/mediapipe/tasks/core/jni/llm.cc | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/mediapipe/tasks/cc/genai/inference/c/llm_inference_engine.h b/mediapipe/tasks/cc/genai/inference/c/llm_inference_engine.h index 7642704a75..6eb87576a3 100644 --- a/mediapipe/tasks/cc/genai/inference/c/llm_inference_engine.h +++ b/mediapipe/tasks/cc/genai/inference/c/llm_inference_engine.h @@ -107,6 +107,9 @@ typedef struct { // initialization may finish before weights have finished uploading which // might push some of the weight upload time into input processing. bool wait_for_weight_uploads; + + // Whether the submodel should be used if available. + bool use_submodel; } LlmModelSettings; // LlmSessionConfig configures how to execute the model. diff --git a/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift b/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift index f5d7cd940c..ef796173a9 100644 --- a/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift +++ b/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift @@ -76,7 +76,8 @@ import MediaPipeTasksGenAIC max_top_k: options.maxTopk, llm_activation_data_type: kLlmActivationDataTypeDefault, num_draft_tokens: 0, - wait_for_weight_uploads: options.waitForWeightUploads) + wait_for_weight_uploads: options.waitForWeightUploads, + use_submodel: options.useSubmodel) return try LlmTaskRunner(modelSettings: modelSetting) } } @@ -240,6 +241,9 @@ extension LlmInference { /// time into input processing. @objc public var waitForWeightUploads: Bool = false + /// Whether to use the submodel if available.
+ @objc public var useSubmodel: Bool = false + /// Creates a new instance of `Options` with the given `modelPath` and default values of /// `maxTokens`, `maxTopk`, `supportedLoraRanks`. /// This function is only intended to be used from Objective C. diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/core/jni/llm.cc b/mediapipe/tasks/java/com/google/mediapipe/tasks/core/jni/llm.cc index bf30df1cb4..09b805da00 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/core/jni/llm.cc +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/core/jni/llm.cc @@ -77,6 +77,7 @@ LlmModelSettings ParseModelSettings(void* bytes, int size) { output.llm_activation_data_type = kLlmActivationDataTypeDefault; output.num_draft_tokens = 0; output.wait_for_weight_uploads = false; + output.use_submodel = false; return output; }