forked from pytorch/executorch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[llava][19/N] Add multimodal runner base class and build file
Differential Revision: D61249552 Pull Request resolved: pytorch#4665
- Loading branch information
1 parent
7b27f9b
commit 6efc222
Showing
6 changed files
with
237 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
# | ||
# Build llm runner lib. | ||
# | ||
# ### Editing this file ### | ||
# | ||
# This file should be formatted with | ||
# ~~~ | ||
# cmake-format -i CMakeLists.txt | ||
# ~~~ | ||
# It should also be cmake-lint clean. | ||
# | ||
|
||
# When the including project has not provided EXECUTORCH_ROOT, fall back to the
# directory three levels above this one.
if(NOT EXECUTORCH_ROOT)
  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
endif()

# Shared build helpers.
include(${EXECUTORCH_ROOT}/build/Utils.cmake)
include(${EXECUTORCH_ROOT}/build/Codegen.cmake)

# Including ${EXECUTORCH_SRCS_FILE} is what defines the `_<target>_srcs` lists.
# NOTE(review): extract_sources() is presumably what produces that file; confirm
# against the helper modules included above.
set(EXECUTORCH_SRCS_FILE
    "${CMAKE_CURRENT_BINARY_DIR}/../../../executorch_srcs.cmake"
)
extract_sources(${EXECUTORCH_SRCS_FILE})
include(${EXECUTORCH_SRCS_FILE})

# The llm runner library: prefix every listed source with the repository root,
# then build the static library from the resulting paths.
list(TRANSFORM _extension_llm_runner__srcs PREPEND "${EXECUTORCH_ROOT}/")
add_library(extension_llm_runner STATIC ${_extension_llm_runner__srcs})

# Consumers of extension_module also receive the common include directories.
target_include_directories(
  extension_module INTERFACE ${_common_include_directories}
)

# Libraries the runner links against; PUBLIC so they propagate to its users.
set(runner_deps executorch extension_module extension_data_loader)

# Consumers of the runner get the common include directories plus the repository
# root on their include path.
target_include_directories(
  extension_llm_runner INTERFACE ${_common_include_directories}
                                 ${EXECUTORCH_ROOT}
)
target_link_libraries(extension_llm_runner PUBLIC ${runner_deps})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
// A simple image struct. | ||
|
||
#pragma once | ||
#include <cstdint> | ||
// patternlint-disable-next-line executorch-cpp-nostdinc | ||
#include <vector> | ||
|
||
namespace torch::executor { | ||
|
||
/**
 * A plain aggregate describing one image.
 *
 * Every dimension defaults to zero and `data` defaults to empty, so a
 * default-constructed Image is a well-defined empty image instead of holding
 * indeterminate values. The type remains an aggregate, so brace
 * initialization such as `Image{bytes, w, h, c}` keeps working.
 */
struct Image {
  // Raw image bytes. Assuming NCHW format.
  std::vector<uint8_t> data;
  // Image width (presumably in pixels; confirm with the producer).
  int32_t width = 0;
  // Image height (presumably in pixels; confirm with the producer).
  int32_t height = 0;
  // Number of channels.
  int32_t channels = 0;
};
|
||
} // namespace torch::executor |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
// Given an image tensor, prefill the KV cache of a multimodal LLM.
|
||
#pragma once | ||
|
||
#include <executorch/extension/llm/runner/image.h> | ||
#include <executorch/extension/module/module.h> | ||
|
||
namespace torch::executor { | ||
|
||
// Assuming kv cache and parallel prefill are enabled. | ||
class ImagePrefiller { | ||
public: | ||
explicit ImagePrefiller(Module* module) : module_(module) {} | ||
/** | ||
* Prefill an LLM Module with the given image input. | ||
* @param image The image input to the multimodal LLM. | ||
* @param start_pos The starting position in KV cache of the input in the LLM | ||
* @return The next token of the LLM Module after prefill. | ||
*/ | ||
virtual Result<exec_aten::Tensor> prefill( | ||
Image& image, | ||
int64_t start_pos = 0) = 0; | ||
|
||
protected: | ||
Module* module_; | ||
}; | ||
|
||
} // namespace torch::executor |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
// A simple multimodal LLM runner that includes preprocessing and
// postprocessing logic. The runner takes in images and a text prompt as input
// and reports generated text through a callback.
|
||
#pragma once | ||
|
||
#include <cstdint>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <functional>
#include <memory>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <string>
#include <type_traits>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <unordered_map>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <unordered_set>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <vector>
|
||
#include <executorch/extension/llm/runner/image.h> | ||
#include <executorch/extension/llm/runner/image_prefiller.h> | ||
#include <executorch/extension/llm/runner/stats.h> | ||
#include <executorch/extension/llm/runner/text_decoder_runner.h> | ||
#include <executorch/extension/llm/runner/text_prefiller.h> | ||
#include <executorch/extension/llm/runner/text_token_generator.h> | ||
#include <executorch/extension/llm/sampler/sampler.h> | ||
#include <executorch/extension/llm/tokenizer/tokenizer.h> | ||
#include <executorch/extension/module/module.h> | ||
#include <executorch/extension/runner_util/managed_tensor.h> | ||
|
||
namespace torch::executor { | ||
using Stats = ::executorch::llm::Stats; | ||
|
||
class MultimodalRunner { | ||
public: | ||
explicit MultimodalRunner( | ||
const std::string& model_path, | ||
const std::string& tokenizer_path, | ||
const float temperature = 0.8f) | ||
: temperature_(temperature), | ||
module_(std::make_unique<Module>(model_path, Module::LoadMode::File)), | ||
tokenizer_path_(tokenizer_path) { | ||
ET_LOG( | ||
Info, | ||
"Creating Multimodal LLM runner: model_path=%s, tokenizer_path=%s", | ||
model_path.c_str(), | ||
tokenizer_path.c_str()); | ||
} | ||
|
||
virtual bool is_loaded() = 0; | ||
virtual Error load() = 0; | ||
virtual Error generate( | ||
std::vector<Image>& images, | ||
const std::string& prompt, | ||
int32_t seq_len = 1024, | ||
std::function<void(const std::string&)> token_callback = {}, | ||
std::function<void(const Stats&)> stats_callback = {}) = 0; | ||
|
||
inline void stop() { | ||
text_token_generator_->stop(); | ||
} | ||
|
||
protected: | ||
// metadata | ||
int32_t vocab_size_; | ||
int32_t bos_id_; | ||
int32_t eos_id_; | ||
int32_t n_bos_; | ||
int32_t n_eos_; | ||
int32_t max_seq_len_; | ||
float temperature_; | ||
|
||
// model | ||
std::unordered_set<std::string> model_methods_; | ||
std::unique_ptr<Module> module_; | ||
std::unique_ptr<TextDecoderRunner> text_decoder_runner_; | ||
std::unique_ptr<TextPrefiller> text_prefiller_; | ||
std::unique_ptr<ImagePrefiller> image_prefiller_; | ||
std::unique_ptr<TextTokenGenerator> text_token_generator_; | ||
std::string tokenizer_path_; | ||
std::unique_ptr<Tokenizer> tokenizer_; | ||
|
||
// stats | ||
Stats stats_; | ||
}; | ||
|
||
} // namespace torch::executor |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters