Skip to content

Commit

Permalink
[llava][19/N] Add multimodal runner base class and build file
Browse files Browse the repository at this point in the history
Differential Revision: D61249552

Pull Request resolved: pytorch#4665
  • Loading branch information
larryliu0820 authored Aug 14, 2024
1 parent 7b27f9b commit 6efc222
Show file tree
Hide file tree
Showing 6 changed files with 237 additions and 0 deletions.
13 changes: 13 additions & 0 deletions build/cmake_deps.toml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,19 @@ deps = [
"executorch_no_prim_ops",
]

# Build-dependency entry for the LLM runner extension library. The table name
# matches the `extension_llm_runner` library defined in
# extension/llm/runner/CMakeLists.txt.
[targets.extension_llm_runner]
# Buck target this entry is derived from.
buck_targets = [
"//extension/llm/runner:runner_lib",
]
# Regular expressions selecting which files to keep; here, names ending in
# ".cpp" (presumably applied to the Buck target's inputs -- confirm against the
# source-extraction tooling).
filters = [
".cpp$",
]
# Other entries of this file that this target depends on.
# NOTE(review): extension/llm/runner/CMakeLists.txt links `extension_data_loader`
# and does not link `extension_runner_util`; confirm which list is intended.
deps = [
"executorch",
"executorch_no_prim_ops",
"extension_module",
"extension_runner_util",
]
# ---------------------------------- extension end ----------------------------------
# ---------------------------------- binary start ----------------------------------

Expand Down
53 changes: 53 additions & 0 deletions extension/llm/runner/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#
# Build llm runner lib.
#
# ### Editing this file ###
#
# This file should be formatted with
# ~~~
# cmake-format -i CMakeLists.txt
# ~~~
# It should also be cmake-lint clean.
#

# Default EXECUTORCH_ROOT to three levels above the current source directory
# when the including project has not already set it.
if(NOT EXECUTORCH_ROOT)
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)
endif()

# Shared helper modules from the ExecuTorch build directory. extract_sources(),
# used below, is presumably defined by one of these -- confirm.
include(${EXECUTORCH_ROOT}/build/Utils.cmake)
include(${EXECUTORCH_ROOT}/build/Codegen.cmake)

#
# The `_<target>_srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}.
# The path is resolved three levels above this directory's binary dir,
# mirroring the EXECUTORCH_ROOT layout used for the source tree.
#
set(EXECUTORCH_SRCS_FILE
"${CMAKE_CURRENT_BINARY_DIR}/../../../executorch_srcs.cmake"
)

# Presumably generates ${EXECUTORCH_SRCS_FILE} when needed -- confirm against
# the helper's definition.
extract_sources(${EXECUTORCH_SRCS_FILE})

# Pull the generated source lists (including `_extension_llm_runner__srcs`)
# into this scope.
include(${EXECUTORCH_SRCS_FILE})

# build llm runner library
# Prefix every listed source path with the repository root.
list(TRANSFORM _extension_llm_runner__srcs PREPEND "${EXECUTORCH_ROOT}/")

# NOTE(review): this adds interface include directories to `extension_module`,
# a target that is not defined in this file. It requires that target to exist
# already when this directory is processed -- confirm the ordering is
# guaranteed by the parent build.
target_include_directories(
extension_module INTERFACE ${_common_include_directories}
)

add_library(extension_llm_runner STATIC ${_extension_llm_runner__srcs})

# NOTE(review): build/cmake_deps.toml lists `executorch_no_prim_ops` and
# `extension_runner_util` for this target, while this list uses
# `extension_data_loader` instead -- confirm which set is intended.
set(runner_deps executorch extension_module extension_data_loader)

# PUBLIC: consumers of extension_llm_runner also link against runner_deps.
target_link_libraries(extension_llm_runner PUBLIC ${runner_deps})

# INTERFACE: these include paths apply to consumers of the library, not to the
# compilation of the library's own sources.
target_include_directories(
extension_llm_runner INTERFACE ${_common_include_directories}
${EXECUTORCH_ROOT}
)
26 changes: 26 additions & 0 deletions extension/llm/runner/image.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

// A simple image struct.

#pragma once
#include <cstdint>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <vector>

namespace torch::executor {

/**
 * A raw image buffer together with its dimensions.
 *
 * The dimension fields default to zero so that a default-constructed Image is
 * a well-defined empty image instead of carrying indeterminate sizes. The
 * struct remains an aggregate, so `Image{data, width, height, channels}`
 * continues to work.
 */
struct Image {
  // Pixel bytes. Assuming NCHW format.
  std::vector<uint8_t> data;
  int32_t width = 0;
  int32_t height = 0;
  int32_t channels = 0;
};

} // namespace torch::executor
36 changes: 36 additions & 0 deletions extension/llm/runner/image_prefiller.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

// Given an image tensor, prefill the KV cache of a multimodal LLM.

#pragma once

#include <executorch/extension/llm/runner/image.h>
#include <executorch/extension/module/module.h>

namespace torch::executor {

/**
 * Abstract interface for feeding an image into a multimodal LLM Module.
 * Concrete subclasses implement prefill().
 *
 * Assuming kv cache and parallel prefill are enabled.
 */
class ImagePrefiller {
 public:
  /**
   * @param module The Module to run prefill on. The pointer is stored as-is;
   * this class never frees it, so the caller must keep it alive.
   */
  explicit ImagePrefiller(Module* module) : module_(module) {}

  // This is a polymorphic base that gets destroyed through base-class
  // pointers (e.g. std::unique_ptr<ImagePrefiller>), so the destructor must
  // be virtual for derived destructors to run.
  virtual ~ImagePrefiller() = default;

  /**
   * Prefill an LLM Module with the given image input.
   * @param image The image input to the multimodal LLM.
   * @param start_pos The starting position in KV cache of the input in the LLM
   * @return The next token of the LLM Module after prefill.
   */
  virtual Result<exec_aten::Tensor> prefill(
      Image& image,
      int64_t start_pos = 0) = 0;

 protected:
  // Borrowed Module pointer supplied at construction.
  Module* module_;
};

} // namespace torch::executor
92 changes: 92 additions & 0 deletions extension/llm/runner/multimodal_runner.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

// A simple multimodal LLM runner that includes preprocessing and
// postprocessing logic. The runner takes images and a text prompt as input
// and emits generated text as output.

#pragma once

#include <cstdint>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <functional>
#include <memory>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <string>
#include <type_traits>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <unordered_map>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <unordered_set>
// patternlint-disable-next-line executorch-cpp-nostdinc
#include <vector>

#include <executorch/extension/llm/runner/image.h>
#include <executorch/extension/llm/runner/image_prefiller.h>
#include <executorch/extension/llm/runner/stats.h>
#include <executorch/extension/llm/runner/text_decoder_runner.h>
#include <executorch/extension/llm/runner/text_prefiller.h>
#include <executorch/extension/llm/runner/text_token_generator.h>
#include <executorch/extension/llm/sampler/sampler.h>
#include <executorch/extension/llm/tokenizer/tokenizer.h>
#include <executorch/extension/module/module.h>
#include <executorch/extension/runner_util/managed_tensor.h>

namespace torch::executor {
// Make the shared LLM stats type available in this namespace as `Stats`.
using Stats = ::executorch::llm::Stats;

class MultimodalRunner {
public:
explicit MultimodalRunner(
const std::string& model_path,
const std::string& tokenizer_path,
const float temperature = 0.8f)
: temperature_(temperature),
module_(std::make_unique<Module>(model_path, Module::LoadMode::File)),
tokenizer_path_(tokenizer_path) {
ET_LOG(
Info,
"Creating Multimodal LLM runner: model_path=%s, tokenizer_path=%s",
model_path.c_str(),
tokenizer_path.c_str());
}

virtual bool is_loaded() = 0;
virtual Error load() = 0;
virtual Error generate(
std::vector<Image>& images,
const std::string& prompt,
int32_t seq_len = 1024,
std::function<void(const std::string&)> token_callback = {},
std::function<void(const Stats&)> stats_callback = {}) = 0;

inline void stop() {
text_token_generator_->stop();
}

protected:
// metadata
int32_t vocab_size_;
int32_t bos_id_;
int32_t eos_id_;
int32_t n_bos_;
int32_t n_eos_;
int32_t max_seq_len_;
float temperature_;

// model
std::unordered_set<std::string> model_methods_;
std::unique_ptr<Module> module_;
std::unique_ptr<TextDecoderRunner> text_decoder_runner_;
std::unique_ptr<TextPrefiller> text_prefiller_;
std::unique_ptr<ImagePrefiller> image_prefiller_;
std::unique_ptr<TextTokenGenerator> text_token_generator_;
std::string tokenizer_path_;
std::unique_ptr<Tokenizer> tokenizer_;

// stats
Stats stats_;
};

} // namespace torch::executor
17 changes: 17 additions & 0 deletions extension/llm/runner/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,20 @@ def define_common_targets():
"//executorch/extension/module:module" + aten_suffix,
],
)

# Library target for the multimodal runner interface. It lists only exported
# headers (no source files) and re-exports the text runner components that
# those headers include.
runtime.cxx_library(
name = "runner_lib" + aten_suffix,
# Public headers added for the multimodal runner base class.
exported_headers = [
"image_prefiller.h",
"image.h",
"multimodal_runner.h",
],
visibility = [
"@EXECUTORCH_CLIENTS",
],
# Sibling targets in this file; each is suffixed with aten_suffix so it
# matches the variant of this library being defined.
exported_deps = [
":text_decoder_runner" + aten_suffix,
":text_prefiller" + aten_suffix,
":text_token_generator" + aten_suffix,
],
)

0 comments on commit 6efc222

Please sign in to comment.