[llama] Build the runner with tiktoken by default
Differential Revision: D61830302

Pull Request resolved: pytorch#4921
larryliu0820 authored Aug 30, 2024
1 parent ff4a736 commit f99e25f
Showing 16 changed files with 108 additions and 91 deletions.
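In short: this commit deletes the `EXECUTORCH_USE_TIKTOKEN` CMake option and the `ET_USE_TIKTOKEN` preprocessor flag everywhere. The llama runner now always compiles both the BPE and Tiktoken tokenizers (pulling in re2 and abseil-cpp unconditionally) and chooses between them at runtime by first trying to parse the tokenizer artifact as BPE and falling back to Tiktoken on `InvalidArgument`.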
1 change: 1 addition & 0 deletions .ci/scripts/build-qnn-sdk.sh
@@ -6,6 +6,7 @@
# LICENSE file in the root directory of this source tree.

set -eux
+set -o xtrace

build_qnn_backend() {
echo "Start building qnn backend."
1 change: 1 addition & 0 deletions backends/qualcomm/scripts/build.sh
@@ -4,6 +4,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
set -e
+set -o xtrace

if [[ -z ${QNN_SDK_ROOT} ]]; then
echo "Please export QNN_SDK_ROOT=/path/to/qnn_sdk"
7 changes: 0 additions & 7 deletions build/build_android_llm_demo.sh
@@ -20,11 +20,6 @@ build_android_native_library() {
TOKENIZER="$2"
ANDROID_NDK="${ANDROID_NDK:-/opt/ndk}"
CMAKE_OUT="cmake-out-android-${ANDROID_ABI}"
-  if [[ $TOKENIZER == "tiktoken" ]]; then
-    EXECUTORCH_USE_TIKTOKEN=ON
-  else
-    EXECUTORCH_USE_TIKTOKEN=OFF
-  fi

cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
@@ -54,7 +49,6 @@ build_android_native_library() {
-DANDROID_ABI="$ANDROID_ABI" \
-DANDROID_PLATFORM=android-23 \
-DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-  -DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON \
@@ -72,7 +66,6 @@ build_android_native_library() {
-DEXECUTORCH_ENABLE_LOGGING=ON \
-DEXECUTORCH_LOG_LEVEL=Info \
-DEXECUTORCH_BUILD_LLAMA_JNI=ON \
-  -DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
-DCMAKE_BUILD_TYPE=Release \
-B"${CMAKE_OUT}"/extension/android

12 changes: 2 additions & 10 deletions examples/demo-apps/android/LlamaDemo/README.md
@@ -64,22 +64,14 @@ Note: `<path_to_android_ndk>` is the root for the NDK, which is usually under
`~/Library/Android/sdk/ndk/XX.Y.ZZZZZ` for macOS, and contains NOTICE and README.md.
We use `<path_to_android_ndk>/build/cmake/android.toolchain.cmake` for CMake to cross-compile.

-3. (Optional) If you need to use tiktoken as the tokenizer (for LLaMA3), set
-   `EXECUTORCH_USE_TIKTOKEN=ON` and later CMake will use it as the tokenizer.
-   If you need to run other models like LLaMA2, skip this step.
-
-   ```bash
-   export EXECUTORCH_USE_TIKTOKEN=ON # Only for LLaMA3
-   ```
-
-4. Build the Android Java extension code:
+3. Build the Android Java extension code:
```bash
pushd extension/android
./gradlew build
popd
```

-5. Run the following command to set up the required JNI library:
+4. Run the following command to set up the required JNI library:
```bash
pushd examples/demo-apps/android/LlamaDemo
./gradlew :app:setup
2 changes: 0 additions & 2 deletions examples/demo-apps/android/LlamaDemo/setup.sh
@@ -35,7 +35,6 @@ cmake examples/models/llama2 \
-DANDROID_ABI="$ANDROID_ABI" \
-DANDROID_PLATFORM=android-23 \
-DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-  -DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON \
@@ -50,7 +49,6 @@ cmake extension/android \
-DANDROID_PLATFORM=android-23 \
-DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-DEXECUTORCH_BUILD_LLAMA_JNI=ON \
-  -DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
-DCMAKE_BUILD_TYPE=Release \
-B"${CMAKE_OUT}"/extension/android

19 changes: 0 additions & 19 deletions examples/models/llama2/CMakeLists.txt
@@ -21,8 +21,6 @@ project(llama_runner)
# Duplicating options as root CMakeLists.txt
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "Build the optimized kernels" OFF)

-option(EXECUTORCH_USE_TIKTOKEN "Use Tiktoken as a tokenizer" OFF)
-
include(CMakeDependentOption)
#
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
@@ -94,23 +92,6 @@ endif()

# llama_runner library
add_subdirectory(runner)
-if(EXECUTORCH_USE_TIKTOKEN)
-  # find RE2 for tokenizer
-  set(ABSL_ENABLE_INSTALL ON)
-  set(ABSL_PROPAGATE_CXX_STD ON)
-  set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
-  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-  add_subdirectory(
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/third-party/abseil-cpp
-    ${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
-  )
-  add_subdirectory(
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/third-party/re2
-    ${CMAKE_CURRENT_BINARY_DIR}/re2
-  )
-  set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
-  target_link_libraries(llama_runner PUBLIC re2::re2)
-endif()

set(link_libraries gflags)
set(_srcs main.cpp)
3 changes: 0 additions & 3 deletions examples/models/llama2/README.md
@@ -227,8 +227,6 @@ Note for Mac users: There's a known linking issue with Xcode 15.1. Refer to the
cmake --build cmake-out/examples/models/llama2 -j16 --config Release
```
-For Llama3, add `-DEXECUTORCH_USE_TIKTOKEN=ON` option when building the llama runner.
-
3. Run model. Run options available [here](https://github.com/pytorch/executorch/blob/main/examples/models/llama2/main.cpp#L18-L40).
```
cmake-out/examples/models/llama2/llama_main --model_path=<model pte file> --tokenizer_path=<tokenizer.bin> --prompt=<prompt>
@@ -283,7 +281,6 @@ cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \

cmake --build cmake-out-android/examples/models/llama2 -j16 --config Release
```
-For Llama3, add `-DEXECUTORCH_USE_TIKTOKEN=ON` option when building the llama runner.
**2. Run on Android via adb shell**
36 changes: 25 additions & 11 deletions examples/models/llama2/runner/CMakeLists.txt
@@ -41,16 +41,13 @@ target_include_directories(
extension_module INTERFACE ${_common_include_directories}
)

-if(EXECUTORCH_USE_TIKTOKEN)
-  list(
-    APPEND _llama_runner__srcs
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/tiktoken.cpp
-  )
-  list(APPEND _llama_runner__srcs
-       ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp
-  )
-  set(_preprocessor_flag -DET_USE_TIKTOKEN)
-endif()
+list(
+  APPEND _llama_runner__srcs
+  ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/tiktoken.cpp
+)
+list(APPEND _llama_runner__srcs
+     ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp
+)

if(CMAKE_TOOLCHAIN_IOS
OR ANDROID
@@ -63,7 +60,24 @@ else()
add_library(llama_runner SHARED ${_llama_runner__srcs})
endif()

-set(llama_runner_deps executorch extension_module extension_data_loader)
+# find RE2 for tokenizer, build tiktoken
+set(ABSL_ENABLE_INSTALL ON)
+set(ABSL_PROPAGATE_CXX_STD ON)
+set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+add_subdirectory(
+  ${EXECUTORCH_ROOT}/extension/llm/third-party/abseil-cpp
+  ${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
+)
+add_subdirectory(
+  ${EXECUTORCH_ROOT}/extension/llm/third-party/re2
+  ${CMAKE_CURRENT_BINARY_DIR}/re2
+)
+set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
+
+set(llama_runner_deps executorch extension_module extension_data_loader
+    re2::re2
+)

target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})

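Because tiktoken is now always compiled into `llama_runner`, RE2 becomes an unconditional dependency: tiktoken-style tokenizers pre-split text with a regular expression before BPE ranking, and RE2 supplies linear-time matching for that step. Below is a minimal, self-contained sketch of that kind of pre-tokenization split. The pattern and program are illustrative assumptions, not ExecuTorch code; only the RE2 calls are the actual API being linked in:

```cpp
#include <iostream>
#include <string>

#include <re2/re2.h>

int main() {
  // Tiktoken-style pre-tokenization: chop input into word / symbol chunks
  // before BPE ranking. This pattern is a simplified stand-in.
  RE2 pattern("(\\w+|[^\\s\\w])");
  re2::StringPiece input("Hello, world! 123");
  std::string piece;
  // FindAndConsume advances `input` past each match and stores the capture.
  while (RE2::FindAndConsume(&input, pattern, &piece)) {
    std::cout << "[" << piece << "]\n";  // [Hello] [,] [world] [!] [123]
  }
  return 0;
}
```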
25 changes: 13 additions & 12 deletions examples/models/llama2/runner/runner.cpp
@@ -16,11 +16,8 @@
#include <executorch/extension/llm/runner/util.h>
#include <executorch/extension/runner_util/managed_tensor.h>

-#if ET_USE_TIKTOKEN
#include <executorch/examples/models/llama2/tokenizer/llama_tiktoken.h>
-#else /* BPE */
#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
-#endif /* ET_USE_TIKTOKEN*/

namespace torch::executor {
namespace {
@@ -46,13 +43,6 @@ Runner::Runner(
: temperature_(temperature),
module_(std::make_unique<Module>(model_path, Module::LoadMode::File)),
tokenizer_path_(tokenizer_path),
-      tokenizer_(
-#if ET_USE_TIKTOKEN
-          get_tiktoken_for_llama()
-#else
-          std::make_unique<BPETokenizer>()
-#endif
-          ),
metadata_({
{kAppendEosToPrompt, false},
{kEnableDynamicShape, false},
@@ -79,8 +69,19 @@ Error Runner::load() {
return Error::Ok;
}
ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward"));
-
-  tokenizer_->load(tokenizer_path_);
+  // load tokenizer
+  tokenizer_ = nullptr;
+  tokenizer_ = std::make_unique<BPETokenizer>();
+  Error err = tokenizer_->load(tokenizer_path_);
+  if (err == Error::InvalidArgument) {
+    ET_LOG(
+        Info,
+        "Failed to load %s as a BPETokenizer artifact, trying Tiktoken",
+        tokenizer_path_.c_str());
+    tokenizer_.reset();
+    tokenizer_ = get_tiktoken_for_llama();
+    tokenizer_->load(tokenizer_path_);
+  }

ET_LOG(Info, "Reading metadata from model");

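The hunk above is the heart of the commit: tokenizer selection moves from a compile-time `#if` to a runtime probe of the tokenizer file. Here is a self-contained sketch of the same try-then-fall-back shape; the classes below are hypothetical stand-ins for illustration, not the ExecuTorch types:

```cpp
#include <iostream>
#include <memory>
#include <string>

// Hypothetical stand-ins for the ExecuTorch error and tokenizer types.
enum class Error { Ok, InvalidArgument };

struct Tokenizer {
  virtual ~Tokenizer() = default;
  virtual Error load(const std::string& path) = 0;
};

struct BPETokenizer : Tokenizer {
  Error load(const std::string& path) override {
    // A real loader rejects artifacts whose header it does not recognize;
    // here we pretend the file is not a BPE artifact.
    return Error::InvalidArgument;
  }
};

struct Tiktoken : Tokenizer {
  Error load(const std::string& path) override { return Error::Ok; }
};

// Probe the cheap format first; fall back on InvalidArgument. This mirrors
// the shape of the new Runner::load(), not its exact code.
std::unique_ptr<Tokenizer> load_tokenizer(const std::string& path) {
  std::unique_ptr<Tokenizer> tok = std::make_unique<BPETokenizer>();
  if (tok->load(path) == Error::InvalidArgument) {
    std::cout << path << " is not a BPE artifact, trying Tiktoken\n";
    tok = std::make_unique<Tiktoken>();
    tok->load(path);
  }
  return tok;
}

int main() {
  auto tok = load_tokenizer("tokenizer.model");
}
```

One side effect worth noting: a genuinely corrupt BPE artifact now surfaces its final error from the Tiktoken loader, since that is the last parser tried.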
8 changes: 1 addition & 7 deletions examples/models/llama2/runner/targets.bzl
@@ -8,9 +8,6 @@ def _get_operator_lib(aten = False):
else:
return ["//executorch/configurations:optimized_native_cpu_ops", "//executorch/extension/llm/custom_ops:custom_ops"]

-def use_tiktoken():
-    return native.read_config("llama", "use_tiktoken", "0") == "1"
-
def define_common_targets():
for aten in (True, False):
aten_suffix = "_aten" if aten else ""
@@ -26,7 +23,6 @@ def define_common_targets():
preprocessor_flags = [
"-DUSE_ATEN_LIB",
] if aten else [],
-exported_preprocessor_flags = ["-DET_USE_TIKTOKEN"] if use_tiktoken() else [],
visibility = [
"@EXECUTORCH_CLIENTS",
],
@@ -43,11 +39,9 @@
"//executorch/kernels/quantized:generated_lib" + aten_suffix,
"//executorch/runtime/core/exec_aten:lib" + aten_suffix,
"//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
-] + ([
"//executorch/examples/models/llama2/tokenizer:tiktoken",
-] if use_tiktoken() else [
"//executorch/extension/llm/tokenizer:bpe_tokenizer",
-]) + (_get_operator_lib(aten)) + ([
+] + (_get_operator_lib(aten)) + ([
# Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE)
# Therefore enable it explicitly for now to avoid failing tests
"//executorch/backends/vulkan:vulkan_backend_lib",
2 changes: 1 addition & 1 deletion examples/qualcomm/oss_scripts/llama2/CMakeLists.txt
@@ -24,6 +24,6 @@
)
target_link_libraries(
qnn_llama_runner qnn_executorch_backend full_portable_ops_lib
-  extension_data_loader extension_module gflags
+  extension_data_loader extension_module gflags re2::re2
)
target_compile_options(qnn_llama_runner PUBLIC ${_common_compile_options})
3 changes: 1 addition & 2 deletions examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt
@@ -31,7 +31,7 @@
)
target_link_libraries(
qaihub_llama2_7b_runner qnn_executorch_backend executorch_no_prim_ops
-  extension_data_loader extension_module gflags
+  extension_data_loader extension_module gflags re2::re2
)
target_compile_options(
qaihub_llama2_7b_runner PUBLIC ${_common_compile_options}
@@ -71,7 +71,6 @@ list(
_qaihub_llama3_8b_runner__srcs
${CMAKE_CURRENT_SOURCE_DIR}/../../../models/llama2/tokenizer/llama_tiktoken.cpp
)
-set(_preprocessor_flag -DET_USE_TIKTOKEN)

# build qaihub llama3 8b runner
add_executable(qaihub_llama3_8b_runner ${_qaihub_llama3_8b_runner__srcs})
2 changes: 1 addition & 1 deletion examples/qualcomm/qaihub_scripts/stable_diffusion/CMakeLists.txt
@@ -20,7 +20,7 @@ target_include_directories(
)
target_link_libraries(
qaihub_stable_diffusion_runner qnn_executorch_backend executorch_no_prim_ops
-  extension_data_loader extension_module gflags
+  extension_data_loader extension_module gflags re2::re2
)
target_compile_options(
qaihub_stable_diffusion_runner PUBLIC ${_common_compile_options}
29 changes: 14 additions & 15 deletions extension/android/CMakeLists.txt
@@ -129,19 +129,18 @@ if(EXECUTORCH_BUILD_LLAMA_JNI)
quantized_ops_lib
)
target_compile_options(executorch_llama_jni PUBLIC ${_common_compile_options})
-  if(EXECUTORCH_USE_TIKTOKEN)
-    set(ABSL_ENABLE_INSTALL ON)
-    set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
-    set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-    add_subdirectory(
-      ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/third-party/abseil-cpp
-      ${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
-    )
-    add_subdirectory(
-      ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/third-party/re2
-      ${CMAKE_CURRENT_BINARY_DIR}/re2
-    )
-    set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
-    target_link_libraries(executorch_llama_jni re2::re2)
-  endif()
+  # link re2
+  set(ABSL_ENABLE_INSTALL ON)
+  set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+  add_subdirectory(
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/third-party/abseil-cpp
+    ${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
+  )
+  add_subdirectory(
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/third-party/re2
+    ${CMAKE_CURRENT_BINARY_DIR}/re2
+  )
+  set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
+  target_link_libraries(executorch_llama_jni re2::re2)
endif()
47 changes: 47 additions & 0 deletions extension/llm/third-party/TARGETS
@@ -0,0 +1,47 @@
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

oncall("executorch")

runtime.cxx_library(
name = "abseil",
public_include_directories = ["abseil-cpp"],
srcs = glob(
["abseil-cpp/absl/**/*.cc"],
exclude = [
"abseil-cpp/absl/**/*test*.cc",
"abseil-cpp/absl/**/*mock*.cc",
"abseil-cpp/absl/**/*matchers*.cc",
"abseil-cpp/absl/**/*benchmark*.cc",
],
),
exported_linker_flags = select(
{
"DEFAULT": [],
"ovr_config//os:macos": ["-Wl,-framework,CoreFoundation"],
},
),
visibility = ["PUBLIC"],
_is_external_target = True,
)

runtime.cxx_library(
name = "re2",
public_include_directories = ["re2"],
srcs = glob(
[
"re2/re2/**/*.cc",
"re2/util/**/*.cc",
],
exclude = [
"re2/re2/**/*test*.cc",
"re2/re2/testing/*.cc",
"re2/re2/fuzzing/*.cc",
"re2/re2/**/*benchmark*.cc",
],
),
exported_deps = [
":abseil",
],
visibility = ["PUBLIC"],
_is_external_target = True,
)
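The new `TARGETS` file vendors abseil-cpp and re2 into Buck builds as `//extension/llm/third-party:abseil` and `:re2`. As a hedged sanity check of what a dependent target gains — the dep wiring is an assumption, only the `absl::` calls are real API — code like the following compiles once a target lists `:abseil` in its deps:

```cpp
#include <iostream>
#include <string>
#include <vector>

#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"

int main() {
  // absl::StrSplit / StrJoin come from the vendored abseil-cpp sources.
  std::vector<std::string> parts = absl::StrSplit("red,green,blue", ',');
  std::cout << absl::StrJoin(parts, " | ") << "\n";  // red | green | blue
  return 0;
}
```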
2 changes: 1 addition & 1 deletion shim/xplat/executorch/build/env_interface.bzl
@@ -41,7 +41,7 @@ _EXTERNAL_DEPS = {
"libtorch_python": "//third-party:libtorch_python",
"prettytable": "//third-party:prettytable",
"pybind11": "//third-party:pybind11",
"re2": [], # TODO(larryliu0820): Add support
"re2": "//extension/llm/third-party:re2",
"sentencepiece-py": [],
# Core C++ PyTorch functionality like Tensor and ScalarType.
"torch-core-cpp": "//third-party:libtorch",