Skip to content

Commit

Permalink
[WIP][Llava] Add support to cross compile llava_runner for Android
Browse files Browse the repository at this point in the history
Differential Revision: D62281425

Pull Request resolved: pytorch#5108
  • Loading branch information
digantdesai authored Sep 6, 2024
1 parent 2ce4ad1 commit a25db2f
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 38 deletions.
144 changes: 108 additions & 36 deletions .ci/scripts/test_llava.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,48 +9,97 @@ set -exu
# shellcheck source=/dev/null

# Positional arguments: build type, target OS, and build output directory.
BUILD_TYPE=${1:-Debug}
TARGET_OS=${2:-Native}
BUILD_DIR=${3:-cmake-out}

echo "Building with BUILD_TYPE: $BUILD_TYPE, TARGET_OS: $TARGET_OS, BUILD_DIR: $BUILD_DIR"

# Fall back to python3 when the caller did not provide an interpreter.
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

# Android cross-compilation needs the NDK toolchain; fail fast when unset.
TARGET_OS_lower="$(echo "${TARGET_OS}" | awk '{print tolower($0)}')"
if [[ "${TARGET_OS_lower}" == "android" ]]; then
  if [[ -z "${ANDROID_NDK}" ]]; then
    echo "Set ANDROID_NDK environment variable to build for Android."
    exit 1
  fi
fi

# Number of processes for a parallel build
NPROC=8
if hash nproc &> /dev/null; then NPROC=$(nproc); fi

# CMake arguments shared by the native and the Android ExecuTorch library
# configures below (install prefix, build type, and the extension/kernel
# feature flags). Kept as a single string and expanded unquoted so each
# -D flag becomes its own cmake argument.
EXECUTORCH_COMMON_CMAKE_ARGS=" \
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DEXECUTORCH_DO_NOT_USE_CXX11_ABI=ON \
-DEXECUTORCH_XNNPACK_SHARED_WORKSPACE=ON"

# Configure, build, and install the ExecuTorch core libraries for the host
# OS into ${BUILD_DIR}, using the shared flag set above.
cmake_install_executorch_libraries() {
    cmake \
        ${EXECUTORCH_COMMON_CMAKE_ARGS} \
        -B${BUILD_DIR} .

    cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
}

# Cross-compile and install the ExecuTorch core libraries for Android
# (arm64-v8a, API level 23) using the NDK-provided CMake toolchain file.
# Requires ANDROID_NDK to be set (checked at script start).
cmake_install_executorch_libraries_for_android() {
cmake \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-23 \
${EXECUTORCH_COMMON_CMAKE_ARGS} \
-B${BUILD_DIR} .

cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
}


# CMake arguments shared by the native and the Android llava runner builds.
# Expanded unquoted so each -D flag becomes its own cmake argument.
LLAVA_COMMON_CMAKE_ARGS=" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON"

# Configure and build the llava runner example for the host OS.
cmake_build_llava_runner() {
    dir=examples/models/llava
    # Site-packages path so CMake can locate the pip-installed torch package.
    python_lib=$($PYTHON_EXECUTABLE -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')

    cmake \
        ${LLAVA_COMMON_CMAKE_ARGS} \
        -DCMAKE_PREFIX_PATH="$python_lib" \
        -B${BUILD_DIR}/${dir} \
        ${dir}

    cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${BUILD_TYPE}
}
# Cross-compile the llava runner for Android (arm64-v8a, API level 23).
# LLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON builds the runner without the Torch
# dependency, feeding a dummy image instead (see examples/models/llava).
cmake_build_llava_runner_for_android() {
dir=examples/models/llava
# Site-packages path so CMake can locate the pip-installed torch package.
python_lib=$($PYTHON_EXECUTABLE -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')

cmake \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-23 \
${LLAVA_COMMON_CMAKE_ARGS} \
-DCMAKE_PREFIX_PATH="$python_lib" \
-DLLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON \
-B${BUILD_DIR}/${dir} \
${dir}

cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${BUILD_TYPE}
}

# only export the one without custom op for now since it's
Expand Down Expand Up @@ -81,13 +130,24 @@ run_and_verify() {
echo "tokenizer.bin is missing."
exit 1
fi
RUNTIME_ARGS="--model_path=llava.pte \
--tokenizer_path=tokenizer.bin \
--image_path=image.pt \
--prompt=ASSISTANT: \
--temperature=0 \
--seq_len=650"
cmake-out/examples/models/llava/llava_main ${RUNTIME_ARGS} > result.txt



RUNTIME_ARGS="--model_path=llava.pte \
--tokenizer_path=tokenizer.bin \
--image_path=image.pt \
--prompt=ASSISTANT: \
--temperature=0 \
--seq_len=650"

if [[ "${TARGET_OS_lower}" == "android" ]]; then
echo "Transfer relevant files to the phone via ADB and run llava_main with following args,"
echo "$ llava_main ${RUNTIME_ARGS} "
exit 0;
fi

${BUILD_DIR}/examples/models/llava/llava_main ${RUNTIME_ARGS} > result.txt

# verify result.txt
RESULT=$(cat result.txt)
# set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
Expand All @@ -110,8 +170,20 @@ run_and_verify() {
fi
}

# Step1. Build stuff — pick the native or Android build path by TARGET_OS.
if [[ "${TARGET_OS_lower}" == "android" ]]; then
    cmake_install_executorch_libraries_for_android
    cmake_build_llava_runner_for_android
elif [[ "${TARGET_OS_lower}" == "native" ]]; then
    cmake_install_executorch_libraries
    cmake_build_llava_runner
else
    echo "Invalid TARGET_OS ($2): ${TARGET_OS}"
    # Fail fast: continuing would export and "run" with no runner built.
    exit 1
fi

# Step2. Generate the PTE
export_llava

# Step3. Run
prepare_image_tensor
run_and_verify
18 changes: 16 additions & 2 deletions examples/models/llava/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ project(llava)
# Duplicating options as root CMakeLists.txt
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "Build the optimized kernels" OFF)

# This is a temporary hack to get around Torch dep so we can test this on android
# NOTE(review): remove once the runner no longer needs torch to load the image.
option(LLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE "Hack option to feed dummy image to remove torch.load dep" OFF)

include(CMakeDependentOption)
#
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
Expand Down Expand Up @@ -70,7 +73,14 @@ set(_common_include_directories ${EXECUTORCH_ROOT}/..)
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

# Avoid the Torch dependency (used to torch.load() the image) when the
# dummy-image hack is enabled. This is a temporary hack for Android testing.
if(LLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE)
  # Compile definition picked up by main.cpp to skip <torch/torch.h>.
  add_definitions(-DLLAVA_NO_TORCH_DUMMY_IMAGE=1)
  message(STATUS "Building the runner without Torch, feeding a dummy image!")
else()
  find_package(Torch CONFIG REQUIRED)
endif()
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)

#
Expand All @@ -95,7 +105,11 @@ endif()
# llava_runner library
add_subdirectory(runner)

# gflags is always required; torch is linked only when the real (torch-based)
# image loading path is compiled in.
set(LINK_LIBS gflags)
if(NOT LLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE)
  list(APPEND LINK_LIBS torch)
endif()
set(link_libraries ${LINK_LIBS})
set(_srcs main.cpp)

if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
Expand Down
15 changes: 15 additions & 0 deletions examples/models/llava/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@

#include <executorch/examples/models/llava/runner/llava_runner.h>
#include <gflags/gflags.h>
#ifndef LLAVA_NO_TORCH_DUMMY_IMAGE
#include <torch/torch.h>
#else
#include <algorithm> // std::fill
#endif

#if defined(ET_USE_THREADPOOL)
#include <executorch/extension/threadpool/cpuinfo_utils.h>
Expand Down Expand Up @@ -80,6 +84,15 @@ int32_t main(int32_t argc, char** argv) {

// read image and resize the longest edge to 336
std::vector<uint8_t> image_data;

#ifdef LLAVA_NO_TORCH_DUMMY_IMAGE
// Work without torch using a random data
image_data.resize(3 * 240 * 336);
std::fill(image_data.begin(), image_data.end(), 0); // black
std::array<int32_t, 3> image_shape = {3, 240, 336};
std::vector<torch::executor::Image> images = {
{.data = image_data, .width = image_shape[2], .height = image_shape[1]}};
#else // LLAVA_NO_TORCH_DUMMY_IMAGE
// cv::Mat image = cv::imread(image_path, cv::IMREAD_COLOR);
// int longest_edge = std::max(image.rows, image.cols);
// float scale_factor = 336.0f / longest_edge;
Expand All @@ -102,6 +115,8 @@ int32_t main(int32_t argc, char** argv) {
{.data = image_data,
.width = static_cast<int32_t>(image_tensor.size(2)),
.height = static_cast<int32_t>(image_tensor.size(1))}};
#endif // LLAVA_NO_TORCH_DUMMY_IMAGE

// generate
runner.generate(std::move(images), prompt, seq_len);
return 0;
Expand Down

0 comments on commit a25db2f

Please sign in to comment.