From a25db2f9548dbd1d58021f8e3b0742e85eb840fb Mon Sep 17 00:00:00 2001 From: Digant Desai Date: Fri, 6 Sep 2024 14:58:27 -0500 Subject: [PATCH] [WIP][Llava] Add support to cross compile llava_runner for Android Differential Revision: D62281425 Pull Request resolved: https://github.com/pytorch/executorch/pull/5108 --- .ci/scripts/test_llava.sh | 144 ++++++++++++++++++++------- examples/models/llava/CMakeLists.txt | 18 +++- examples/models/llava/main.cpp | 15 +++ 3 files changed, 139 insertions(+), 38 deletions(-) diff --git a/.ci/scripts/test_llava.sh b/.ci/scripts/test_llava.sh index 90a2afa11f..7dc6d15e40 100644 --- a/.ci/scripts/test_llava.sh +++ b/.ci/scripts/test_llava.sh @@ -9,48 +9,97 @@ set -exu # shellcheck source=/dev/null BUILD_TYPE=${1:-Debug} +TARGET_OS=${2:-Native} +BUILD_DIR=${3:-cmake-out} -echo "Building with BUILD_TYPE: $BUILD_TYPE" +echo "Building with BUILD_TYPE: $BUILD_TYPE, TARGET_OS: $TARGET_OS, BUILD_DIR: $BUILD_DIR" if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then - PYTHON_EXECUTABLE=python3 + PYTHON_EXECUTABLE=python3 fi +TARGET_OS_lower="$(echo "${TARGET_OS}" | awk '{print tolower($0)}')" +if [[ "${TARGET_OS_lower}" == "android" ]]; then + if [[ -z "${ANDROID_NDK}" ]]; then + echo "Set ANDROID_NDK environment variable to build for Android." 
+ exit 1 + fi +fi + +# Number of processes for a parallel build +NPROC=8 +if hash nproc &> /dev/null; then NPROC=$(nproc); fi + +EXECUTORCH_COMMON_CMAKE_ARGS=" \ + -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ + -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ + -DEXECUTORCH_BUILD_XNNPACK=ON \ + -DEXECUTORCH_DO_NOT_USE_CXX11_ABI=ON \ + -DEXECUTORCH_XNNPACK_SHARED_WORKSPACE=ON" + cmake_install_executorch_libraries() { - cmake \ - -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ - -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ - -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ - -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ - -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_XNNPACK=ON \ - -DEXECUTORCH_DO_NOT_USE_CXX11_ABI=ON \ - -DEXECUTORCH_XNNPACK_SHARED_WORKSPACE=ON \ - -Bcmake-out . - - - cmake --build cmake-out -j9 --target install --config ${BUILD_TYPE} + cmake \ + ${EXECUTORCH_COMMON_CMAKE_ARGS} \ + -B${BUILD_DIR} . + + cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE} +} + +cmake_install_executorch_libraries_for_android() { + cmake \ + -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \ + -DANDROID_ABI=arm64-v8a \ + -DANDROID_PLATFORM=android-23 \ + ${EXECUTORCH_COMMON_CMAKE_ARGS} \ + -B${BUILD_DIR} . 
+ + cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE} } + +LLAVA_COMMON_CMAKE_ARGS=" \ + -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ + -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ + -DEXECUTORCH_BUILD_XNNPACK=ON" + cmake_build_llava_runner() { dir=examples/models/llava python_lib=$($PYTHON_EXECUTABLE -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())') - cmake \ - -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ - -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ - -DEXECUTORCH_BUILD_XNNPACK=ON \ - -DCMAKE_PREFIX_PATH="$python_lib" \ - -Bcmake-out/${dir} \ + cmake \ + ${LLAVA_COMMON_CMAKE_ARGS} \ + -DCMAKE_PREFIX_PATH="$python_lib" \ + -B${BUILD_DIR}/${dir} \ ${dir} + cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${BUILD_TYPE} +} + - cmake --build cmake-out/${dir} -j9 --config ${BUILD_TYPE} +cmake_build_llava_runner_for_android() { + dir=examples/models/llava + python_lib=$($PYTHON_EXECUTABLE -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())') + + cmake \ + -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \ + -DANDROID_ABI=arm64-v8a \ + -DANDROID_PLATFORM=android-23 \ + ${LLAVA_COMMON_CMAKE_ARGS} \ + -DCMAKE_PREFIX_PATH="$python_lib" \ + -DLLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON \ + -B${BUILD_DIR}/${dir} \ + ${dir} + + cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${BUILD_TYPE} } # only export the one without custom op for now since it's @@ -81,13 +130,24 @@ run_and_verify() { echo "tokenizer.bin is missing." 
exit 1 fi - RUNTIME_ARGS="--model_path=llava.pte \ - --tokenizer_path=tokenizer.bin \ - --image_path=image.pt \ - --prompt=ASSISTANT: \ - --temperature=0 \ - --seq_len=650" - cmake-out/examples/models/llava/llava_main ${RUNTIME_ARGS} > result.txt + + + + RUNTIME_ARGS="--model_path=llava.pte \ + --tokenizer_path=tokenizer.bin \ + --image_path=image.pt \ + --prompt=ASSISTANT: \ + --temperature=0 \ + --seq_len=650" + + if [[ "${TARGET_OS_lower}" == "android" ]]; then + echo "Transfer relevant files to the phone via ADB and run llava_main with following args," + echo "$ llava_main ${RUNTIME_ARGS} " + exit 0; + fi + + ${BUILD_DIR}/examples/models/llava/llava_main ${RUNTIME_ARGS} > result.txt + # verify result.txt RESULT=$(cat result.txt) # set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes tokens. @@ -110,8 +170,20 @@ run_and_verify() { fi } -cmake_install_executorch_libraries -cmake_build_llava_runner +# Step1. Build stuff +if [[ "${TARGET_OS_lower}" == "android" ]]; then + cmake_install_executorch_libraries_for_android + cmake_build_llava_runner_for_android +elif [[ "${TARGET_OS_lower}" == "native" ]]; then + cmake_install_executorch_libraries + cmake_build_llava_runner +else + echo "Invalid TARGET_OS ($2): ${TARGET_OS}" +fi + +# Step2. Generate the PTE export_llava + +# Step3. 
Run prepare_image_tensor run_and_verify diff --git a/examples/models/llava/CMakeLists.txt b/examples/models/llava/CMakeLists.txt index 444f6b3389..c36e39a04c 100644 --- a/examples/models/llava/CMakeLists.txt +++ b/examples/models/llava/CMakeLists.txt @@ -21,6 +21,9 @@ project(llava) # Duplicating options as root CMakeLists.txt option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "Build the optimized kernels" OFF) +# This is a temporary hack to get around Torch dep so we can test this on android +option(LLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE "Hack option to feed dummy image to remove torch.load dep" OFF) + include(CMakeDependentOption) # # pthreadpool: build pthreadpool library. Disable on unsupported platforms @@ -70,7 +73,14 @@ set(_common_include_directories ${EXECUTORCH_ROOT}/..) set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags) find_package(gflags REQUIRED) -find_package(Torch CONFIG REQUIRED) +# Avoid torch dep from torch.load()-ing the image. +# This is a temporary hack. +if(LLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE) + add_definitions(-DLLAVA_NO_TORCH_DUMMY_IMAGE=1) + message("Building the runner without Torch, feeding a dummy image!") +else() + find_package(Torch CONFIG REQUIRED) +endif() add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) # @@ -95,7 +105,11 @@ endif() # llava_runner library add_subdirectory(runner) -set(link_libraries gflags torch) +set(LINK_LIBS gflags) +if(NOT LLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE) + list(APPEND LINK_LIBS torch) +endif() +set(link_libraries ${LINK_LIBS}) set(_srcs main.cpp) if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED) diff --git a/examples/models/llava/main.cpp b/examples/models/llava/main.cpp index 171eb77077..53f6329b4d 100644 --- a/examples/models/llava/main.cpp +++ b/examples/models/llava/main.cpp @@ -8,7 +8,11 @@ #include #include +#ifndef LLAVA_NO_TORCH_DUMMY_IMAGE #include +#else +#include // std::fill +#endif #if defined(ET_USE_THREADPOOL) #include @@ -80,6 +84,15 @@ int32_t main(int32_t argc, char** argv) { // read image and resize 
the longest edge to 336 std::vector image_data; + +#ifdef LLAVA_NO_TORCH_DUMMY_IMAGE + // Work without torch using a random data + image_data.resize(3 * 240 * 336); + std::fill(image_data.begin(), image_data.end(), 0); // black + std::array image_shape = {3, 240, 336}; + std::vector images = { + {.data = image_data, .width = image_shape[2], .height = image_shape[1]}}; +#else // LLAVA_NO_TORCH_DUMMY_IMAGE // cv::Mat image = cv::imread(image_path, cv::IMREAD_COLOR); // int longest_edge = std::max(image.rows, image.cols); // float scale_factor = 336.0f / longest_edge; @@ -102,6 +115,8 @@ int32_t main(int32_t argc, char** argv) { {.data = image_data, .width = static_cast(image_tensor.size(2)), .height = static_cast(image_tensor.size(1))}}; +#endif // LLAVA_NO_TORCH_DUMMY_IMAGE + // generate runner.generate(std::move(images), prompt, seq_len); return 0;