From f327e53b3bb176a9a0bceab1bb94d6d754acedc5 Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Mon, 26 Feb 2024 20:45:03 -0800
Subject: [PATCH] Add model exporting and inferencing steps into Llama runner
 cmake CI job (#2092)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/2092

As titled

Reviewed By: lucylq

Differential Revision: D54145292

fbshipit-source-id: ea1073f0d3f7b4ae4b4e6bdb591f7ba760065ab2
---
 .ci/scripts/test_llama.sh                   | 82 ++++++++++++++++-----
 .ci/scripts/utils.sh                        |  9 +++
 .github/workflows/pull.yml                  | 32 ++------
 examples/models/llama2/test_llama_runner.sh | 36 +--------
 4 files changed, 79 insertions(+), 80 deletions(-)

diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
index 038ec1c643..bd7be4495a 100644
--- a/.ci/scripts/test_llama.sh
+++ b/.ci/scripts/test_llama.sh
@@ -6,9 +6,11 @@
 # LICENSE file in the root directory of this source tree.
 
 set -exu
+# shellcheck source=/dev/null
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
 MODEL_NAME=$1 # stories110M.pt
-BUILD_TOOL=$2 # buck2
+BUILD_TOOL=$2 # buck2 or cmake
 DTYPE=$3 # fp16 or fp32
 
 if [[ -z "${MODEL_NAME:-}" ]]; then
@@ -26,33 +28,58 @@ if [[ -z "${DTYPE:-}" ]]; then
   exit 1
 fi
 
-which "${PYTHON_EXECUTABLE}"
+if [[ -z "${BUCK:-}" ]]; then
+  BUCK=buck2
+fi
 
-# Check build tool.
-if [[ "${BUILD_TOOL}" == "buck2" ]]; then
-  :
-else
-  echo "Invalid build tool ${BUILD_TOOL}. Only buck2 is supported atm"
-  exit 1
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
 fi
 
+which "${PYTHON_EXECUTABLE}"
+
+
+cmake_install_executorch_libraries() {
+  echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
+  rm -rf cmake-out
+  retry cmake -DBUCK2="$BUCK" \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+    -Bcmake-out .
+  cmake --build cmake-out -j9 --target install --config Release
+}
+
+cmake_build_llama_runner() {
+  echo "Building llama runner"
+  dir="examples/models/llama2"
+  retry cmake -DBUCK2="$BUCK" \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+    -Bcmake-out/${dir} \
+    ${dir}
+  cmake --build cmake-out/${dir} -j9 --config Release
+
+}
+
 cleanup_files() {
   echo "Deleting downloaded and generated files"
   rm "${MODEL_NAME}"
   rm tokenizer.model
   rm tokenizer.bin
   rm "${EXPORTED_MODEL_NAME}"
+  rm result.txt
+  rm params.json
 }
 
 # Download and create artifacts.
 PARAMS="params.json"
 touch "${PARAMS}"
 if [[ "${MODEL_NAME}" == "stories110M.pt" ]]; then
-  # Download stories110M.pt and tokenizer from Github
-  wget "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
-  wget "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
-  # Create params.json file
-  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > "${PARAMS}"
+  download_stories_model_artifacts
 else
   echo "Unsupported model name ${MODEL_NAME}"
   exit 1
@@ -72,16 +99,35 @@ fi
 # Export model.
 EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
 echo "Exporting ${EXPORTED_MODEL_NAME}"
-python3 -m examples.models.llama2.export_llama -c stories110M.pt -p "${PARAMS}" -d "${DTYPE}"
+$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama -c stories110M.pt -p "${PARAMS}" -d "${DTYPE}"
 
 # Create tokenizer.bin.
echo "Creating tokenizer.bin" -buck2 run examples/models/llama2/tokenizer:tokenizer_py -- -t tokenizer.model -o tokenizer.bin +$PYTHON_EXECUTABLE -m examples.models.llama2.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin -# Run model. -echo "Running ${EXPORTED_MODEL_NAME} in portable mode" -RESULT=$(timeout 500s buck2 run examples/models/llama2:main -- --model_path="${EXPORTED_MODEL_NAME}" --tokenizer_path=tokenizer.bin --prompt="Once" --temperature=0) || true +RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=tokenizer.bin --prompt=Once --temperature=0 --seq_len=10" +# Check build tool. +echo "Running ${EXPORTED_MODEL_NAME} in portable mode" +if [[ "${BUILD_TOOL}" == "buck2" ]]; then + # Run model. + # shellcheck source=/dev/null + $BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt +elif [[ "${BUILD_TOOL}" == "cmake" ]]; then + cmake_install_executorch_libraries + cmake_build_llama_runner + # Run llama runner + NOW=$(date +"%H:%M:%S") + echo "Starting to run llama runner at ${NOW}" + # shellcheck source=/dev/null + cmake-out/examples/models/llama2/llama_main ${RUNTIME_ARGS} > result.txt + NOW=$(date +"%H:%M:%S") + echo "Finished at ${NOW}" +else + echo "Invalid build tool ${BUILD_TOOL}. Only buck2 is supported atm" + exit 1 +fi +RESULT=$(cat result.txt) # Check results. EXPECTED_PREFIX="Once upon a time," # Expected result - may take too long to generate: diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh index 2496bf6d02..04d3307220 100644 --- a/.ci/scripts/utils.sh +++ b/.ci/scripts/utils.sh @@ -131,3 +131,12 @@ cmake_install_executorch_lib() { -Bcmake-out . cmake --build cmake-out -j9 --target install --config Release } + +download_stories_model_artifacts() { + # Download stories110M.pt and tokenizer from Github + wget "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt" + wget "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model" + # Create params.json file + touch params.json + echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json +} diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 6e16e8ba8d..7e4dba0b84 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -83,12 +83,13 @@ jobs: # Build and test ExecuTorch PYTHON_EXECUTABLE=python bash .ci/scripts/test.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}" - test-llama-linux: - name: test-llama-linux + test-llama-runner-linux: + name: test-llama-runner-linux uses: pytorch/test-infra/.github/workflows/linux_job.yml@main strategy: matrix: dtype: [fp16, fp32] + build-tool: [buck2, cmake] fail-fast: false with: runner: linux.2xlarge @@ -102,13 +103,14 @@ jobs: conda activate "${CONDA_ENV}" DTYPE=${{ matrix.dtype }} + BUILD_TOOL=${{ matrix.build-tool }} # Setup executorch PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2 # Install requirements for export_llama PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh # Test llama2 - PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt buck2 "${DTYPE}" + PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" test-custom-ops-linux: name: test-custom-ops-linux @@ -213,27 +215,3 @@ jobs: uses: ./.github/workflows/_unittest.yml with: docker-image: executorch-ubuntu-22.04-clang12 - - test-llama-runner-cmake: - name: test-llama-runner - uses: 
pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - matrix: - include: - - build-tool: cmake - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - BUILD_TOOL=${{ matrix.build-tool }} - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" - # Test selective build - PYTHON_EXECUTABLE=python bash examples/models/llama2/test_llama_runner.sh "${BUILD_TOOL}" diff --git a/examples/models/llama2/test_llama_runner.sh b/examples/models/llama2/test_llama_runner.sh index b522c53c89..d0c44518ab 100644 --- a/examples/models/llama2/test_llama_runner.sh +++ b/examples/models/llama2/test_llama_runner.sh @@ -10,38 +10,4 @@ # 2. Build llama runner binary # 3. Run model with the llama runner binary with prompt set -e -# shellcheck source=/dev/null -source "$(dirname "${BASH_SOURCE[0]}")/../../../.ci/scripts/utils.sh" - -cmake_install_executorch_libraries() { - echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a" - rm -rf cmake-out - retry cmake -DBUCK2="$BUCK" \ - -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DCMAKE_BUILD_TYPE=Release \ - -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ - -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ - -Bcmake-out . - cmake --build cmake-out -j9 --target install --config Release -} - -cmake_build_llama_runner() { - echo "Building llama runner" - dir="examples/models/llama2" - retry cmake -DBUCK2="$BUCK" \ - -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DCMAKE_BUILD_TYPE=Release \ - -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ - -Bcmake-out/${dir} \ - ${dir} - cmake --build cmake-out/${dir} -j9 --config Release - -} - -if [[ $1 == "cmake" ]]; -then - cmake_install_executorch_libraries - cmake_build_llama_runner - # TODO(larryliu0820): export a model and verify the result -fi +bash "$(dirname "${BASH_SOURCE[0]}")/../../../.ci/scripts/test_llama.sh" "$@"
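
Usage note: the commands below are a minimal sketch of how the updated test script can be exercised locally, mirroring one cell of the test-llama-runner-linux matrix above (BUILD_TOOL=cmake, DTYPE=fp32). They assume a Linux ExecuTorch checkout with the conda/Python environment already activated, as the CI job has before running these steps.

    # Set up ExecuTorch and the export_llama requirements (same steps as the CI job).
    PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
    PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
    # Export stories110M.pt, build the llama runner with cmake, and run a short generation;
    # the script writes the runner output to result.txt and checks it against
    # EXPECTED_PREFIX="Once upon a time,".
    PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt cmake fp32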