From f327e53b3bb176a9a0bceab1bb94d6d754acedc5 Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Mon, 26 Feb 2024 20:45:03 -0800
Subject: [PATCH] Add model exporting and inferencing steps into Llama runner
 cmake CI job (#2092)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/2092

As titled

Reviewed By: lucylq

Differential Revision: D54145292

fbshipit-source-id: ea1073f0d3f7b4ae4b4e6bdb591f7ba760065ab2
---
 .ci/scripts/test_llama.sh                   | 82 ++++++++++++++++-----
 .ci/scripts/utils.sh                        |  9 +++
 .github/workflows/pull.yml                  | 32 ++------
 examples/models/llama2/test_llama_runner.sh | 36 +--------
 4 files changed, 79 insertions(+), 80 deletions(-)

diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
index 038ec1c643..bd7be4495a 100644
--- a/.ci/scripts/test_llama.sh
+++ b/.ci/scripts/test_llama.sh
@@ -6,9 +6,11 @@
 # LICENSE file in the root directory of this source tree.
 
 set -exu
+# shellcheck source=/dev/null
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
 MODEL_NAME=$1 # stories110M.pt
-BUILD_TOOL=$2 # buck2
+BUILD_TOOL=$2 # buck2 or cmake
 DTYPE=$3 # fp16 or fp32
 
 if [[ -z "${MODEL_NAME:-}" ]]; then
@@ -26,33 +28,58 @@ if [[ -z "${DTYPE:-}" ]]; then
   exit 1
 fi
 
-which "${PYTHON_EXECUTABLE}"
+if [[ -z "${BUCK:-}" ]]; then
+  BUCK=buck2
+fi
 
-# Check build tool.
-if [[ "${BUILD_TOOL}" == "buck2" ]]; then
-  :
-else
-  echo "Invalid build tool ${BUILD_TOOL}. Only buck2 is supported atm"
-  exit 1
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
 fi
 
+which "${PYTHON_EXECUTABLE}"
+
+
+cmake_install_executorch_libraries() {
+  echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
+  rm -rf cmake-out
+  retry cmake -DBUCK2="$BUCK" \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+    -Bcmake-out .
+  cmake --build cmake-out -j9 --target install --config Release
+}
+
+cmake_build_llama_runner() {
+  echo "Building llama runner"
+  dir="examples/models/llama2"
+  retry cmake -DBUCK2="$BUCK" \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
+    -Bcmake-out/${dir} \
+    ${dir}
+  cmake --build cmake-out/${dir} -j9 --config Release
+
+}
+
 cleanup_files() {
   echo "Deleting downloaded and generated files"
   rm "${MODEL_NAME}"
   rm tokenizer.model
   rm tokenizer.bin
   rm "${EXPORTED_MODEL_NAME}"
+  rm result.txt
+  rm params.json
 }
 
 # Download and create artifacts.
 PARAMS="params.json"
 touch "${PARAMS}"
 if [[ "${MODEL_NAME}" == "stories110M.pt" ]]; then
-  # Download stories110M.pt and tokenizer from Github
-  wget "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
-  wget "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
-  # Create params.json file
-  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > "${PARAMS}"
+  download_stories_model_artifacts
 else
   echo "Unsupported model name ${MODEL_NAME}"
   exit 1
@@ -72,16 +99,35 @@ fi
 # Export model.
 EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
 echo "Exporting ${EXPORTED_MODEL_NAME}"
-python3 -m examples.models.llama2.export_llama -c stories110M.pt -p "${PARAMS}" -d "${DTYPE}"
+$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama -c stories110M.pt -p "${PARAMS}" -d "${DTYPE}"
 
 # Create tokenizer.bin.
echo "Creating tokenizer.bin" -buck2 run examples/models/llama2/tokenizer:tokenizer_py -- -t tokenizer.model -o tokenizer.bin +$PYTHON_EXECUTABLE -m examples.models.llama2.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin -# Run model. -echo "Running ${EXPORTED_MODEL_NAME} in portable mode" -RESULT=$(timeout 500s buck2 run examples/models/llama2:main -- --model_path="${EXPORTED_MODEL_NAME}" --tokenizer_path=tokenizer.bin --prompt="Once" --temperature=0) || true +RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=tokenizer.bin --prompt=Once --temperature=0 --seq_len=10" +# Check build tool. +echo "Running ${EXPORTED_MODEL_NAME} in portable mode" +if [[ "${BUILD_TOOL}" == "buck2" ]]; then + # Run model. + # shellcheck source=/dev/null + $BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt +elif [[ "${BUILD_TOOL}" == "cmake" ]]; then + cmake_install_executorch_libraries + cmake_build_llama_runner + # Run llama runner + NOW=$(date +"%H:%M:%S") + echo "Starting to run llama runner at ${NOW}" + # shellcheck source=/dev/null + cmake-out/examples/models/llama2/llama_main ${RUNTIME_ARGS} > result.txt + NOW=$(date +"%H:%M:%S") + echo "Finished at ${NOW}" +else + echo "Invalid build tool ${BUILD_TOOL}. Only buck2 is supported atm" + exit 1 +fi +RESULT=$(cat result.txt) # Check results. EXPECTED_PREFIX="Once upon a time," # Expected result - may take too long to generate: diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh index 2496bf6d02..04d3307220 100644 --- a/.ci/scripts/utils.sh +++ b/.ci/scripts/utils.sh @@ -131,3 +131,12 @@ cmake_install_executorch_lib() { -Bcmake-out . cmake --build cmake-out -j9 --target install --config Release } + +download_stories_model_artifacts() { + # Download stories110M.pt and tokenizer from Github + wget "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt" + wget "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model" + # Create params.json file + touch params.json + echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json +} diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 6e16e8ba8d..7e4dba0b84 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -83,12 +83,13 @@ jobs: # Build and test ExecuTorch PYTHON_EXECUTABLE=python bash .ci/scripts/test.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}" - test-llama-linux: - name: test-llama-linux + test-llama-runner-linux: + name: test-llama-runner-linux uses: pytorch/test-infra/.github/workflows/linux_job.yml@main strategy: matrix: dtype: [fp16, fp32] + build-tool: [buck2, cmake] fail-fast: false with: runner: linux.2xlarge @@ -102,13 +103,14 @@ jobs: conda activate "${CONDA_ENV}" DTYPE=${{ matrix.dtype }} + BUILD_TOOL=${{ matrix.build-tool }} # Setup executorch PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2 # Install requirements for export_llama PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh # Test llama2 - PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt buck2 "${DTYPE}" + PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" test-custom-ops-linux: name: test-custom-ops-linux @@ -213,27 +215,3 @@ jobs: uses: ./.github/workflows/_unittest.yml with: docker-image: executorch-ubuntu-22.04-clang12 - - test-llama-runner-cmake: - name: test-llama-runner - uses: 
pytorch/test-infra/.github/workflows/linux_job.yml@main - strategy: - matrix: - include: - - build-tool: cmake - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'true' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - BUILD_TOOL=${{ matrix.build-tool }} - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}" - # Test selective build - PYTHON_EXECUTABLE=python bash examples/models/llama2/test_llama_runner.sh "${BUILD_TOOL}" diff --git a/examples/models/llama2/test_llama_runner.sh b/examples/models/llama2/test_llama_runner.sh index b522c53c89..d0c44518ab 100644 --- a/examples/models/llama2/test_llama_runner.sh +++ b/examples/models/llama2/test_llama_runner.sh @@ -10,38 +10,4 @@ # 2. Build llama runner binary # 3. Run model with the llama runner binary with prompt set -e -# shellcheck source=/dev/null -source "$(dirname "${BASH_SOURCE[0]}")/../../../.ci/scripts/utils.sh" - -cmake_install_executorch_libraries() { - echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a" - rm -rf cmake-out - retry cmake -DBUCK2="$BUCK" \ - -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DCMAKE_BUILD_TYPE=Release \ - -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ - -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ - -Bcmake-out . - cmake --build cmake-out -j9 --target install --config Release -} - -cmake_build_llama_runner() { - echo "Building llama runner" - dir="examples/models/llama2" - retry cmake -DBUCK2="$BUCK" \ - -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DCMAKE_BUILD_TYPE=Release \ - -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \ - -Bcmake-out/${dir} \ - ${dir} - cmake --build cmake-out/${dir} -j9 --config Release - -} - -if [[ $1 == "cmake" ]]; -then - cmake_install_executorch_libraries - cmake_build_llama_runner - # TODO(larryliu0820): export a model and verify the result -fi +bash "$(dirname "${BASH_SOURCE[0]}")/../../../.ci/scripts/test_llama.sh" "$@"
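
Usage note: the commands below are a minimal sketch of how the updated test script can be exercised locally, mirroring one cell of the test-llama-runner-linux matrix above (BUILD_TOOL=cmake, DTYPE=fp32). They assume a Linux ExecuTorch checkout with the conda/Python environment already activated, as the CI job has before running these steps.

    # Set up ExecuTorch and the export_llama requirements (same steps as the CI job).
    PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
    PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
    # Export stories110M.pt, build the llama runner with cmake, and run a short generation;
    # the script writes the runner output to result.txt and checks it against
    # EXPECTED_PREFIX="Once upon a time,".
    PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt cmake fp32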