Commit

Add model exporting and inferencing steps into Llama runner cmake CI job (pytorch#2092)

Summary:
Pull Request resolved: pytorch#2092

As titled

Reviewed By: lucylq

Differential Revision: D54145292

fbshipit-source-id: ea1073f0d3f7b4ae4b4e6bdb591f7ba760065ab2
larryliu0820 authored and facebook-github-bot committed Feb 27, 2024
1 parent 52dde47 commit f327e53
Showing 4 changed files with 79 additions and 80 deletions.
82 changes: 64 additions & 18 deletions .ci/scripts/test_llama.sh
@@ -6,9 +6,11 @@
# LICENSE file in the root directory of this source tree.

set -exu
# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

MODEL_NAME=$1 # stories110M.pt
BUILD_TOOL=$2 # buck2
BUILD_TOOL=$2 # buck2 or cmake
DTYPE=$3 # fp16 or fp32

if [[ -z "${MODEL_NAME:-}" ]]; then
@@ -26,33 +28,58 @@ if [[ -z "${DTYPE:-}" ]]; then
exit 1
fi
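
For context, the script takes three positional arguments (model, build tool, dtype). A typical invocation, matching the CI job shown further down, would be:

    # Hypothetical local run; PYTHON_EXECUTABLE falls back to python3 when unset.
    PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt cmake fp32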

which "${PYTHON_EXECUTABLE}"
if [[ -z "${BUCK:-}" ]]; then
BUCK=buck2
fi

# Check build tool.
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
:
else
echo "Invalid build tool ${BUILD_TOOL}. Only buck2 is supported atm"
exit 1
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
PYTHON_EXECUTABLE=python3
fi

which "${PYTHON_EXECUTABLE}"


cmake_install_executorch_libraries() {
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
rm -rf cmake-out
retry cmake -DBUCK2="$BUCK" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-Bcmake-out .
cmake --build cmake-out -j9 --target install --config Release
}
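
The configure step is wrapped in retry from the sourced utils.sh. Its definition is not shown in this diff; a representative sketch of such a helper, assumed here, is:

    # Assumed sketch of a retry helper like the one utils.sh provides:
    retry() {
      # Try the command, then back off and retry twice before giving up.
      "$@" || (sleep 30 && "$@") || (sleep 60 && "$@")
    }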

cmake_build_llama_runner() {
echo "Building llama runner"
dir="examples/models/llama2"
retry cmake -DBUCK2="$BUCK" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-Bcmake-out/${dir} \
${dir}
cmake --build cmake-out/${dir} -j9 --config Release
}

cleanup_files() {
echo "Deleting downloaded and generated files"
rm "${MODEL_NAME}"
rm tokenizer.model
rm tokenizer.bin
rm "${EXPORTED_MODEL_NAME}"
rm result.txt
rm params.json
}
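
Note that the script runs under set -exu, so cleanup_files aborts on the first file that was never created. A hypothetical hardening (not part of this change) would use rm -f instead:

    # Assumed defensive variant: -f ignores files that do not exist.
    rm -f "${MODEL_NAME}" tokenizer.model tokenizer.bin "${EXPORTED_MODEL_NAME}" result.txt params.json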

# Download and create artifacts.
PARAMS="params.json"
touch "${PARAMS}"
if [[ "${MODEL_NAME}" == "stories110M.pt" ]]; then
# Download stories110M.pt and tokenizer from Github
wget "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
wget "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
# Create params.json file
echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > "${PARAMS}"
download_stories_model_artifacts
else
echo "Unsupported model name ${MODEL_NAME}"
exit 1
@@ -72,16 +99,35 @@ fi
# Export model.
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
echo "Exporting ${EXPORTED_MODEL_NAME}"
python3 -m examples.models.llama2.export_llama -c stories110M.pt -p "${PARAMS}" -d "${DTYPE}"
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama -c stories110M.pt -p "${PARAMS}" -d "${DTYPE}"

# Create tokenizer.bin.
echo "Creating tokenizer.bin"
buck2 run examples/models/llama2/tokenizer:tokenizer_py -- -t tokenizer.model -o tokenizer.bin
$PYTHON_EXECUTABLE -m examples.models.llama2.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
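
With the stories110M artifacts downloaded above, the export and tokenizer steps reduce to commands along these lines (concrete values assumed for illustration; the exported file name is derived in lines elided from this hunk):

    # Assumed expansion for DTYPE=fp32 with the default python3 interpreter:
    python3 -m examples.models.llama2.export_llama -c stories110M.pt -p params.json -d fp32
    python3 -m examples.models.llama2.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin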

# Run model.
echo "Running ${EXPORTED_MODEL_NAME} in portable mode"
RESULT=$(timeout 500s buck2 run examples/models/llama2:main -- --model_path="${EXPORTED_MODEL_NAME}" --tokenizer_path=tokenizer.bin --prompt="Once" --temperature=0) || true

RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=tokenizer.bin --prompt=Once --temperature=0 --seq_len=10"
# Check build tool.
echo "Running ${EXPORTED_MODEL_NAME} in portable mode"
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
# Run model.
# shellcheck source=/dev/null
$BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt
elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
cmake_install_executorch_libraries
cmake_build_llama_runner
# Run llama runner
NOW=$(date +"%H:%M:%S")
echo "Starting to run llama runner at ${NOW}"
# shellcheck source=/dev/null
cmake-out/examples/models/llama2/llama_main ${RUNTIME_ARGS} > result.txt
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"
else
echo "Invalid build tool ${BUILD_TOOL}. Only buck2 is supported atm"
exit 1
fi
RESULT=$(cat result.txt)
# Check results.
EXPECTED_PREFIX="Once upon a time,"
# Expected result - may take too long to generate:
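
The comparison itself sits in lines elided from this hunk; a minimal sketch of such a prefix check, assuming both paths clean up, is:

    # Sketch only; the actual check lives in the elided part of the diff.
    if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
      echo "Success: '${RESULT}' starts with '${EXPECTED_PREFIX}'"
      cleanup_files
    else
      echo "Failure: unexpected result '${RESULT}'"
      cleanup_files
      exit 1
    fi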
9 changes: 9 additions & 0 deletions .ci/scripts/utils.sh
@@ -131,3 +131,12 @@ cmake_install_executorch_lib() {
-Bcmake-out .
cmake --build cmake-out -j9 --target install --config Release
}

download_stories_model_artifacts() {
# Download stories110M.pt and tokenizer from Github
wget "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
wget "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
# Create params.json file
touch params.json
echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
}
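
Callers get the helper by sourcing utils.sh first, as test_llama.sh above now does; a minimal usage sketch:

    source .ci/scripts/utils.sh
    download_stories_model_artifacts  # leaves stories110M.pt, tokenizer.model, params.json in $PWD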
32 changes: 5 additions & 27 deletions .github/workflows/pull.yml
@@ -83,12 +83,13 @@ jobs:
# Build and test ExecuTorch
PYTHON_EXECUTABLE=python bash .ci/scripts/test.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}"
test-llama-linux:
name: test-llama-linux
test-llama-runner-linux:
name: test-llama-runner-linux
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
strategy:
matrix:
dtype: [fp16, fp32]
build-tool: [buck2, cmake]
fail-fast: false
with:
runner: linux.2xlarge
@@ -102,13 +103,14 @@ jobs:
conda activate "${CONDA_ENV}"
DTYPE=${{ matrix.dtype }}
BUILD_TOOL=${{ matrix.build-tool }}
# Setup executorch
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt buck2 "${DTYPE}"
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}"
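
The two matrix axes fan out into four runner jobs, each of which effectively executes one of:

    PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt buck2 fp16
    PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt buck2 fp32
    PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt cmake fp16
    PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt cmake fp32
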
test-custom-ops-linux:
name: test-custom-ops-linux
@@ -213,27 +215,3 @@ jobs:
uses: ./.github/workflows/_unittest.yml
with:
docker-image: executorch-ubuntu-22.04-clang12

test-llama-runner-cmake:
name: test-llama-runner
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
strategy:
matrix:
include:
- build-tool: cmake
fail-fast: false
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-clang12
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
BUILD_TOOL=${{ matrix.build-tool }}
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
# Test selective build
PYTHON_EXECUTABLE=python bash examples/models/llama2/test_llama_runner.sh "${BUILD_TOOL}"
36 changes: 1 addition & 35 deletions examples/models/llama2/test_llama_runner.sh
@@ -10,38 +10,4 @@
# 2. Build llama runner binary
# 3. Run model with the llama runner binary with prompt
set -e
# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/../../../.ci/scripts/utils.sh"

cmake_install_executorch_libraries() {
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
rm -rf cmake-out
retry cmake -DBUCK2="$BUCK" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-Bcmake-out .
cmake --build cmake-out -j9 --target install --config Release
}

cmake_build_llama_runner() {
echo "Building llama runner"
dir="examples/models/llama2"
retry cmake -DBUCK2="$BUCK" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-Bcmake-out/${dir} \
${dir}
cmake --build cmake-out/${dir} -j9 --config Release

}

if [[ $1 == "cmake" ]];
then
cmake_install_executorch_libraries
cmake_build_llama_runner
# TODO(larryliu0820): export a model and verify the result
fi
bash "$(dirname "${BASH_SOURCE[0]}")/../../../.ci/scripts/test_llama.sh" "$@"
