# [ET-VK] Introduce memory metadata tagging pass (#6669) #246
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow "trunk".
# Triggers: pushes to main and release/* branches, pushes of ciflow/trunk/* tags,
# pull requests that touch the PyTorch commit pin or anything under .ci/scripts,
# and manual dispatch.
name: trunk

on:
  push:
    branches:
      - main
      - release/*
    tags:
      - ciflow/trunk/*
  pull_request:
    paths:
      - .ci/docker/ci_commit_pins/pytorch.txt
      - .ci/scripts/**
  workflow_dispatch:

# One concurrency group per workflow + PR number (or commit SHA when there is no PR);
# a newer run in the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true
jobs:
  # Computes the model matrix for macOS and exposes it as the `models` job output,
  # which test-models-macos expands with fromJSON.
  # NOTE(review): the `models` step output is presumably written by
  # gather_test_models.py itself — confirm against that script.
  gather-models:
    runs-on: ubuntu-22.04
    outputs:
      models: ${{ steps.gather-models.outputs.models }}
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: 'false'
      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Extract the list of models to test
        id: gather-models
        run: |
          set -eux
          PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --target-os macos --event "${GITHUB_EVENT_NAME}"

  # Runs test_model.sh once per entry of the matrix produced by gather-models.
  test-models-macos:
    name: test-models-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    needs: gather-models
    strategy:
      matrix: ${{ fromJSON(needs.gather-models.outputs.models) }}
      fail-fast: false
    with:
      runner: ${{ matrix.runner }}
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: ${{ matrix.timeout }}
      script: |
        MODEL_NAME=${{ matrix.model }}
        BUILD_TOOL=${{ matrix.build-tool }}
        BACKEND=${{ matrix.backend }}
        DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }}
        bash .ci/scripts/setup-conda.sh
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        # Build and test ExecuTorch
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}"

  # Builds and tests the portable custom-ops example with cmake on macOS.
  test-custom-ops-macos:
    name: test-custom-ops-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        include:
          - build-tool: cmake
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        BUILD_TOOL=${{ matrix.build-tool }}
        bash .ci/scripts/setup-conda.sh
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        # Build and test custom ops
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"

  # Builds and tests the selective-build example with cmake on macOS.
  test-selective-build-macos:
    name: test-selective-build-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        include:
          - build-tool: cmake
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        BUILD_TOOL=${{ matrix.build-tool }}
        bash .ci/scripts/setup-conda.sh
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        # Build and test selective build
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"

  # Runs the demo backend delegation script on Linux for both buck2 and cmake.
  test-demo-backend-delegation:
    name: test-demo-backend-delegation
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    strategy:
      matrix:
        include:
          - build-tool: buck2
          - build-tool: cmake
      fail-fast: false
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-clang12
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        BUILD_TOOL=${{ matrix.build-tool }}
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
        # Test demo backend delegation
        PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}"

  # Runs the Arm ethos-u delegate examples through examples/arm/run.sh.
  test-arm-backend-delegation:
    name: test-arm-backend-delegation
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-arm-sdk
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        source .ci/scripts/utils.sh
        install_executorch
        install_arm
        # Increase number of files user can monitor to bypass buck failures.
        # Hopefully this is high enough for this setup.
        sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024
        # Test ethos-u delegate examples with run.sh
        PYTHON_EXECUTABLE=python bash examples/arm/run.sh examples/arm/ethos-u-scratch/

  # Runs the Arm backend unit tests under backends/arm/test with pytest.
  test-arm-reference-delegation:
    name: test-arm-reference-delegation
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-arm-sdk
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        source .ci/scripts/utils.sh
        install_executorch
        install_arm
        # Run arm unit tests
        pytest -c /dev/null -v -n auto --cov=./ --cov-report=xml backends/arm/test

  # Builds and tests the Core ML delegate via its build_all.sh script.
  test-coreml-delegate:
    name: test-coreml-delegate
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      runner: macos-13-xlarge
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        BUILD_TOOL=cmake
        bash .ci/scripts/setup-conda.sh
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        # Build and test coreml delegate
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh

  # Sets up macOS with EXECUTORCH_BUILD_PYBIND=ON and checks that portable_lib imports.
  test-pybind-build-macos:
    name: test-pybind-build-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        include:
          - build-tool: cmake
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 180
      script: |
        bash .ci/scripts/setup-conda.sh
        # build module for executorch.extension.pybindings.portable_lib
        BUILD_TOOL=${{ matrix.build-tool }}
        EXECUTORCH_BUILD_PYBIND=ON PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        # see if we can import the module successfully
        ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"

  # Runs test_llama.sh on stories110M for each dtype/mode combination below;
  # the mps and coreml modes first install their delegate requirements.
  test-llama-runner-macos:
    name: test-llama-runner-mac
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        dtype: [fp32]
        mode: [portable, xnnpack+kv+custom, mps, coreml]
        include:
          - dtype: bf16
            mode: portable
          - dtype: bf16
            mode: custom
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 900
      script: |
        DTYPE=${{ matrix.dtype }}
        MODE=${{ matrix.mode }}
        bash .ci/scripts/setup-conda.sh
        # Setup executorch
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh cmake
        if [[ "${MODE}" == "mps" ]]; then
          # Install mps delegate
          PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
          echo "Finishing installing mps."
        elif [[ "${MODE}" == "coreml" ]]; then
          # Install coreml delegate
          PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
          echo "Finishing installing coreml."
        fi
        # Install requirements for export_llama
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
        # Test llama2
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M cmake "${DTYPE}" "${MODE}"

  # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner.
  # test-llava-runner-macos:
  #   name: test-llava-runner-macos
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   strategy:
  #     fail-fast: false
  #   with:
  #     runner: macos-14-xlarge
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 900
  #     script: |
  #       BUILD_TOOL=cmake
  #       bash .ci/scripts/setup-conda.sh
  #       # Setup MacOS dependencies as there is no Docker support on MacOS atm
  #       GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  #       # install Llava requirements
  #       ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
  #       ${CONDA_RUN} bash examples/models/llava/install_requirements.sh
  #       # run python unittest
  #       ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava
  #       # run e2e (export, tokenizer and runner)
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh Release

  # Sets up the QNN dependencies and SDK, then runs test_model.sh with the
  # "qnn" backend for each model in the matrix.
  test-qnn-model:
    name: test-qnn-model
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    strategy:
      matrix:
        dtype: [fp32]
        model: [dl3, mv3, mv2, ic4, ic3, vit]
      fail-fast: false
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-clang12-android
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 900
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
        PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"

  # Runs test_model.sh for a fixed list of models against both the coreml and
  # mps backends, one log group per model/backend pair.
  test-apple-model:
    name: test-apple-model
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        BUILD_TOOL=cmake
        bash .ci/scripts/setup-conda.sh
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
        echo "Finishing installing coreml."
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
        echo "Finishing installing mps."
        # Build and test coreml model
        MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l)
        for MODEL_NAME in "${MODELS[@]}"; do
          echo "::group::Exporting coreml model: $MODEL_NAME"
          PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml"
          echo "::endgroup::"
          echo "::group::Exporting mps model: $MODEL_NAME"
          PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps"
          echo "::endgroup::"
        done

  # Builds ExecuTorch and the llama runner, downloads the tokenizer for each
  # Hugging Face repo in the matrix, exports the model, and runs it with a prompt.
  # The script exits early when SECRET_EXECUTORCH_HF_TOKEN is empty.
  test-huggingface-transformers:
    name: test-huggingface-transformers
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    secrets: inherit
    strategy:
      matrix:
        hf_model_repo: [google/gemma-2b]
      fail-fast: false
    with:
      secrets-env: EXECUTORCH_HF_TOKEN
      runner: linux.12xlarge
      docker-image: executorch-ubuntu-22.04-clang12
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        echo "::group::Set up ExecuTorch"
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
        echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
        rm -rf cmake-out
        cmake \
          -DCMAKE_INSTALL_PREFIX=cmake-out \
          -DCMAKE_BUILD_TYPE=Release \
          -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
          -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
          -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
          -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
          -DEXECUTORCH_BUILD_XNNPACK=ON \
          -DPYTHON_EXECUTABLE=python \
          -Bcmake-out .
        cmake --build cmake-out -j9 --target install --config Release
        echo "Build llama runner"
        dir="examples/models/llama"
        cmake \
          -DCMAKE_INSTALL_PREFIX=cmake-out \
          -DCMAKE_BUILD_TYPE=Release \
          -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
          -DEXECUTORCH_BUILD_XNNPACK=ON \
          -DPYTHON_EXECUTABLE=python \
          -Bcmake-out/${dir} \
          ${dir}
        cmake --build cmake-out/${dir} -j9 --config Release
        echo "::endgroup::"
        echo "::group::Set up HuggingFace Dependencies"
        if [ -z "$SECRET_EXECUTORCH_HF_TOKEN" ]; then
          echo "::error::SECRET_EXECUTORCH_HF_TOKEN is empty. For security reason secrets won't be accessible on forked PRs. Please make sure you submit a non-forked PR."
          exit 1
        fi
        pip install -U "huggingface_hub[cli]"
        # Quote the token so it is always passed as a single argument
        huggingface-cli login --token "$SECRET_EXECUTORCH_HF_TOKEN"
        pip install accelerate sentencepiece
        # TODO(guangyang): Switch to use released transformers library after all required patches are included
        pip install "git+https://github.com/huggingface/transformers.git@6cc4dfe3f1e8d421c6d6351388e06e9b123cbfe1"
        pip list
        echo "::endgroup::"
        echo "::group::Export to ExecuTorch"
        TOKENIZER_FILE=tokenizer.model
        TOKENIZER_BIN_FILE=tokenizer.bin
        ET_MODEL_NAME=et_model
        # Fetch the file using a Python one-liner
        DOWNLOADED_TOKENIZER_FILE_PATH=$(python -c "
        from huggingface_hub import hf_hub_download
        # Download the file from the Hugging Face Hub
        downloaded_path = hf_hub_download(
            repo_id='${{ matrix.hf_model_repo }}',
            filename='${TOKENIZER_FILE}'
        )
        print(downloaded_path)
        ")
        if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then
          echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
          python -m extension.llm.tokenizer.tokenizer -t "$DOWNLOADED_TOKENIZER_FILE_PATH" -o "./${TOKENIZER_BIN_FILE}"
          # Check the converted tokenizer under the same name it was written to
          ls "./${TOKENIZER_BIN_FILE}"
        else
          echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
          exit 1
        fi
        python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
        cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
        echo "::endgroup::"