Arm backend: Fix memory mode for Ethos-U85 (#7416) #318
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow name as shown in the Actions UI.
name: trunk

# Triggers: pushes to main / release branches and ciflow/trunk tags, pull
# requests that touch the pinned PyTorch commit or the CI scripts, and manual
# dispatch.
on:
  push:
    branches:
      - main
      - release/*
    tags:
      - ciflow/trunk/*
  pull_request:
    paths:
      - .ci/docker/ci_commit_pins/pytorch.txt
      - .ci/scripts/**
  workflow_dispatch:

# Runs are grouped by workflow + PR number (or commit SHA when there is no PR);
# a newer run in the same group cancels the one still in progress.
# NOTE(review): the last component tests for the `schedule` event, but no
# `schedule` trigger is declared in the `on:` block above — confirm whether it
# is still needed.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
# Job (child of `jobs:`): produces the `models` output that test-models-macos
# feeds through fromJSON() to build its matrix.
gather-models:
  runs-on: ubuntu-22.04
  outputs:
    # Re-export the step output so dependent jobs can read it through `needs`.
    models: ${{ steps.gather-models.outputs.models }}
  steps:
    - uses: actions/checkout@v3
      with:
        submodules: 'false'
    - uses: actions/setup-python@v4
      with:
        # Quoted so YAML does not read the version as the float 3.1.
        python-version: '3.10'
    - name: Extract the list of models to test
      id: gather-models
      run: |
        set -eux
        PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --target-os macos --event "${GITHUB_EVENT_NAME}"
# Job (child of `jobs:`): runs test_model.sh on macOS once per entry of the
# matrix emitted by gather-models.
test-models-macos:
  name: test-models-macos
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  needs: gather-models
  strategy:
    # The whole matrix comes from the JSON published by gather-models.
    matrix: ${{ fromJSON(needs.gather-models.outputs.models) }}
    fail-fast: false
  with:
    runner: ${{ matrix.runner }}
    python-version: '3.11'
    submodules: 'true'
    # Use the PR head commit for pull_request events, otherwise the pushed SHA.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: ${{ matrix.timeout }}
    script: |
      MODEL_NAME=${{ matrix.model }}
      BUILD_TOOL=${{ matrix.build-tool }}
      BACKEND=${{ matrix.backend }}
      DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }}
      bash .ci/scripts/setup-conda.sh
      # Setup MacOS dependencies as there is no Docker support on MacOS atm
      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
      # Build and test executorch
      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}"
# Job (child of `jobs:`): builds and tests the portable custom-ops example on
# macOS.
test-custom-ops-macos:
  name: test-custom-ops-macos
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  strategy:
    matrix:
      include:
        - build-tool: cmake
    fail-fast: false
  with:
    runner: macos-m1-stable
    python-version: '3.11'
    submodules: 'true'
    # Use the PR head commit for pull_request events, otherwise the pushed SHA.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    script: |
      BUILD_TOOL=${{ matrix.build-tool }}
      bash .ci/scripts/setup-conda.sh
      # Setup MacOS dependencies as there is no Docker support on MacOS atm
      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
      # Build and test custom ops
      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"
# Job (child of `jobs:`): builds and tests the selective-build example on
# macOS.
test-selective-build-macos:
  name: test-selective-build-macos
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  strategy:
    matrix:
      include:
        - build-tool: cmake
    fail-fast: false
  with:
    runner: macos-m1-stable
    python-version: '3.11'
    submodules: 'true'
    # Use the PR head commit for pull_request events, otherwise the pushed SHA.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    script: |
      BUILD_TOOL=${{ matrix.build-tool }}
      bash .ci/scripts/setup-conda.sh
      # Setup MacOS dependencies as there is no Docker support on MacOS atm
      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
      # Build and test selective build
      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
# Job (child of `jobs:`): runs the demo backend delegation test on Linux, once
# with buck2 and once with cmake.
test-demo-backend-delegation:
  name: test-demo-backend-delegation
  uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  strategy:
    matrix:
      include:
        - build-tool: buck2
        - build-tool: cmake
    fail-fast: false
  with:
    runner: linux.2xlarge
    docker-image: executorch-ubuntu-22.04-clang12
    submodules: 'true'
    # Use the PR head commit for pull_request events, otherwise the pushed SHA.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    script: |
      # The generic Linux job chooses to use base env, not the one setup by the image
      CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
      conda activate "${CONDA_ENV}"
      BUILD_TOOL=${{ matrix.build-tool }}
      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
      # Test demo backend delegation
      PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}"
# Job (child of `jobs:`): runs the Ethos-U delegate examples through
# examples/arm/run.sh inside the arm-sdk image.
test-arm-backend-delegation:
  name: test-arm-backend-delegation
  uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
  with:
    runner: linux.2xlarge
    docker-image: executorch-ubuntu-22.04-arm-sdk
    submodules: 'true'
    # Use the PR head commit for pull_request events, otherwise the pushed SHA.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 90
    script: |
      # The generic Linux job chooses to use base env, not the one setup by the image
      CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
      conda activate "${CONDA_ENV}"
      source .ci/scripts/utils.sh
      install_executorch
      install_arm
      # Increase number of files user can monitor to bypass buck failures.
      # Hopefully this is high enough for this setup.
      sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024
      # Test ethos-u delegate examples with run.sh
      PYTHON_EXECUTABLE=python bash examples/arm/run.sh examples/arm/ethos-u-scratch/
# Job (child of `jobs:`): runs the pytest suite under backends/arm/test inside
# the arm-sdk image.
test-arm-reference-delegation:
  name: test-arm-reference-delegation
  uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
  with:
    runner: linux.2xlarge
    docker-image: executorch-ubuntu-22.04-arm-sdk
    submodules: 'true'
    # Use the PR head commit for pull_request events, otherwise the pushed SHA.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 90
    script: |
      # The generic Linux job chooses to use base env, not the one setup by the image
      CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
      conda activate "${CONDA_ENV}"
      source .ci/scripts/utils.sh
      install_executorch
      install_arm
      # Run arm unit tests
      pytest -c /dev/null -v -n auto --cov=./ --cov-report=xml backends/arm/test
# Job (child of `jobs:`): builds and tests the Core ML delegate on macOS via
# backends/apple/coreml/scripts/build_all.sh.
test-coreml-delegate:
  name: test-coreml-delegate
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  with:
    runner: macos-13-xlarge
    python-version: '3.11'
    submodules: 'true'
    # Use the PR head commit for pull_request events, otherwise the pushed SHA.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 90
    script: |
      BUILD_TOOL=cmake
      bash .ci/scripts/setup-conda.sh
      # Setup MacOS dependencies as there is no Docker support on MacOS atm
      GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
      # Build and test coreml delegate
      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh
# Job (child of `jobs:`): builds with EXECUTORCH_BUILD_PYBIND=ON on macOS and
# checks that the portable_lib pybindings module can be imported.
test-pybind-build-macos:
  name: test-pybind-build-macos
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  strategy:
    matrix:
      include:
        - build-tool: cmake
    fail-fast: false
  with:
    runner: macos-m1-stable
    python-version: '3.11'
    submodules: 'true'
    # Use the PR head commit for pull_request events, otherwise the pushed SHA.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 180
    script: |
      bash .ci/scripts/setup-conda.sh
      # build module for executorch.extension.pybindings.portable_lib
      BUILD_TOOL=${{ matrix.build-tool }}
      EXECUTORCH_BUILD_PYBIND=ON PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
      # see if we can import the module successfully
      ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
# Job (child of `jobs:`): runs test_llama.sh for stories110M on macOS across
# the dtype/mode matrix below.
test-llama-runner-macos:
  # NOTE(review): the display name ends in `-mac` while the job id ends in
  # `-macos`. Left unchanged because check names may be referenced outside
  # this file — confirm before renaming.
  name: test-llama-runner-mac
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  strategy:
    matrix:
      dtype: [fp32]
      mode: [portable, xnnpack+kv+custom, mps, coreml, xnnpack+custom+quantize_kv]
      # Extra bf16 combinations added on top of the fp32 x mode product.
      include:
        - dtype: bf16
          mode: portable
        - dtype: bf16
          mode: custom
    fail-fast: false
  with:
    runner: macos-m1-stable
    python-version: '3.11'
    submodules: 'true'
    # Use the PR head commit for pull_request events, otherwise the pushed SHA.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 900
    script: |
      DTYPE=${{ matrix.dtype }}
      MODE=${{ matrix.mode }}
      bash .ci/scripts/setup-conda.sh
      # Setup executorch
      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh cmake
      if [[ "${MODE}" == "mps" ]]; then
        # Install mps delegate
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
        echo "Finishing installing mps."
      elif [[ "${MODE}" == "coreml" ]]; then
        # Install coreml delegate
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
        echo "Finishing installing coreml."
      fi
      # Install requirements for export_llama
      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
      # Test llama2
      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}"
# # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner.
# test-llava-runner-macos:
#   name: test-llava-runner-macos
#   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
#   strategy:
#     fail-fast: false
#   with:
#     runner: macos-14-xlarge
#     python-version: '3.11'
#     submodules: 'true'
#     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
#     timeout: 900
#     script: |
#       BUILD_TOOL=cmake
#       bash .ci/scripts/setup-conda.sh
#       # Setup MacOS dependencies as there is no Docker support on MacOS atm
#       GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
#       # install Llava requirements
#       ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
#       ${CONDA_RUN} bash examples/models/llava/install_requirements.sh
#       # run python unittest
#       ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava
#       # run e2e (export, tokenizer and runner)
#       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh
# Job (child of `jobs:`): runs test_model.sh with the "qnn" backend for each
# model in the matrix, inside the qnn-sdk image.
test-qnn-model:
  name: test-qnn-model
  uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  strategy:
    matrix:
      # NOTE(review): `dtype` is not referenced by the script below; it only
      # adds a single-valued matrix axis — confirm whether it is still needed.
      dtype: [fp32]
      model: [dl3, mv3, mv2, ic4, ic3, vit]
    fail-fast: false
  with:
    runner: linux.2xlarge
    docker-image: executorch-ubuntu-22.04-qnn-sdk
    submodules: 'true'
    # Use the PR head commit for pull_request events, otherwise the pushed SHA.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 900
    script: |
      # The generic Linux job chooses to use base env, not the one setup by the image
      CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
      conda activate "${CONDA_ENV}"
      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
      PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
      PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
# Job (child of `jobs:`): runs test_model.sh on macOS for a fixed list of
# models, once with the "coreml" backend and once with the "mps" backend.
test-apple-model:
  name: test-apple-model
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  strategy:
    fail-fast: false
  with:
    runner: macos-m1-stable
    python-version: '3.11'
    submodules: 'true'
    # Use the PR head commit for pull_request events, otherwise the pushed SHA.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 90
    script: |
      BUILD_TOOL=cmake
      bash .ci/scripts/setup-conda.sh
      # Setup MacOS dependencies as there is no Docker support on MacOS atm
      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
      echo "Finishing installing coreml."
      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
      echo "Finishing installing mps."
      # Build and test each model with both the coreml and mps backends
      MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l)
      for MODEL_NAME in "${MODELS[@]}"; do
        echo "::group::Exporting coreml model: $MODEL_NAME"
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml"
        echo "::endgroup::"
        echo "::group::Exporting mps model: $MODEL_NAME"
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps"
        echo "::endgroup::"
      done
# Job (child of `jobs:`): builds ExecuTorch and the llama runner, exports a
# Hugging Face model, and runs it with llama_main.
test-huggingface-transformers:
  # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
  if: ${{ !github.event.pull_request.head.repo.fork }}
  name: test-huggingface-transformers
  uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  secrets: inherit
  strategy:
    matrix:
      hf_model_repo: [google/gemma-2-2b]
    fail-fast: false
  with:
    # The script reads this secret as $SECRET_EXECUTORCH_HF_TOKEN.
    secrets-env: EXECUTORCH_HF_TOKEN
    runner: linux.2xlarge.memory
    docker-image: executorch-ubuntu-22.04-clang12
    submodules: 'true'
    # Use the PR head commit for pull_request events, otherwise the pushed SHA.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 90
    script: |
      echo "::group::Set up ExecuTorch"
      # The generic Linux job chooses to use base env, not the one setup by the image
      CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
      conda activate "${CONDA_ENV}"
      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
      echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
      rm -rf cmake-out
      cmake \
        -DCMAKE_INSTALL_PREFIX=cmake-out \
        -DCMAKE_BUILD_TYPE=Release \
        -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
        -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
        -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
        -DEXECUTORCH_BUILD_XNNPACK=ON \
        -DPYTHON_EXECUTABLE=python \
        -Bcmake-out .
      cmake --build cmake-out -j9 --target install --config Release
      echo "Build llama runner"
      dir="examples/models/llama"
      cmake \
        -DCMAKE_INSTALL_PREFIX=cmake-out \
        -DCMAKE_BUILD_TYPE=Release \
        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
        -DEXECUTORCH_BUILD_XNNPACK=ON \
        -DPYTHON_EXECUTABLE=python \
        -Bcmake-out/${dir} \
        ${dir}
      cmake --build cmake-out/${dir} -j9 --config Release
      echo "::endgroup::"
      echo "::group::Set up HuggingFace Dependencies"
      if [ -z "$SECRET_EXECUTORCH_HF_TOKEN" ]; then
        echo "::error::SECRET_EXECUTORCH_HF_TOKEN is empty. For security reason secrets won't be accessible on forked PRs. Please make sure you submit a non-forked PR."
        exit 1
      fi
      pip install -U "huggingface_hub[cli]"
      # Quote the token so the shell never word-splits or globs it.
      huggingface-cli login --token "$SECRET_EXECUTORCH_HF_TOKEN"
      pip install accelerate sentencepiece
      pip list
      echo "::endgroup::"
      echo "::group::Export to ExecuTorch"
      TOKENIZER_FILE=tokenizer.model
      TOKENIZER_BIN_FILE=tokenizer.bin
      ET_MODEL_NAME=et_model
      DOWNLOADED_TOKENIZER_FILE_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${{ matrix.hf_model_repo }}" --files "${TOKENIZER_FILE}")
      if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" ]; then
        echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
        python -m extension.llm.tokenizer.tokenizer -t "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" -o ./${TOKENIZER_BIN_FILE}
        # Check the file that was just written, by the same variable used to write it.
        ls "./${TOKENIZER_BIN_FILE}"
      else
        echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
        exit 1
      fi
      python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
      cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
      echo "::endgroup::"
# Job (child of `jobs:`): runs test_llama.sh for stories110M in "qnn" mode on
# Linux, once per pt2e_quantize setting.
test-llama-runner-qnn-linux:
  name: test-llama-runner-qnn-linux
  uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  strategy:
    matrix:
      dtype: [fp32]
      pt2e_quantize: [qnn_16a16w, qnn_8a8w]
      mode: [qnn]
    fail-fast: false
  with:
    runner: linux.2xlarge
    docker-image: executorch-ubuntu-22.04-qnn-sdk
    submodules: 'true'
    # Use the PR head commit for pull_request events, otherwise the pushed SHA.
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 900
    script: |
      # The generic Linux job chooses to use base env, not the one setup by the image
      CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
      conda activate "${CONDA_ENV}"
      BUILD_TOOL="cmake"
      DTYPE=${{ matrix.dtype }}
      MODE=${{ matrix.mode }}
      PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
      PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
      # Setup executorch
      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
      # Install requirements for export_llama
      PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
      # Test llama2
      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"