Skip to content

Arm backend: Fix memory mode for Ethos-U85 (#7416) #318

Arm backend: Fix memory mode for Ethos-U85 (#7416)

Arm backend: Fix memory mode for Ethos-U85 (#7416) #318

Workflow file for this run

name: trunk
on:
push:
branches:
- main
- release/*
tags:
- ciflow/trunk/*
pull_request:
paths:
- .ci/docker/ci_commit_pins/pytorch.txt
- .ci/scripts/**
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: true
jobs:
gather-models:
runs-on: ubuntu-22.04
outputs:
models: ${{ steps.gather-models.outputs.models }}
steps:
- uses: actions/checkout@v3
with:
submodules: 'false'
- uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Extract the list of models to test
id: gather-models
run: |
set -eux
PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --target-os macos --event "${GITHUB_EVENT_NAME}"
test-models-macos:
name: test-models-macos
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
needs: gather-models
strategy:
matrix: ${{ fromJSON(needs.gather-models.outputs.models) }}
fail-fast: false
with:
runner: ${{ matrix.runner }}
python-version: '3.11'
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: ${{ matrix.timeout }}
script: |
MODEL_NAME=${{ matrix.model }}
BUILD_TOOL=${{ matrix.build-tool }}
BACKEND=${{ matrix.backend }}
DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }}
bash .ci/scripts/setup-conda.sh
# Setup MacOS dependencies as there is no Docker support on MacOS atm
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
# Build and test executorch
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}"
test-custom-ops-macos:
name: test-custom-ops-macos
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
strategy:
matrix:
include:
- build-tool: cmake
fail-fast: false
with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
BUILD_TOOL=${{ matrix.build-tool }}
bash .ci/scripts/setup-conda.sh
# Setup MacOS dependencies as there is no Docker support on MacOS atm
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
# Build and test custom ops
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"
test-selective-build-macos:
name: test-selective-build-macos
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
strategy:
matrix:
include:
- build-tool: cmake
fail-fast: false
with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
BUILD_TOOL=${{ matrix.build-tool }}
bash .ci/scripts/setup-conda.sh
# Setup MacOS dependencies as there is no Docker support on MacOS atm
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
# Build and test selective build
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
test-demo-backend-delegation:
name: test-demo-backend-delegation
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
strategy:
matrix:
include:
- build-tool: buck2
- build-tool: cmake
fail-fast: false
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-clang12
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
BUILD_TOOL=${{ matrix.build-tool }}
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
# Test selective build
PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}"
test-arm-backend-delegation:
name: test-arm-backend-delegation
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-arm-sdk
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
source .ci/scripts/utils.sh
install_executorch
install_arm
# Increase number of files user can monitor to bypass buck failures.
# Hopefully this is high enough for this setup.
sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024
# Test ethos-u delegate examples with run.sh
PYTHON_EXECUTABLE=python bash examples/arm/run.sh examples/arm/ethos-u-scratch/
test-arm-reference-delegation:
name: test-arm-reference-delegation
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-arm-sdk
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
source .ci/scripts/utils.sh
install_executorch
install_arm
# Run arm unit tests
pytest -c /dev/null -v -n auto --cov=./ --cov-report=xml backends/arm/test
test-coreml-delegate:
name: test-coreml-delegate
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
with:
runner: macos-13-xlarge
python-version: '3.11'
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
BUILD_TOOL=cmake
bash .ci/scripts/setup-conda.sh
# Setup MacOS dependencies as there is no Docker support on MacOS atm
GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
# Build and test coreml delegate
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh
test-pybind-build-macos:
name: test-pybind-build-macos
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
strategy:
matrix:
include:
- build-tool: cmake
fail-fast: false
with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 180
script: |
bash .ci/scripts/setup-conda.sh
# build module for executorch.extension.pybindings.portable_lib
BUILD_TOOL=${{ matrix.build-tool }}
EXECUTORCH_BUILD_PYBIND=ON PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
# see if we can import the module successfully
${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
test-llama-runner-macos:
name: test-llama-runner-mac
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
strategy:
matrix:
dtype: [fp32]
mode: [portable, xnnpack+kv+custom, mps, coreml, xnnpack+custom+quantize_kv]
include:
- dtype: bf16
mode: portable
- dtype: bf16
mode: custom
fail-fast: false
with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 900
script: |
DTYPE=${{ matrix.dtype }}
MODE=${{ matrix.mode }}
bash .ci/scripts/setup-conda.sh
# Setup executorch
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh cmake
if [[ "${MODE}" == "mps" ]]; then
# Install mps delegate
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
echo "Finishing installing mps."
elif [[ "${MODE}" == "coreml" ]]; then
# Install coreml delegate
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
echo "Finishing installing coreml."
fi
# Install requirements for export_llama
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}"
# # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner.
# test-llava-runner-macos:
# name: test-llava-runner-macos
# uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
# strategy:
# fail-fast: false
# with:
# runner: macos-14-xlarge
# python-version: '3.11'
# submodules: 'true'
# ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# timeout: 900
# script: |
# BUILD_TOOL=cmake
# bash .ci/scripts/setup-conda.sh
# # Setup MacOS dependencies as there is no Docker support on MacOS atm
# GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
# # install Llava requirements
# ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
# ${CONDA_RUN} bash examples/models/llava/install_requirements.sh
# # run python unittest
# ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava
# # run e2e (export, tokenizer and runner)
# PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh
test-qnn-model:
name: test-qnn-model
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
strategy:
matrix:
dtype: [fp32]
model: [dl3, mv3, mv2, ic4, ic3, vit]
fail-fast: false
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-qnn-sdk
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 900
script: |
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
test-apple-model:
name: test-apple-model
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
strategy:
fail-fast: false
with:
runner: macos-m1-stable
python-version: '3.11'
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
BUILD_TOOL=cmake
bash .ci/scripts/setup-conda.sh
# Setup MacOS dependencies as there is no Docker support on MacOS atm
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
echo "Finishing installing coreml."
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
echo "Finishing installing mps."
# Build and test coreml model
MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l)
for MODEL_NAME in "${MODELS[@]}"; do
echo "::group::Exporting coreml model: $MODEL_NAME"
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml"
echo "::endgroup::"
echo "::group::Exporting mps model: $MODEL_NAME"
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps"
echo "::endgroup::"
done
test-huggingface-transformers:
# NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
if: ${{ !github.event.pull_request.head.repo.fork }}
name: test-huggingface-transformers
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
secrets: inherit
strategy:
matrix:
hf_model_repo: [google/gemma-2-2b]
fail-fast: false
with:
secrets-env: EXECUTORCH_HF_TOKEN
runner: linux.2xlarge.memory
docker-image: executorch-ubuntu-22.04-clang12
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 90
script: |
echo "::group::Set up ExecuTorch"
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
rm -rf cmake-out
cmake \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DPYTHON_EXECUTABLE=python \
-Bcmake-out .
cmake --build cmake-out -j9 --target install --config Release
echo "Build llama runner"
dir="examples/models/llama"
cmake \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DPYTHON_EXECUTABLE=python \
-Bcmake-out/${dir} \
${dir}
cmake --build cmake-out/${dir} -j9 --config Release
echo "::endgroup::"
echo "::group::Set up HuggingFace Dependencies"
if [ -z "$SECRET_EXECUTORCH_HF_TOKEN" ]; then
echo "::error::SECRET_EXECUTORCH_HF_TOKEN is empty. For security reason secrets won't be accessible on forked PRs. Please make sure you submit a non-forked PR."
exit 1
fi
pip install -U "huggingface_hub[cli]"
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
pip install accelerate sentencepiece
pip list
echo "::endgroup::"
echo "::group::Export to ExecuTorch"
TOKENIZER_FILE=tokenizer.model
TOKENIZER_BIN_FILE=tokenizer.bin
ET_MODEL_NAME=et_model
DOWNLOADED_TOKENIZER_FILE_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${{ matrix.hf_model_repo }}" --files "${TOKENIZER_FILE}")
if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" ]; then
echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
python -m extension.llm.tokenizer.tokenizer -t "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" -o ./${TOKENIZER_BIN_FILE}
ls ./tokenizer.bin
else
echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
exit 1
fi
python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
echo "::endgroup::"
test-llama-runner-qnn-linux:
name: test-llama-runner-qnn-linux
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
strategy:
matrix:
dtype: [fp32]
pt2e_quantize: [qnn_16a16w, qnn_8a8w]
mode: [qnn]
fail-fast: false
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-qnn-sdk
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 900
script: |
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
BUILD_TOOL="cmake"
DTYPE=${{ matrix.dtype }}
MODE=${{ matrix.mode }}
PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
# Setup executorch
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"