Enable composable benchmark configs for flexible model+device+optimiz… #109

Workflow file for this run

.github/workflows/apple-perf.yml at 62016d6

	name: apple-perf

	# Triggers: a daily cron, pull requests and pushes to main that touch this
	# workflow or the iOS Device Farm test-spec template, manual dispatch, and
	# invocation from other workflows.
	on:
	  schedule:
	    - cron: '0 1 * * *'
	  pull_request:
	    paths:
	      - .github/workflows/apple-perf.yml
	      - extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
	  push:
	    branches:
	      - main
	    paths:
	      - .github/workflows/apple-perf.yml
	      - extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
	  # Note: GitHub has an upper limit of 10 inputs
	  workflow_dispatch:
	    inputs:
	      models:
	        description: Models to be benchmarked
	        required: false
	        type: string
	        default: stories110M
	      devices:
	        description: Target devices to run benchmark
	        required: false
	        type: string
	        default: apple_iphone_15
	      # NOTE(review): no job visible in this workflow reads this input - confirm
	      # whether it is still needed.
	      benchmark_configs:
	        description: The list of configs used the benchmark
	        required: false
	        type: string
	  # Mirrors the workflow_dispatch inputs for callers that reuse this workflow.
	  workflow_call:
	    inputs:
	      models:
	        description: Models to be benchmarked
	        required: false
	        type: string
	        default: stories110M
	      devices:
	        description: Target devices to run benchmark
	        required: false
	        type: string
	        default: apple_iphone_15
	      benchmark_configs:
	        description: The list of configs used the benchmark
	        required: false
	        type: string

	# Runs that resolve to the same group key cancel any in-progress run of that
	# group. The key is built from the workflow name, the PR number (falling back
	# to the ref name), the commit SHA when the ref is a branch, and two flags
	# that keep manually dispatched and scheduled runs in their own groups.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
	  cancel-in-progress: true

	jobs:
	# Resolves the requested (or default) models and devices into the
	# benchmark_configs output that the downstream jobs expand as their matrix.
	set-parameters:
	  runs-on: ubuntu-22.04
	  outputs:
	    benchmark_configs: ${{ steps.set-parameters.outputs.benchmark_configs }}
	  steps:
	    - uses: actions/checkout@v3
	      with:
	        submodules: 'false'
	    - uses: actions/setup-python@v4
	      with:
	        python-version: '3.10'
	    - name: Set parameters
	      id: set-parameters
	      shell: bash
	      env:
	        # Separate default values from the workflow dispatch. To ensure defaults are accessible
	        # during scheduled runs and to provide flexibility for different defaults between
	        # on-demand and periodic benchmarking.
	        CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'llama,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8' || 'llama' }}
	        CRON_DEFAULT_DEVICES: apple_iphone_15
	        # Inputs are handed over through the environment instead of being interpolated
	        # into the script text, so their content is never parsed as shell code.
	        REQUESTED_MODELS: ${{ inputs.models }}
	        REQUESTED_DEVICES: ${{ inputs.devices }}
	      run: |
	        set -eux
	        # Fall back to the defaults above when no input was supplied
	        # (e.g. scheduled, push and pull_request runs).
	        MODELS="${REQUESTED_MODELS}"
	        if [ -z "$MODELS" ]; then
	          MODELS="$CRON_DEFAULT_MODELS"
	        fi
	        DEVICES="${REQUESTED_DEVICES}"
	        if [ -z "$DEVICES" ]; then
	          DEVICES="$CRON_DEFAULT_DEVICES"
	        fi

	        # NOTE(review): the script is invoked without any redirection, so it presumably
	        # writes benchmark_configs to $GITHUB_OUTPUT itself - confirm.
	        PYTHONPATH="${PWD}" python .ci/scripts/gather_benchmark_configs.py \
	          --os "ios" \
	          --models $MODELS \
	          --devices $DEVICES

	    # Step outputs only become visible to expressions once the producing step has
	    # finished, so the value has to be printed from a separate step.
	    - name: Print benchmark configs
	      shell: bash
	      env:
	        BENCHMARK_CONFIGS: ${{ steps.set-parameters.outputs.benchmark_configs }}
	      run: |
	        echo "benchmark_configs is: ${BENCHMARK_CONFIGS}"

	# For every matrix entry, renders the Device Farm test spec with the S3 URL of
	# the model archive and uploads the rendered spec next to where the model
	# will be stored.
	prepare-test-specs:
	  runs-on: linux.2xlarge
	  needs: set-parameters
	  strategy:
	    matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
	    fail-fast: false
	  steps:
	    - uses: actions/checkout@v3

	    - name: Prepare the spec
	      shell: bash
	      working-directory: extension/benchmark/apple/Benchmark
	      run: |
	        set -eux

	        echo "DEBUG: ${{ matrix.model }}"
	        # The model will be exported in the next step to this S3 path
	        MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip"
	        # We could write a script to properly use jinja here, but there is only one variable,
	        # so let's just sed it
	        sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2
	        cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml
	        # Just print the test spec for debugging
	        cat default-ios-device-farm-appium-test-spec.yml

	    - name: Upload the spec
	      uses: seemethere/upload-artifact-s3@v5
	      with:
	        s3-bucket: gha-artifacts
	        # Same <model>_<config> prefix that MODEL_PATH above points into
	        s3-prefix: |
	          ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}
	        retention-days: 1
	        if-no-files-found: error
	        path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml

	# Exports one model per matrix entry on a macOS runner and places the result
	# under artifacts-to-be-uploaded/<model>_<config>. Three families are handled:
	# Hugging Face repos ("<org>/<repo>"), the built-in "llama" (stories110M)
	# model, and everything else via .ci/scripts/test_model.sh.
	export-models:
	  name: export-models
	  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
	  needs: set-parameters
	  secrets: inherit
	  strategy:
	    matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
	    fail-fast: false
	  with:
	    # NB: Need to use our AWS MacOS runner to upload large models to S3
	    runner: macos-m1-stable
	    python-version: '3.11'
	    submodules: 'true'
	    timeout: 60
	    upload-artifact: ios-models
	    upload-artifact-to-s3: true
	    secrets-env: EXECUTORCH_HF_TOKEN
	    script: |
	      set -eux

	      echo "::group::Setting up CI environment"
	      .ci/scripts/setup-conda.sh

	      BUILD_TOOL=cmake
	      # Setup MacOS dependencies as there is no Docker support on MacOS atm
	      GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
	        .ci/scripts/setup-macos.sh "${BUILD_TOOL}"

	      if [[ ${{ matrix.config }} == "coreml" ]]; then
	        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
	          backends/apple/coreml/scripts/install_requirements.sh
	      fi

	      if [[ ${{ matrix.config }} == "mps" ]]; then
	        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
	          backends/apple/mps/install_requirements.sh
	      fi

	      # Install requirements for export_llama
	      PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh

	      pip install -U "huggingface_hub[cli]"
	      huggingface-cli login --token "${SECRET_EXECUTORCH_HF_TOKEN}"
	      ${CONDA_RUN} pip install accelerate sentencepiece
	      pip list

	      ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.config }}
	      echo "::endgroup::"

	      echo "::group::Exporting ${{ matrix.config }} model: ${{ matrix.model }}"
	      BUILD_MODE="cmake"

	      if [[ ${{ matrix.model }} =~ ^[^/]+/[^/]+$ ]]; then
	        # HuggingFace model. Assume the pattern is always like "<org>/<repo>"
	        HF_MODEL_REPO=${{ matrix.model }}
	        # Output name: repo part, underscores turned into dashes, lower-cased, plus the config
	        OUT_ET_MODEL_NAME="$(echo "$HF_MODEL_REPO" | awk -F'/' '{print $2}' | sed 's/_/-/g' | tr '[:upper:]' '[:lower:]')_${{ matrix.config }}"

	        if [[ "$HF_MODEL_REPO" == meta-llama/* ]]; then
	          # Llama models on Hugging Face
	          if [[ ${{ matrix.config }} == "llama3_spinquant" ]]; then
	            # SpinQuant
	            # Download prequantized checkpoint from Hugging Face
	            DOWNLOADED_PATH=$(
	              bash .ci/scripts/download_hf_hub.sh \
	                --model_id "${HF_MODEL_REPO}" \
	                --files "tokenizer.model" "params.json" "consolidated.00.pth"
	            )
	            # Export using ExecuTorch's model definition
	            ${CONDA_RUN} python -m examples.models.llama.export_llama \
	              --model "llama3_2" \
	              --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
	              --params "${DOWNLOADED_PATH}/params.json" \
	              --use_sdpa_with_kv_cache \
	              -X \
	              --xnnpack-extended-ops \
	              --preq_mode 8da4w_output_8da8w \
	              --preq_group_size 32 \
	              --max_seq_length 2048 \
	              --output_name "${OUT_ET_MODEL_NAME}.pte" \
	              -kv \
	              -d fp32 \
	              --preq_embedding_quantize 8,0 \
	              --use_spin_quant native \
	              --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
	            ls -lh "${OUT_ET_MODEL_NAME}.pte"
	          elif [[ ${{ matrix.config }} == "llama3_qlora" ]]; then
	            # QAT + LoRA
	            # Download prequantized checkpoint from Hugging Face
	            DOWNLOADED_PATH=$(
	              bash .ci/scripts/download_hf_hub.sh \
	                --model_id "${HF_MODEL_REPO}" \
	                --files "tokenizer.model" "params.json" "consolidated.00.pth"
	            )
	            # Export using ExecuTorch's model definition
	            ${CONDA_RUN} python -m examples.models.llama.export_llama \
	              --model "llama3_2" \
	              --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
	              --params "${DOWNLOADED_PATH}/params.json" \
	              -qat \
	              -lora 16 \
	              --preq_mode 8da4w_output_8da8w \
	              --preq_group_size 32 \
	              --preq_embedding_quantize 8,0 \
	              --use_sdpa_with_kv_cache \
	              -kv \
	              -X \
	              --xnnpack-extended-ops \
	              -d fp32 \
	              --max_seq_length 2048 \
	              --output_name "${OUT_ET_MODEL_NAME}.pte" \
	              --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
	            ls -lh "${OUT_ET_MODEL_NAME}.pte"
	          elif [[ ${{ matrix.config }} == "llama3_fb16" ]]; then
	            # Original BF16 version, without any quantization
	            DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
	            ${CONDA_RUN} python -m examples.models.llama.export_llama \
	              --model "llama3_2" \
	              --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
	              --params "${DOWNLOADED_PATH}/params.json" \
	              -kv \
	              --use_sdpa_with_kv_cache \
	              -X \
	              -d bf16 \
	              --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \
	              --output_name="${OUT_ET_MODEL_NAME}.pte"
	            ls -lh "${OUT_ET_MODEL_NAME}.pte"
	          elif [[ ${{ matrix.config }} == "llama3_coreml_ane" ]]; then
	            # ANE
	            DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
	            ${CONDA_RUN} python -m examples.models.llama.export_llama \
	              --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
	              --params "${DOWNLOADED_PATH}/params.json" \
	              -E "4,32" \
	              -kv \
	              --disable_dynamic_shape \
	              --coreml \
	              --coreml-ios 17 \
	              --coreml-quantize c4w \
	              --coreml-compute-units cpu_and_ne \
	              --output_name="${OUT_ET_MODEL_NAME}.pte"
	            ls -lh "${OUT_ET_MODEL_NAME}.pte"
	          else
	            # By default, test with the Hugging Face model and the xnnpack recipe
	            DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model")
	            ${CONDA_RUN} python -m extension.export_util.export_hf_model -hfm="$HF_MODEL_REPO" -o "$OUT_ET_MODEL_NAME"
	            ls -lh "${OUT_ET_MODEL_NAME}.pte"
	          fi
	        else
	          echo "Unsupported model ${{ matrix.model }}"
	          exit 1
	        fi

	        # Bundle the exported program with its tokenizer for the benchmark app
	        zip -j model.zip "${OUT_ET_MODEL_NAME}.pte" "${DOWNLOADED_PATH}/tokenizer.model"
	        ls -lh model.zip
	        mkdir -p "${ARTIFACTS_DIR_NAME}"
	        mv model.zip "${ARTIFACTS_DIR_NAME}"
	      elif [[ ${{ matrix.model }} == "llama" ]]; then
	        # Install requirements for export_llama
	        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
	          bash examples/models/llama/install_requirements.sh

	        # Test llama2
	        if [[ ${{ matrix.config }} == "xnnpack" ]]; then
	          DELEGATE_CONFIG="xnnpack+custom+qe"
	        elif [[ ${{ matrix.config }} == "coreml" ]]; then
	          DELEGATE_CONFIG="coreml"
	        elif [[ ${{ matrix.config }} == "mps" ]]; then
	          DELEGATE_CONFIG="mps"
	        else
	          # Fail with a readable message instead of an unbound-variable error
	          # from `set -u` when DELEGATE_CONFIG is used below.
	          echo "Unsupported config ${{ matrix.config }} for model ${{ matrix.model }}"
	          exit 1
	        fi
	        DTYPE="fp32"
	        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
	          bash .ci/scripts/test_llama.sh \
	          -model "stories110M" \
	          -build_tool "${BUILD_MODE}" \
	          -dtype "${DTYPE}" \
	          -mode "${DELEGATE_CONFIG}" \
	          -upload "${ARTIFACTS_DIR_NAME}"
	      else
	        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
	          bash .ci/scripts/test_model.sh \
	          "${{ matrix.model }}" \
	          "${BUILD_MODE}" \
	          "${{ matrix.config }}" \
	          "${ARTIFACTS_DIR_NAME}"
	      fi
	      echo "::endgroup::"

	# Builds the ExecuTorch iOS frameworks and the benchmark app on a macOS
	# runner; the build output is published as the "ios-apps" artifact.
	build-benchmark-app:
	  name: build-benchmark-app
	  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
	  needs:
	    - set-parameters
	  secrets: inherit
	  with:
	    runner: macos-latest-xlarge
	    python-version: '3.11'
	    submodules: 'true'
	    # Build the PR head commit on pull requests, otherwise the triggering commit
	    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
	    upload-artifact: ios-apps
	    secrets-env: BUILD_CERTIFICATE_BASE64 EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64 KEYCHAIN_PASSWORD
	    timeout: 90
	    script: |
	      set -eux

	      echo "::group::Setting up CI environment"
	      .ci/scripts/setup-conda.sh

	      BUILD_TOOL=cmake
	      # Setup MacOS dependencies as there is no Docker support on MacOS atm
	      GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
	        .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
	      export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded

	      # Setup Apple certificate for iOS development
	      BUILD_PROVISION_PROFILE_BASE64="${SECRET_EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64}" \
	      BUILD_CERTIFICATE_BASE64="${SECRET_BUILD_CERTIFICATE_BASE64}" \
	      KEYCHAIN_PASSWORD="${SECRET_KEYCHAIN_PASSWORD}" \
	        .ci/scripts/setup-ios.sh

	      # Install CoreML Backend Requirements
	      PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
	        backends/apple/coreml/scripts/install_requirements.sh

	      # Install MPS Backend Requirements
	      PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
	        backends/apple/mps/install_requirements.sh
	      echo "::endgroup::"

	      echo "::group::Build ExecuTorch iOS frameworks"
	      FRAMEWORKS=(
	        "executorch"
	        "backend_coreml"
	        "backend_mps"
	        "backend_xnnpack"
	        "kernels_custom"
	        "kernels_optimized"
	        "kernels_portable"
	        "kernels_quantized"
	      )

	      # Build Release iOS Frameworks
	      PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
	        build/build_apple_frameworks.sh --coreml --custom --mps --optimized --portable --quantized --xnnpack

	      # Copy every built xcframework into the benchmark app's Frameworks directory
	      mkdir -p extension/benchmark/apple/Benchmark/Frameworks
	      for FRAMEWORK in "${FRAMEWORKS[@]}"; do (
	        cp -r "cmake-out/${FRAMEWORK}.xcframework" extension/benchmark/apple/Benchmark/Frameworks/
	      ) done
	      echo "::endgroup::"

	      # NB: Although exported models can be copied to this directory and bundled together with the
	      # app, we don't use this in CI and rely on AWS extra data parameter to make the model and the
	      # tokenizer available to the benchmark. This decouples the app and the model. We just need to
	      # create the directory here to pass the build
	      mkdir -p extension/benchmark/apple/Benchmark/Models
	      ${CONDA_RUN} --no-capture-output \
	        build/build_apple_llm_demo.sh "${ARTIFACTS_DIR_NAME}"

	# Re-publishes the "ios-apps" GitHub artifact to S3 under this run's
	# artifacts prefix.
	upload-benchmark-app:
	  needs: build-benchmark-app
	  runs-on: linux.2xlarge
	  steps:
	    - name: Download the apps from GitHub
	      uses: actions/download-artifact@v3
	      with:
	        # The name here needs to match the name of the upload-artifact parameter
	        name: ios-apps
	        path: ${{ runner.temp }}/artifacts/

	    - name: Verify the apps
	      shell: bash
	      working-directory: ${{ runner.temp }}/artifacts/
	      run: |
	        ls -lah ./

	    - name: Upload the apps to S3
	      uses: seemethere/upload-artifact-s3@v5
	      with:
	        s3-bucket: gha-artifacts
	        s3-prefix: |
	          ${{ github.repository }}/${{ github.run_id }}/artifacts
	        retention-days: 14
	        if-no-files-found: ignore
	        path: ${{ runner.temp }}/artifacts/

	# Runs the benchmark for each matrix entry through the shared mobile_job
	# workflow, pointing it at the app, xctestrun bundle and test spec stored
	# under this run's S3 artifacts prefix.
	benchmark-on-device:
	  # Evaluated regardless of the outcome of the jobs listed under `needs`
	  if: always()
	  needs:
	    - set-parameters
	    - prepare-test-specs
	    - upload-benchmark-app
	    - export-models
	  permissions:
	    id-token: write
	    contents: read
	  uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
	  strategy:
	    matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
	    fail-fast: false
	  with:
	    # Due to scheduling a job may be pushed beyond the default 60m threshold
	    timeout: 120
	    device-type: ios
	    # For iOS testing, the runner just needs to call AWS Device Farm, so there is no need to run this on macOS
	    runner: linux.2xlarge
	    test-infra-ref: ''
	    # This is the ARN of ExecuTorch project on AWS
	    project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
	    device-pool-arn: ${{ matrix.device_arn }}
	    # Uploaded to S3 from the previous job
	    ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.ipa
	    ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.xctestrun.zip
	    # Per-matrix-entry spec, stored under the <model>_<config> prefix
	    test-spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/default-ios-device-farm-appium-test-spec.yml

	# Syncs the Device Farm artifacts of this run attempt from S3, converts them
	# into benchmark result JSON files, and uploads those in both the v2 and v3
	# schemas. Runs even when benchmark-on-device did not succeed.
	upload-benchmark-results:
	  needs:
	    - benchmark-on-device
	  if: always()
	  runs-on: linux.2xlarge
	  environment: upload-benchmark-results
	  permissions:
	    id-token: write
	    contents: read
	  steps:
	    - uses: actions/checkout@v3
	      with:
	        submodules: false

	    - name: Authenticate with AWS
	      uses: aws-actions/configure-aws-credentials@v4
	      with:
	        role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
	        # The max duration enforced by the server side
	        role-duration-seconds: 18000
	        aws-region: us-east-1

	    - name: Setup conda
	      uses: pytorch/test-infra/.github/actions/setup-miniconda@main
	      with:
	        python-version: '3.10'

	    - name: Download the list of artifacts from S3
	      env:
	        ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/
	      shell: bash
	      run: |
	        set -eux
	        ${CONDA_RUN} python -mpip install awscli==1.32.18

	        mkdir -p artifacts
	        pushd artifacts
	        ${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" .
	        popd

	        ls -lah artifacts

	    - name: Extract the benchmark results JSON
	      shell: bash
	      env:
	        # Branch and workflow names are free-form text (the head ref of a pull request
	        # is chosen by its author), so they are passed through the environment
	        # instead of being interpolated into the script.
	        HEAD_BRANCH: ${{ github.head_ref || github.ref_name }}
	        WORKFLOW_NAME: ${{ github.workflow }}
	      run: |
	        set -eux

	        mkdir -p benchmark-results

	        for ARTIFACTS_BY_JOB in artifacts/*.json; do
	          # An unmatched glob is left as the literal pattern; stop in that case
	          [ -f "${ARTIFACTS_BY_JOB}" ] || break
	          echo "${ARTIFACTS_BY_JOB}"
	          ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
	            --artifacts "${ARTIFACTS_BY_JOB}" \
	            --output-dir benchmark-results \
	            --repo ${{ github.repository }} \
	            --head-branch "${HEAD_BRANCH}" \
	            --workflow-name "${WORKFLOW_NAME}" \
	            --workflow-run-id ${{ github.run_id }} \
	            --workflow-run-attempt ${{ github.run_attempt }}
	        done

	        for SCHEMA in v2 v3; do
	          for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
	            # Same guard as above: without it `cat` would fail on the literal
	            # pattern when no result file was produced for this schema.
	            [ -f "${BENCHMARK_RESULTS}" ] || break
	            cat "${BENCHMARK_RESULTS}"
	            echo
	          done
	        done

	    # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
	    - name: Upload the benchmark results (v2)
	      uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
	      with:
	        benchmark-results-dir: benchmark-results/v2
	        dry-run: false
	        schema-version: v2

	    - name: Upload the benchmark results (v3)
	      uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
	      with:
	        benchmark-results-dir: benchmark-results/v3
	        dry-run: false
	        schema-version: v3
	        github-token: ${{ secrets.GITHUB_TOKEN }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Enable composable benchmark configs for flexible model+device+optimiz… #109

Workflow file

Enable composable benchmark configs for flexible model+device+optimiz… #109

Jobs

Run details

Workflow file for this run