Port Memory Constraints over to OSS #125

Workflow file for this run

.github/workflows/android-perf.yml at de70b9b

	name: android-perf

	# Triggers: a daily schedule, pull requests and pushes to main that touch this
	# workflow or the device-farm test spec template, manual dispatch, and calls
	# from other workflows. workflow_dispatch and workflow_call declare the same
	# set of inputs with the same defaults.
	on:
	  schedule:
	    # Quoted so the cron expression is always read as a plain string
	    - cron: '0 0 * * *'
	  pull_request:
	    paths:
	      - .github/workflows/android-perf.yml
	      - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
	  push:
	    branches:
	      - main
	    paths:
	      - .github/workflows/android-perf.yml
	      - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
	  # Note: GitHub has an upper limit of 10 inputs
	  workflow_dispatch:
	    inputs:
	      models:
	        description: Models to be benchmarked
	        required: false
	        type: string
	        default: stories110M
	      devices:
	        description: Target devices to run benchmark
	        required: false
	        type: string
	        default: samsung_galaxy_s22
	      delegates:
	        description: Backend delegates
	        required: false
	        type: string
	        default: xnnpack
	      threadpool:
	        description: Run with threadpool?
	        required: false
	        type: boolean
	        default: false
	      benchmark_configs:
	        description: The list of configs used the benchmark
	        required: false
	        type: string
	  workflow_call:
	    inputs:
	      models:
	        description: Models to be benchmarked
	        required: false
	        type: string
	        default: stories110M
	      devices:
	        description: Target devices to run benchmark
	        required: false
	        type: string
	        default: samsung_galaxy_s22
	      delegates:
	        description: Backend delegates
	        required: false
	        type: string
	        default: xnnpack
	      threadpool:
	        description: Run with threadpool?
	        required: false
	        type: boolean
	        default: false
	      benchmark_configs:
	        description: The list of configs used the benchmark
	        required: false
	        type: string

	# The group key combines the workflow name, the PR number (or the ref name when
	# there is no PR), the commit SHA for branch refs, and two flags telling whether
	# the run was manually dispatched or scheduled. A newer run in the same group
	# cancels the one in progress.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
	  cancel-in-progress: true

	jobs:
	  # Turns the workflow inputs (or the per-event defaults) into the JSON arrays
	  # that drive the matrices of the jobs below: models, delegates, and the AWS
	  # Device Farm pool ARNs of the requested devices.
	  set-parameters:
	    runs-on: linux.2xlarge
	    outputs:
	      models: ${{ steps.set-parameters.outputs.models }}
	      devices: ${{ steps.set-parameters.outputs.devices }}
	      delegates: ${{ steps.set-parameters.outputs.delegates }}
	    steps:
	      - name: Set parameters
	        id: set-parameters
	        shell: bash
	        env:
	          # Separate default values from the workflow dispatch. To ensure defaults are accessible
	          # during scheduled runs and to provide flexibility for different defaults between
	          # on-demand and periodic benchmarking.
	          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'stories110M,dl3,mv3,mv2,ic4,ic3,vit' || 'stories110M' }}
	          CRON_DEFAULT_DEVICES: samsung_galaxy_s22
	          CRON_DEFAULT_DELEGATES: ${{ github.event_name == 'schedule' && 'xnnpack,qnn' || 'xnnpack' }}
	          # Inputs are handed over as environment variables instead of being
	          # pasted into the script text, so quotes inside a value (for example
	          # a JSON array of devices) reach the shell intact.
	          INPUT_MODELS: ${{ inputs.models }}
	          INPUT_DEVICES: ${{ inputs.devices }}
	          INPUT_DELEGATES: ${{ inputs.delegates }}
	        run: |
	          set -ex
	          MODELS="${INPUT_MODELS}"
	          if [ -z "$MODELS" ]; then
	            MODELS="$CRON_DEFAULT_MODELS"
	          fi
	          DEVICES="${INPUT_DEVICES}"
	          if [ -z "$DEVICES" ]; then
	            DEVICES="$CRON_DEFAULT_DEVICES"
	          fi
	          DELEGATES="${INPUT_DELEGATES}"
	          if [ -z "$DELEGATES" ]; then
	            DELEGATES="$CRON_DEFAULT_DELEGATES"
	          fi

	          # Mapping devices to their corresponding device-pool-arn
	          declare -A DEVICE_POOL_ARNS
	          DEVICE_POOL_ARNS[samsung_galaxy_s22]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa"
	          DEVICE_POOL_ARNS[samsung_galaxy_s24]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db"

	          # Resolve device names with their corresponding ARNs
	          # DEVICES may be a JSON array or a comma-separated list. `jq empty`
	          # never prints anything, so testing its captured output was always
	          # true; test jq's exit status instead and only split when the value
	          # is not already a JSON array.
	          if ! echo "$DEVICES" | jq -e 'type == "array"' >/dev/null 2>&1; then
	            DEVICES=$(echo "$DEVICES" | jq -Rc 'split(",")')
	          fi
	          declare -a MAPPED_ARNS=()
	          for DEVICE in $(echo "$DEVICES" | jq -r '.[]'); do
	            if [[ -z "${DEVICE_POOL_ARNS[$DEVICE]}" ]]; then
	              echo "Error: No ARN found for device '$DEVICE'. Abort." >&2
	              exit 1
	            fi
	            MAPPED_ARNS+=("${DEVICE_POOL_ARNS[$DEVICE]}")
	          done

	          # Publish the three matrix axes as compact JSON arrays
	          echo "models=$(echo "$MODELS" | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT
	          MAPPED_ARNS_JSON=$(printf '%s\n' "${MAPPED_ARNS[@]}" | jq -R . | jq -s .)
	          echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT
	          echo "delegates=$(echo "$DELEGATES" | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT

	  # Renders one Device Farm test spec per (model, delegate) pair and uploads it
	  # to S3 next to where the exported model will be stored.
	  prepare-test-specs:
	    runs-on: linux.2xlarge
	    needs: set-parameters
	    strategy:
	      matrix:
	        model: ${{ fromJson(needs.set-parameters.outputs.models) }}
	        delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
	      fail-fast: false
	    steps:
	      - uses: actions/checkout@v3

	      - name: Prepare the spec
	        shell: bash
	        working-directory: extension/benchmark/android/benchmark
	        run: |
	          set -eux

	          # The model will be exported in the next step to this S3 path
	          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip"
	          # We could write a script to properly use jinja here, but there is only one variable,
	          # so let's just sed it
	          sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2
	          cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml

	          # Just print the test spec for debugging
	          cat android-llm-device-farm-test-spec.yml

	      - name: Upload the spec
	        uses: seemethere/upload-artifact-s3@v5
	        with:
	          s3-bucket: gha-artifacts
	          s3-prefix: |
	            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}
	          retention-days: 1
	          if-no-files-found: error
	          path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml

	  # Exports every (model, delegate) pair and uploads the result to S3.
	  export-models:
	    name: export-models
	    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
	    needs: set-parameters
	    strategy:
	      matrix:
	        model: ${{ fromJson(needs.set-parameters.outputs.models) }}
	        delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
	      fail-fast: false
	    with:
	      runner: linux.4xlarge
	      docker-image: executorch-ubuntu-22.04-qnn-sdk
	      submodules: 'true'
	      timeout: 60
	      upload-artifact: android-models
	      upload-artifact-to-s3: true
	      script: |
	        # The generic Linux job chooses to use base env, not the one setup by the image
	        echo "::group::Setting up dev environment"
	        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
	        conda activate "${CONDA_ENV}"
	        # The QNN SDK is only set up when exporting for the qnn delegate
	        if [[ "${{ matrix.delegate }}" == "qnn" ]]; then
	          PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
	          PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
	        fi
	        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
	        ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }}
	        echo "::endgroup::"

	        echo "::group::Exporting ${{ matrix.delegate }} model: ${{ matrix.model }}"
	        BUILD_MODE="cmake"
	        DTYPE="fp32"

	        # Models whose name starts with "stories" go through the llama export
	        # script; everything else uses the generic model script. The former
	        # pattern "^stories*" made the final "s" optional.
	        if [[ "${{ matrix.model }}" =~ ^stories ]]; then
	          # Install requirements for export_llama
	          PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
	          # Test llama2
	          if [[ "${{ matrix.delegate }}" == "xnnpack" ]]; then
	            DELEGATE_CONFIG="xnnpack+custom+qe"
	          elif [[ "${{ matrix.delegate }}" == "qnn" ]]; then
	            DELEGATE_CONFIG="qnn"
	          else
	            echo "Unsupported delegate ${{ matrix.delegate }}"
	            exit 1
	          fi
	          PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh \
	            -model "${{ matrix.model }}" \
	            -build_tool "${BUILD_MODE}" \
	            -dtype "${DTYPE}" \
	            -mode "${DELEGATE_CONFIG}" \
	            -upload "${ARTIFACTS_DIR_NAME}"
	        else
	          PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh \
	            "${{ matrix.model }}" \
	            "${BUILD_MODE}" \
	            "${{ matrix.delegate }}" \
	            "${ARTIFACTS_DIR_NAME}"
	        fi
	        echo "::endgroup::"

	  # Builds the Android benchmark app (with QNN enabled) and uploads it to S3.
	  build-benchmark-app:
	    name: build-benchmark-app
	    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
	    needs: set-parameters
	    with:
	      runner: linux.2xlarge
	      docker-image: executorch-ubuntu-22.04-clang12-android
	      submodules: 'true'
	      # On pull requests use the PR head commit, otherwise the triggering commit
	      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
	      timeout: 90
	      upload-artifact: android-apps
	      upload-artifact-to-s3: true
	      script: |
	        set -eux

	        # The generic Linux job chooses to use base env, not the one setup by the image
	        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
	        conda activate "${CONDA_ENV}"
	        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
	        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded

	        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
	        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh

	        export ANDROID_ABIS="arm64-v8a"
	        PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}

	  # Let's see how expensive this job is, we might want to tone it down by running it periodically
	  # Runs one Device Farm job per (model, delegate, device pool) combination,
	  # pointing it at the app, test APK and test spec uploaded by the jobs above.
	  benchmark-on-device:
	    # always(): evaluated as true regardless of the outcome of the needed jobs
	    if: always()
	    permissions:
	      id-token: write
	      contents: read
	    uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
	    needs:
	      - set-parameters
	      - prepare-test-specs
	      - build-benchmark-app
	      - export-models
	    strategy:
	      matrix:
	        model: ${{ fromJson(needs.set-parameters.outputs.models) }}
	        delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
	        device: ${{ fromJson(needs.set-parameters.outputs.devices) }}
	      fail-fast: false
	    with:
	      # Due to scheduling a job may be pushed beyond the default 60m threshold
	      timeout: 120
	      device-type: android
	      runner: linux.2xlarge
	      test-infra-ref: ''
	      # This is the ARN of ExecuTorch project on AWS
	      project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
	      device-pool-arn: ${{ matrix.device }}
	      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk
	      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk
	      test-spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/android-llm-device-farm-test-spec.yml

	  # Downloads the Device Farm artifacts of this run attempt from S3, converts
	  # them into benchmark result JSON files, and uploads those in both the v2 and
	  # v3 schemas.
	  upload-benchmark-results:
	    needs:
	      - benchmark-on-device
	    if: always()
	    runs-on: linux.2xlarge
	    environment: upload-benchmark-results
	    permissions:
	      id-token: write
	      contents: read
	    steps:
	      - uses: actions/checkout@v3
	        with:
	          submodules: false

	      - name: Authenticate with AWS
	        uses: aws-actions/configure-aws-credentials@v4
	        with:
	          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
	          # The max duration enforced by the server side
	          role-duration-seconds: 18000
	          aws-region: us-east-1

	      - name: Setup conda
	        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
	        with:
	          python-version: '3.10'

	      - name: Download the list of artifacts from S3
	        env:
	          ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/
	        shell: bash
	        run: |
	          set -eux
	          ${CONDA_RUN} python -mpip install awscli==1.32.18

	          mkdir -p artifacts
	          pushd artifacts
	          ${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" .
	          popd

	          ls -lah artifacts

	      - name: Extract the benchmark results JSON
	        shell: bash
	        env:
	          # On pull requests github.head_ref is chosen by the PR author, so it is
	          # passed through the environment rather than expanded into the script
	          # text, where shell metacharacters in a branch name would be executed.
	          HEAD_BRANCH: ${{ github.head_ref || github.ref_name }}
	        run: |
	          set -eux

	          mkdir -p benchmark-results

	          for ARTIFACTS_BY_JOB in artifacts/*.json; do
	            # Stop when the glob matched nothing and was left unexpanded
	            [ -f "${ARTIFACTS_BY_JOB}" ] || break
	            echo "${ARTIFACTS_BY_JOB}"
	            ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
	              --artifacts "${ARTIFACTS_BY_JOB}" \
	              --output-dir benchmark-results \
	              --repo ${{ github.repository }} \
	              --head-branch "${HEAD_BRANCH}" \
	              --workflow-name "${{ github.workflow }}" \
	              --workflow-run-id ${{ github.run_id }} \
	              --workflow-run-attempt ${{ github.run_attempt }}
	          done

	          # Print every extracted result file for debugging
	          for SCHEMA in v2 v3; do
	            for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
	              cat "${BENCHMARK_RESULTS}"
	              echo
	            done
	          done

	      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
	      - name: Upload the benchmark results (v2)
	        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
	        with:
	          benchmark-results-dir: benchmark-results/v2
	          dry-run: false
	          schema-version: v2

	      - name: Upload the benchmark results (v3)
	        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
	        with:
	          benchmark-results-dir: benchmark-results/v3
	          dry-run: false
	          schema-version: v3
	          github-token: ${{ secrets.GITHUB_TOKEN }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Port Memory Constraints over to OSS #125

Workflow file

Port Memory Constraints over to OSS #125

Jobs

Run details

Workflow file for this run