.github/workflows/apple-perf.yml

name: apple-perf

on:
  schedule:
    - cron: 0 1 * * *
  pull_request:
    paths:
      - .github/workflows/apple-perf.yml
      - extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
  push:
    branches:
      - main
    paths:
      - .github/workflows/apple-perf.yml
      - extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml.j2
  # Note: GitHub has an upper limit of 10 inputs
  workflow_dispatch:
    inputs:
      models:
        description: Models to be benchmarked
        required: false
        type: string
        default: stories110M
      devices:
        description: Target devices to run benchmark
        required: false
        type: string
        default: apple_iphone_15
      delegates:
        description: Backend delegates
        required: false
        type: string
        default: xnnpack
      benchmark_configs:
        description: The list of configs used the benchmark
        required: false
        type: string
  workflow_call:
    inputs:
      models:
        description: Models to be benchmarked
        required: false
        type: string
        default: stories110M
      devices:
        description: Target devices to run benchmark
        required: false
        type: string
        default: apple_iphone_15
      delegates:
        description: Backend delegates
        required: false
        type: string
        default: xnnpack
      benchmark_configs:
        description: The list of configs used the benchmark
        required: false
        type: string

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  set-parameters:
    runs-on: linux.2xlarge
    outputs:
      models: ${{ steps.set-parameters.outputs.models }}
      devices: ${{ steps.set-parameters.outputs.devices }}
      delegates: ${{ steps.set-parameters.outputs.delegates }}
    steps:
      - name: Set parameters
        id: set-parameters
        shell: bash
        env:
          # Separate default values from the workflow dispatch. To ensure defaults are accessible
          # during scheduled runs and to provide flexibility for different defaults between
          # on-demand and periodic benchmarking.
          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'stories110M,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l' || 'stories110M' }}
          CRON_DEFAULT_DEVICES: apple_iphone_15
          CRON_DEFAULT_DELEGATES: ${{ github.event_name == 'schedule' && 'xnnpack,coreml,mps' || 'xnnpack' }}
        run: |
          set -ex
          MODELS="${{ inputs.models }}"
          if [ -z "$MODELS" ]; then
            MODELS="$CRON_DEFAULT_MODELS"
          fi
          DEVICES="${{ inputs.devices }}"
          if [ -z "$DEVICES" ]; then
            DEVICES="$CRON_DEFAULT_DEVICES"
          fi
          DELEGATES="${{ inputs.delegates }}"
          if [ -z "$DELEGATES" ]; then
            DELEGATES="$CRON_DEFAULT_DELEGATES"
          fi

          # Mapping devices to their corresponding device-pool-arn
          declare -A DEVICE_POOL_ARNS
          DEVICE_POOL_ARNS[apple_iphone_15]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/3b5acd2e-92e2-4778-b651-7726bafe129d"

          # Resolve device names with their corresponding ARNs
          if [[ ! $(echo "$DEVICES" | jq empty 2>/dev/null) ]]; then
            DEVICES=$(echo "$DEVICES" | jq -Rc 'split(",")')
          fi
          declare -a MAPPED_ARNS=()
          for DEVICE in $(echo "$DEVICES" | jq -r '.[]'); do
            if [[ -z "${DEVICE_POOL_ARNS[$DEVICE]}" ]]; then
              echo "Error: No ARN found for device '$DEVICE'. Abort." >&2
              exit 1
            fi
            MAPPED_ARNS+=("${DEVICE_POOL_ARNS[$DEVICE]}")
          done

          echo "models=$(echo $MODELS | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT
          MAPPED_ARNS_JSON=$(printf '%s\n' "${MAPPED_ARNS[@]}" | jq -R . | jq -s .)
          echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT
          echo "delegates=$(echo $DELEGATES | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT

  prepare-test-specs:
    runs-on: linux.2xlarge
    needs: set-parameters
    strategy:
      matrix:
          model: ${{ fromJson(needs.set-parameters.outputs.models) }}
          delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
      fail-fast: false
    steps:
      - uses: actions/checkout@v3

      - name: Prepare the spec
        shell: bash
        working-directory: extension/benchmark/apple/Benchmark
        run: |
          set -eux
          # The model will be exported in the next step to this S3 path
          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip"
          # We could write a script to properly use jinja here, but there is only one variable,
          # so let's just sed it
          sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2
          cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml
          # Just print the test spec for debugging
          cat default-ios-device-farm-appium-test-spec.yml

      - name: Upload the spec
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-bucket: gha-artifacts
          s3-prefix: |
            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}
          retention-days: 1
          if-no-files-found: error
          path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml

  export-models:
    name: export-models
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    needs: set-parameters
    strategy:
      matrix:
          model: ${{ fromJson(needs.set-parameters.outputs.models) }}
          delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
      fail-fast: false
    with:
      # NB: Need to use our AWS MacOS runner to upload large models to S3
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'true'
      timeout: 60
      upload-artifact: ios-models
      upload-artifact-to-s3: true
      script: |
        set -eux

        echo "::group::Setting up CI environment"
        .ci/scripts/setup-conda.sh

        BUILD_TOOL=cmake
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          .ci/scripts/setup-macos.sh "${BUILD_TOOL}"

        if [[ ${{ matrix.delegate }} == "coreml" ]]; then
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            backends/apple/coreml/scripts/install_requirements.sh
        fi

        if [[ ${{ matrix.delegate }} == "mps" ]]; then
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            backends/apple/mps/install_requirements.sh
        fi

        ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }}
        echo "::endgroup::"

        echo "::group::Exporting ${{ matrix.delegate }} model: ${{ matrix.model }}"
        BUILD_MODE="cmake"
        DTYPE="fp32"

        if [[ ${{ matrix.model }} =~ ^stories* ]]; then
          # Install requirements for export_llama
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            bash examples/models/llama/install_requirements.sh

          # Test llama2
          if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
            DELEGATE_CONFIG="xnnpack+custom+qe"
          elif [[ ${{ matrix.delegate }} == "coreml" ]]; then
            DELEGATE_CONFIG="coreml"
          elif [[ ${{ matrix.delegate }} == "mps" ]]; then
            DELEGATE_CONFIG="mps"
          fi
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            bash .ci/scripts/test_llama.sh \
              -model "${{ matrix.model }}" \
              -build_tool "${BUILD_MODE}" \
              -dtype "${DTYPE}" \
              -mode "${DELEGATE_CONFIG}" \
              -upload "${ARTIFACTS_DIR_NAME}"
        else
          PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
            bash .ci/scripts/test_model.sh \
              "${{ matrix.model }}" \
              "${BUILD_MODE}" \
              "${{ matrix.delegate }}" \
              "${ARTIFACTS_DIR_NAME}"
        fi
        echo "::endgroup::"

  build-benchmark-app:
    name: build-benchmark-app
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    needs:
      - set-parameters
    secrets: inherit
    with:
      runner: macos-latest-xlarge
      python-version: '3.11'
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      upload-artifact: ios-apps
      secrets-env: BUILD_CERTIFICATE_BASE64 EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64 KEYCHAIN_PASSWORD
      timeout: 90
      script: |
        set -eux

        echo "::group::Setting up CI environment"
        .ci/scripts/setup-conda.sh

        BUILD_TOOL=cmake
        # Setup MacOS dependencies as there is no Docker support on MacOS atm
        GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
        .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded

        # Setup Apple certificate for iOS development
        BUILD_PROVISION_PROFILE_BASE64="${SECRET_EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64}" \
        BUILD_CERTIFICATE_BASE64="${SECRET_BUILD_CERTIFICATE_BASE64}" \
        KEYCHAIN_PASSWORD="${SECRET_KEYCHAIN_PASSWORD}" \
        .ci/scripts/setup-ios.sh

        # Install CoreML Backend Requirements
        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          backends/apple/coreml/scripts/install_requirements.sh

        # Install MPS Backend Requirements
        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          backends/apple/mps/install_requirements.sh
        echo "::endgroup::"

        echo "::group::Build ExecuTorch iOS frameworks"
        FRAMEWORKS=(
          "executorch"
          "backend_coreml"
          "backend_mps"
          "backend_xnnpack"
          "kernels_custom"
          "kernels_optimized"
          "kernels_portable"
          "kernels_quantized"
        )

        # Build Release iOS Frameworks
        PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
          build/build_apple_frameworks.sh --coreml --custom --mps --optimized --portable --quantized --xnnpack

        mkdir -p extension/benchmark/apple/Benchmark/Frameworks
        for FRAMEWORK in "${FRAMEWORKS[@]}"; do (
          cp -r "cmake-out/${FRAMEWORK}.xcframework" extension/benchmark/apple/Benchmark/Frameworks/
        ) done
        echo "::endgroup::"

        # NB: Although exported models can be copied to this directory and bundled together with the
        # app, we don't use this in CI and rely on AWS extra data parameter to make the model and the
        # tokenizer available to the benchmark. This decouples the app and the model. We just need to
        # create the directory here to pass the build
        mkdir -p extension/benchmark/apple/Benchmark/Models
        ${CONDA_RUN} --no-capture-output \
          build/build_apple_llm_demo.sh ${ARTIFACTS_DIR_NAME}

  upload-benchmark-app:
    needs: build-benchmark-app
    runs-on: linux.2xlarge
    steps:
      - name: Download the apps from GitHub
        uses: actions/download-artifact@v3
        with:
          # The name here needs to match the name of the upload-artifact parameter
          name: ios-apps
          path: ${{ runner.temp }}/artifacts/

      - name: Verify the apps
        shell: bash
        working-directory: ${{ runner.temp }}/artifacts/
        run: |
          ls -lah ./

      - name: Upload the apps to S3
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-bucket: gha-artifacts
          s3-prefix: |
            ${{ github.repository }}/${{ github.run_id }}/artifacts
          retention-days: 14
          if-no-files-found: ignore
          path: ${{ runner.temp }}/artifacts/

  benchmark-on-device:
    if: always()
    needs:
      - set-parameters
      - prepare-test-specs
      - upload-benchmark-app
      - export-models
    permissions:
      id-token: write
      contents: read
    uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
    strategy:
      matrix:
        model: ${{ fromJson(needs.set-parameters.outputs.models) }}
        delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
        device: ${{ fromJson(needs.set-parameters.outputs.devices) }}
      fail-fast: false
    with:
      # Due to scheduling a job may be pushed beyond the default 60m threshold
      timeout: 120
      device-type: ios
      # For iOS testing, the runner just needs to call AWS Device Farm, so there is no need to run this on macOS
      runner: linux.2xlarge
      test-infra-ref: ''
      # This is the ARN of ExecuTorch project on AWS
      project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
      device-pool-arn: ${{ matrix.device }}
      # Uploaded to S3 from the previous job
      ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.ipa
      ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/Benchmark.xctestrun.zip
      test-spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/default-ios-device-farm-appium-test-spec.yml

  upload-benchmark-results:
    needs:
      - benchmark-on-device
    if: always()
    runs-on: linux.2xlarge
    environment: upload-benchmark-results
    permissions:
      id-token: write
      contents: read
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: false

      - name: Authenticate with AWS
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # The max duration enforced by the server side
          role-duration-seconds: 18000
          aws-region: us-east-1

      - name: Setup conda
        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
        with:
          python-version: '3.10'

      - name: Download the list of artifacts from S3
        env:
          ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/
        shell: bash
        run: |
          set -eux
          ${CONDA_RUN} python -mpip install awscli==1.32.18

          mkdir -p artifacts
          pushd artifacts
          ${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" .
          popd

          ls -lah artifacts

      - name: Extract the benchmark results JSON
        shell: bash
        run: |
          set -eux

          mkdir -p benchmark-results

          for ARTIFACTS_BY_JOB in artifacts/*.json; do
            [ -f "${ARTIFACTS_BY_JOB}" ] || break
            echo "${ARTIFACTS_BY_JOB}"
            ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
              --artifacts "${ARTIFACTS_BY_JOB}" \
              --output-dir benchmark-results \
              --repo ${{ github.repository }} \
              --head-branch ${{ github.head_ref || github.ref_name }} \
              --workflow-name "${{ github.workflow }}" \
              --workflow-run-id ${{ github.run_id }} \
              --workflow-run-attempt ${{ github.run_attempt }}
          done

          for SCHEMA in v2 v3; do
            for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
              cat "${BENCHMARK_RESULTS}"
              echo
            done
          done

      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
      - name: Upload the benchmark results (v2)
        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
        with:
          benchmark-results-dir: benchmark-results/v2
          dry-run: false
          schema-version: v2

      - name: Upload the benchmark results (v3)
        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
        with:
          benchmark-results-dir: benchmark-results/v3
          dry-run: false
          schema-version: v3
          github-token: ${{ secrets.GITHUB_TOKEN }}