Port infra from tpu-pytorch
to tpu-pytorch-releases
and multi TPU support
#10763
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Display name of this workflow in the Actions UI.
name: Build and test

# Triggers. Pull requests targeting, and pushes to, `master` or a branch named
# r<digits>.<digits> (for example r2.8) start the workflow, unless every
# changed file lives under experimental/.
on:
  pull_request:
    branches:
      - master
      - r[0-9]+.[0-9]+
    paths-ignore:
      - 'experimental/**'
  push:
    branches:
      - master
      - r[0-9]+.[0-9]+
    paths-ignore:
      - 'experimental/**'
  # Allows the workflow to be started manually.
  workflow_dispatch:
# Concurrency group key: workflow name, then the PR number (or the commit SHA
# when the event carries no pull request), then two booleans recording whether
# the run was started by `workflow_dispatch` or by `schedule`.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  # A newer run in the same group cancels the one still in progress. Push runs
  # are keyed by SHA, so in practice this mainly supersedes stale PR runs.
  cancel-in-progress: true
jobs:
  # Resolves the current HEAD of pytorch/pytorch once and publishes it as the
  # `torch_commit` output, so the jobs that take a `torch-commit` input all
  # receive the same value.
  get-torch-commit:
    runs-on: ubuntu-latest
    outputs:
      torch_commit: ${{ steps.commit.outputs.torch_commit }}
    steps:
      - id: commit
        name: Get latest torch commit
        run: |
          # Fail the step instead of publishing an empty commit when the
          # remote lookup fails (a failure inside `echo "$(...)"` would
          # otherwise be swallowed).
          set -euo pipefail
          torch_commit="$(git ls-remote https://github.com/pytorch/pytorch.git HEAD | awk '{print $1}')"
          if [[ -z "${torch_commit}" ]]; then
            echo "::error::Could not resolve HEAD of pytorch/pytorch"
            exit 1
          fi
          echo "torch_commit=${torch_commit}" >> "$GITHUB_OUTPUT"

  # Calls the reusable build workflow with the TPU VM development image and
  # the torch commit resolved above.
  build-torch-xla:
    name: "Build PyTorch/XLA"
    uses: ./.github/workflows/_build_torch_xla.yml
    needs: get-torch-commit
    with:
      dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_tpuvm
      torch-commit: ${{needs.get-torch-commit.outputs.torch_commit}}
    secrets:
      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

  # Disabled due to https://github.com/pytorch/xla/issues/8199
  # build-torch-with-cuda:
  #   name: "Build PyTorch with CUDA"
  #   uses: ./.github/workflows/_build_torch_with_cuda.yml
  #   needs: get-torch-commit
  #   with:
  #     # note that to build a torch wheel with CUDA enabled, we do not need a GPU runner.
  #     dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1
  #     torch-commit: ${{needs.get-torch-commit.outputs.torch_commit}}
  #     runner: linux.24xlarge

  # Disabled due to https://github.com/pytorch/xla/issues/8199
  # build-cuda-plugin:
  #   name: "Build XLA CUDA plugin"
  #   uses: ./.github/workflows/_build_plugin.yml
  #   with:
  #     dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1
  #   secrets:
  #     gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

  # Calls the reusable test workflow after the build has finished; coverage
  # collection is switched off and the run is capped at 120 minutes.
  test-python-cpu:
    name: "CPU tests"
    uses: ./.github/workflows/_test.yml
    needs: [build-torch-xla, get-torch-commit]
    with:
      dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_tpuvm
      timeout-minutes: 120
      collect-coverage: false
      torch-commit: ${{needs.get-torch-commit.outputs.torch_commit}}
    secrets:
      gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

  # Disabled due to https://github.com/pytorch/xla/issues/8199
  # test-cuda:
  #   name: "GPU tests"
  #   uses: ./.github/workflows/_test.yml
  #   needs: [build-torch-xla, build-cuda-plugin, get-torch-commit]
  #   with:
  #     dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1
  #     runner: linux.8xlarge.nvidia.gpu
  #     timeout-minutes: 300
  #     collect-coverage: false
  #     install-cuda-plugin: true
  #     torch-commit: ${{needs.get-torch-commit.outputs.torch_commit}}
  #   secrets:
  #     gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}

  # Disabled due to https://github.com/pytorch/xla/issues/8199
  # test-cuda-with-pytorch-cuda-enabled:
  #   name: "GPU tests requiring torch CUDA"
  #   uses: ./.github/workflows/_test_requiring_torch_cuda.yml
  #   needs: [build-torch-with-cuda, build-torch-xla, build-cuda-plugin, get-torch-commit]
  #   with:
  #     dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_cuda_12.1
  #     runner: linux.8xlarge.nvidia.gpu
  #     timeout-minutes: 300
  #     collect-coverage: false
  #     torch-commit: ${{needs.get-torch-commit.outputs.torch_commit}}

  # Runs the reusable TPU CI workflow once per TPU version in the matrix.
  # Skipped for manually dispatched runs: only push and pull_request qualify.
  test-tpu:
    name: "TPU tests"
    uses: ./.github/workflows/_tpu_ci.yml
    needs: build-torch-xla
    if: github.event_name == 'push' || github.event_name == 'pull_request'
    strategy:
      matrix:
        tpu-version: ['v4', 'v5p', 'v6e']
      fail-fast: false  # Continue running other TPU versions if one fails
    with:
      tpu-version: ${{ matrix.tpu-version }}
      # Runner label is derived from the version, e.g. `tpu-v4`.
      runner-label: ${{ format('tpu-{0}', matrix.tpu-version) }}

  # Calls the reusable docs workflow once the build has finished.
  push-docs:
    name: "Build docs"
    uses: ./.github/workflows/_docs.yml
    needs: build-torch-xla
    with:
      dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.10_tpuvm
    secrets:
      torchxla-bot-token: ${{ secrets.TORCH_XLA_BOT_TOKEN }}