Skip to content

Commit

Permalink
build rocm wheels
Browse files Browse the repository at this point in the history
  • Loading branch information
tenpercent committed Aug 16, 2024
1 parent 367274c commit 82a7b20
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 16 deletions.
77 changes: 77 additions & 0 deletions .github/workflows/rocm_wheels.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Builds ROCm wheels by expanding a build matrix and handing each entry to the
# reusable wheels_build.yml workflow, then removes dangling Docker images on
# the self-hosted runner.
name: rocm-wheels

on:
  push:
    branches:
      - develop
  pull_request:
    paths:
      - "packaging/compute_wheel_version.sh"
      - ".github/workflows/wheel*"
      # The `wheel*` glob above does not match this file's own name, so it is
      # listed explicitly; otherwise PRs editing only this workflow skip it.
      - ".github/workflows/rocm_wheels.yml"
      - ".github/actions/setup-windows-runner/action.yml"
      - "setup.py"
      - "requirements*.txt"
  workflow_dispatch:
    inputs:
      logLevel:
        description: 'Log level'
        required: false
        default: 'warning'

jobs:
  # Emits the build matrix as a JSON job output consumed by `build`.
  target_determinator:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - id: set-matrix
        shell: python
        run: |
          import json
          import os

          PY_VERSIONS = ['3.11']
          include = []
          # The loop variable is named `runner_os` so it does not shadow the
          # `os` module; the matrix key itself is still called `os`.
          for runner_os in ['ubuntu-alola']:
            for python in PY_VERSIONS:
              for torch_version in ['2.4.0']:
                for toolkit_type, toolkit_short_versions in {'rocm': ["6.1"]}.items():
                  for toolkit_short_version in toolkit_short_versions:
                    include.append(dict(
                      os=runner_os,
                      python=python,
                      torch_version=torch_version,
                      toolkit_type=toolkit_type,
                      toolkit_short_version=toolkit_short_version,
                    ))
                    print(include[-1])
          matrix = {'include': include}
          print(json.dumps(matrix))
          with open(os.environ["GITHUB_OUTPUT"], "a") as fd:
            # GITHUB_OUTPUT is line-oriented: terminate the record.
            fd.write("matrix=" + json.dumps(matrix) + "\n")

  # One reusable-workflow call per matrix entry.
  build:
    needs: target_determinator
    strategy:
      fail-fast: false
      matrix: ${{ fromJson(needs.target_determinator.outputs.matrix) }}

    uses: ./.github/workflows/wheels_build.yml
    if: github.repository == 'rocm/xformers' || github.event_name == 'pull_request'
    with:
      os: ${{ matrix.os }}
      python: ${{ matrix.python }}
      torch_version: ${{ matrix.torch_version }}
      toolkit_type: ${{ matrix.toolkit_type }}
      toolkit_short_version: ${{ matrix.toolkit_short_version }}

  # Runs even when `build` fails or is skipped (always()).
  clean:
    runs-on: self-hosted
    if: ${{ always() }}
    needs: [build]
    steps:
      - name: Remove dangling Docker images
        run: |
          docker images -q -f dangling=true | xargs --no-run-if-empty docker rmi
32 changes: 28 additions & 4 deletions .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,16 @@ jobs:
include = []
for os in ['8-core-ubuntu', 'windows-8-core']:
for python in PY_VERSIONS:
for torch_version in ['2.3.0']:
for cuda_short_version in ["118", "121"]:
for torch_version in ['2.4.0']:
for toolkit_type, toolkit_short_version in {'cuda': ["118", "121"], 'rocm': ["6.0", "6.1"]}.items():
if os == 'windows-8-core' and toolkit_type == 'rocm':
continue
include.append(dict(
os=os,
python=python,
torch_version=torch_version,
cuda_short_version=cuda_short_version,
toolkit_type=toolkit_type,
toolkit_short_version=toolkit_short_version,
))
print(include[-1])
matrix = {'include': include}
Expand All @@ -60,7 +63,8 @@ jobs:
os: ${{ matrix.os }}
python: ${{ matrix.python }}
torch_version: ${{ matrix.torch_version }}
cuda_short_version: ${{ matrix.cuda_short_version }}
toolkit_type: ${{ matrix.toolkit_type }}
toolkit_short_version: ${{ matrix.toolkit_short_version }}

upload_pip:
needs: build
Expand Down Expand Up @@ -92,3 +96,23 @@ jobs:
filter: "*torch2.3.0+cu121*"
execute: ${{ github.repository == 'facebookresearch/xformers' && github.ref_type == 'tag' }}

# After `build`, calls the reusable wheels_upload_s3.yml workflow for artifacts
# matching "*torch2.4.0+rocm6.0*", targeting s3://pytorch/whl/rocm6.0/.
# `execute` is true only for tag refs in facebookresearch/xformers; presumably
# the called workflow skips the upload otherwise - confirm in wheels_upload_s3.yml.
upload_pt_rocm6_0:
needs: build
uses: ./.github/workflows/wheels_upload_s3.yml
with:
aws_role: "arn:aws:iam::749337293305:role/pytorch_bot_uploader_role"
s3_path: s3://pytorch/whl/rocm6.0/
aws_s3_cp_extra_args: --acl public-read
filter: "*torch2.4.0+rocm6.0*"
execute: ${{ github.repository == 'facebookresearch/xformers' && github.ref_type == 'tag' }}

# After `build`, calls the reusable wheels_upload_s3.yml workflow for artifacts
# matching "*torch2.4.0+rocm6.1*", targeting s3://pytorch/whl/rocm6.1/.
# `execute` is true only for tag refs in facebookresearch/xformers; presumably
# the called workflow skips the upload otherwise - confirm in wheels_upload_s3.yml.
upload_pt_rocm6_1:
needs: build
uses: ./.github/workflows/wheels_upload_s3.yml
with:
aws_role: "arn:aws:iam::749337293305:role/pytorch_bot_uploader_role"
s3_path: s3://pytorch/whl/rocm6.1/
aws_s3_cp_extra_args: --acl public-read
filter: "*torch2.4.0+rocm6.1*"
execute: ${{ github.repository == 'facebookresearch/xformers' && github.ref_type == 'tag' }}

44 changes: 35 additions & 9 deletions .github/workflows/wheels_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@ on:
required: true
type: string
description: "Example: 1.13.1"
cuda_short_version:
toolkit_type:
required: true
type: string
description: "Example: cuda for cuda, rocm for rocm"
toolkit_short_version:
required: true
type: string
description: "Example: 117 for 11.7"
Expand All @@ -26,16 +30,20 @@ on:

env:
# you need at least CUDA compute capability 5.0 for some of the stuff compiled here.
TORCH_CUDA_ARCH_LIST: "5.0+PTX 6.0 6.1 7.0 7.5 8.0+PTX"
# Set only for CUDA builds (empty otherwise). format() is used to append the
# optional ' 9.0a' suffix for toolkit >= 118: join() treats a string first
# argument as a single value and never inserts its separator argument.
TORCH_CUDA_ARCH_LIST: ${{ contains(inputs.toolkit_type, 'cuda') && format('{0}{1}', '6.0+PTX 7.0 7.5 8.0+PTX', fromJSON(inputs.toolkit_short_version) >= 118 && ' 9.0a' || '') || '' }}
HIP_ARCHITECTURES: ${{ contains(inputs.toolkit_type, 'rocm') && 'gfx90a gfx942' || '' }}
MAX_JOBS: 4
DISTUTILS_USE_SDK: 1 # otherwise distutils will complain on windows about multiple versions of msvc
XFORMERS_BUILD_TYPE: "Release"
TWINE_USERNAME: __token__
XFORMERS_PACKAGE_FROM: "wheel-${{ github.ref_name }}"
# https://github.blog/changelog/2024-03-07-github-actions-all-actions-will-run-on-node20-instead-of-node16-by-default/
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: "true"
PYTORCH_INDEX_URL: "https://download.pytorch.org/whl/${{ contains(inputs.toolkit_type, 'cuda') && 'cu' || 'rocm' }}${{ inputs.toolkit_short_version }}"

jobs:
build:
name: ${{ contains(inputs.os, 'ubuntu') && 'ubuntu' || 'win' }}-py${{ inputs.python }}-pt${{ inputs.torch_version }}+cu${{ inputs.cuda_short_version }}
name: ${{ contains(inputs.os, 'ubuntu') && 'ubuntu' || 'win' }}-py${{ inputs.python }}-pt${{ inputs.torch_version }}+${{ contains(inputs.toolkit_type, 'cuda') && 'cu' || 'rocm' }}${{ inputs.toolkit_short_version }}
runs-on: ${{ inputs.os }}
env:
# alias for the current python version
Expand All @@ -54,14 +62,17 @@ jobs:
import os
import sys
print(sys.version)
cushort = "${{ inputs.cuda_short_version }}"
cushort = "${{ inputs.toolkit_short_version }}"
TORCH_CUDA_DEFAULT = "121" # pytorch 2.1.0
# https://github.com/Jimver/cuda-toolkit/blob/master/src/links/linux-links.ts
full_version, install_script = {
"121": ("12.1.0", "https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run"),
"118": ("11.8.0", "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"),
"117": ("11.7.1", "https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda_11.7.1_515.65.01_linux.run"),
"116": ("11.6.2", "https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda_11.6.2_510.47.03_linux.run"),
"6.0": ("6.0.3", "https://repo.radeon.com/amdgpu-install/6.0.3/rhel/7/amdgpu-install-6.0.60003-1.el7.noarch.rpm"),
"6.1": ("6.1.2", "https://repo.radeon.com/amdgpu-install/6.1.2/el/7/amdgpu-install-6.1.60102-1.el7.noarch.rpm"),
}[cushort]
with open(os.environ['GITHUB_OUTPUT'], "r+") as fp:
fp.write("CUDA_VERSION=" + full_version + "\n")
Expand All @@ -70,7 +81,7 @@ jobs:
fp.write("TORCH_ORG_S3_PATH=s3://pytorch/whl\n")
fp.write("PUBLISH_PYPI=1\n")
else:
fp.write("CUDA_VERSION_SUFFIX=+cu" + cushort + "\n")
fp.write("CUDA_VERSION_SUFFIX=+" + ("cu" if "cuda" == "${{ inputs.toolkit_type }}" else "rocm") + cushort + "\n")
fp.write("TORCH_ORG_S3_PATH=s3://pytorch/whl/" + cushort + "\n")
fp.write("PUBLISH_PYPI=0\n")
fp.write("CUDA_INSTALL_SCRIPT=" + install_script + "\n")
Expand All @@ -80,6 +91,7 @@ jobs:

- name: Add H100 if nvcc 11.08+
shell: python
if: contains(inputs.toolkit_type, 'cuda')
run: |
import os
import sys
Expand Down Expand Up @@ -140,17 +152,31 @@ jobs:
cuda: ${{ steps.cuda_info.outputs.CUDA_VERSION }}
python: ${{ inputs.python }}

- name: Install dependencies
run: $PY -m pip install wheel setuptools twine -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu${{ inputs.cuda_short_version }}

- if: runner.os == 'Linux'
name: (Linux) list installed packages
run: |
yum list installed
# CUDA builds on Linux only: installs wget/git/prename, downloads the installer
# URL produced by the `cuda_info` step, runs it with `--silent --toolkit`, then
# deletes the downloaded file. The folded scalar (`>`) joins the lines into one
# `&&`-chained command, so a failing stage stops the rest.
- if: runner.os == 'Linux' && contains(inputs.toolkit_type, 'cuda')
name: (Linux) install cuda
run: >
yum install wget git prename -y &&
wget -q "${{ steps.cuda_info.outputs.CUDA_INSTALL_SCRIPT }}" -O cuda.run &&
sh ./cuda.run --silent --toolkit &&
rm ./cuda.run
# ROCm builds on Linux only: installs the amdgpu-install package from the URL
# produced by the `cuda_info` step, runs it for the `rocm` use case, and exports
# ROCM_HOME and an extended PATH to later steps.
- if: runner.os == 'Linux' && contains(inputs.toolkit_type, 'rocm')
name: (Linux) install rocm
run: |
yum install -y libzstd
yum install -y ${{ steps.cuda_info.outputs.CUDA_INSTALL_SCRIPT }}
amdgpu-install -y --usecase=rocm --no-dkms
# Values appended to GITHUB_ENV only become visible in later steps, so
# ROCM_HOME must be defined in this shell before it is expanded into PATH
# (otherwise PATH would gain ":/bin" instead of ":/opt/rocm/bin").
ROCM_HOME=/opt/rocm
echo "ROCM_HOME=${ROCM_HOME}" >> ${GITHUB_ENV}
echo "PATH=$PATH:${ROCM_HOME}/bin" >> ${GITHUB_ENV}
- name: Install dependencies
run: $PY -m pip install wheel setuptools twine -r requirements.txt --extra-index-url $PYTORCH_INDEX_URL

- name: Build wheel
run: |
$PY setup.py bdist_wheel -d dist/ -k $PLAT_ARG
Expand All @@ -160,6 +186,6 @@ jobs:
- run: du -h dist/*
- uses: actions/upload-artifact@v3
with:
name: ${{ inputs.os }}-py${{ inputs.python }}-torch${{ inputs.torch_version }}+cu${{ inputs.cuda_short_version }}_${{ inputs.artifact_tag }}
name: ${{ inputs.os }}-py${{ inputs.python }}-torch${{ inputs.torch_version }}+${{ contains(inputs.toolkit_type, 'cuda') && 'cu' || 'rocm' }}${{ inputs.toolkit_short_version }}_${{ inputs.artifact_tag }}
path: dist/*.whl
# Note: it might be helpful to have additional steps that test if the built wheels actually work
8 changes: 5 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import torch
from torch.utils.cpp_extension import (
CUDA_HOME,
ROCM_HOME,
BuildExtension,
CppExtension,
CUDAExtension,
Expand Down Expand Up @@ -411,8 +412,7 @@ def get_extensions():
source_hip = list(set(source_hip) - set(source_hip_maxk_256))

rename_cpp_cu(source_hip)
rocm_home = os.getenv("ROCM_PATH")
hip_version = get_hip_version(rocm_home)
hip_version = get_hip_version(ROCM_HOME)

source_hip_cu = []
for ff in source_hip:
Expand All @@ -438,12 +438,14 @@ def get_extensions():
if use_rtn_bf16_convert == "1":
cc_flag += ["-DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0"]

# Whitespace-separated HIP offload targets from HIP_ARCHITECTURES. Falls back
# to "native" when the variable is unset, empty, or blank, so the list is never
# empty and at least one offload target is always passed to the compiler.
arch_list = os.getenv("HIP_ARCHITECTURES", "").split() or ["native"]

extra_compile_args = {
"cxx": ["-O3", "-std=c++17"] + generator_flag,
"nvcc": [
"-O3",
"-std=c++17",
f"--offload-arch={os.getenv('HIP_ARCHITECTURES', 'native')}",
*[f"--offload-arch={arch}" for arch in arch_list],
"-U__CUDA_NO_HALF_OPERATORS__",
"-U__CUDA_NO_HALF_CONVERSIONS__",
"-DCK_TILE_FMHA_FWD_FAST_EXP2=1",
Expand Down

0 comments on commit 82a7b20

Please sign in to comment.