Skip to content

Commit

Permalink
Decouple from shared-workflows (#2)
Browse files Browse the repository at this point in the history
This PR makes the GHA workflows here independent of those in
[rapidsai/shared-workflows](https://github.com/rapidsai/shared-workflows/).
It extracts the necessary logic into self-contained workflows so that we
are not tied to the RAPIDS branching/versioning strategy. In addition,
it splits the core logic into a shared workflow that can be used for
both building and publishing, which will enable downstream work to
actually start publishing packages.
  • Loading branch information
vyasr authored May 1, 2024
1 parent 549e1f9 commit d156258
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 23 deletions.
161 changes: 161 additions & 0 deletions .github/workflows/build_and_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
# Reusable workflow. It has no triggers of its own: it runs only when another
# workflow references it with `uses:` (the `workflow_call` event).
name: build_and_test

on:
  workflow_call:
    inputs:
      # Forwarded to every job as RAPIDS_BUILD_TYPE; the publish job runs only
      # when the value is 'branch'.
      build_type:
        type: string
        required: true

# Token scopes, grouped by access level.
permissions:
  # write
  # NOTE(review): presumably needed for the OIDC role assumption performed by
  # aws-actions/configure-aws-credentials in the jobs of this workflow.
  id-token: write
  # read
  actions: read
  contents: read
  packages: read
  pull-requests: read
  # none
  checks: none
  deployments: none
  discussions: none
  issues: none
  pages: none
  repository-projects: none
  security-events: none
  statuses: none

jobs:
# Job (belongs under the workflow's top-level `jobs:` mapping).
# Renders the build and test matrices to compact JSON and publishes them as
# job outputs; the `build` and `test` jobs read them back with fromJSON().
compute-matrices:
  runs-on: ubuntu-latest
  outputs:
    BUILD_MATRIX: ${{ steps.compute-matrix.outputs.BUILD_MATRIX }}
    TEST_MATRIX: ${{ steps.compute-matrix.outputs.TEST_MATRIX }}
  steps:
    - name: Compute Build Matrix
      id: compute-matrix
      run: |
        set -eo pipefail
        # please keep the matrices sorted in ascending order by the following:
        #
        #     [ARCH, PY_VER, CUDA_VER, LINUX_VER, GPU, DRIVER]
        #
        # Each matrix is written as YAML text in an exported variable (yq's
        # env() reads it from the environment), converted to JSON by yq, and
        # wrapped by jq as {"include": [...]} on a single line.
        export BUILD_MATRIX="
          # amd64
          - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '11.8.0', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.2.2', LINUX_VER: 'rockylinux8' }
          # arm64
          - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '11.8.0', LINUX_VER: 'rockylinux8' }
          - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '12.2.2', LINUX_VER: 'rockylinux8' }
        "
        BUILD_MATRIX="$(
          yq -n -o json 'env(BUILD_MATRIX)' | \
          jq -c '{include: .}'
        )"
        echo "BUILD_MATRIX=${BUILD_MATRIX}" | tee --append "${GITHUB_OUTPUT}"

        # The amd64 rows must carry ARCH: 'amd64'; the test job derives its
        # runner label from ARCH, so a wrong value silently drops amd64 test
        # coverage for the wheels built above.
        # NOTE(review): gpu is kept as 'a100' for the amd64 rows; confirm that a
        # runner labelled linux-amd64-gpu-a100-latest-1 exists.
        export TEST_MATRIX="
          # amd64
          - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '11.8.0', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
          - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.0.1', LINUX_VER: 'ubuntu22.04', gpu: 'a100', driver: 'latest' }
          - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.2.2', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
          # arm64
          - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '11.8.0', LINUX_VER: 'ubuntu22.04', gpu: 'a100', driver: 'latest' }
          - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '12.0.1', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' }
          - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '12.2.2', LINUX_VER: 'ubuntu22.04', gpu: 'a100', driver: 'latest' }
        "
        TEST_MATRIX="$(
          yq -n -o json 'env(TEST_MATRIX)' | \
          jq -c '{include: .}'
        )"
        echo "TEST_MATRIX=${TEST_MATRIX}" | tee --append "${GITHUB_OUTPUT}"
# Job (belongs under the workflow's top-level `jobs:` mapping).
# Runs ci/build_wheel.sh once per BUILD_MATRIX entry, inside the ci-wheel
# container image selected by that entry's CUDA, Linux and Python versions.
build:
  name: build-${{ matrix.CUDA_VER }}, ${{ matrix.ARCH }}, ${{ matrix.LINUX_VER }}
  needs: compute-matrices
  strategy:
    # JSON produced by the compute-matrices job: {"include": [...]}.
    matrix: ${{ fromJSON(needs.compute-matrices.outputs.BUILD_MATRIX) }}
  runs-on: 'linux-${{ matrix.ARCH }}-cpu16'
  container:
    image: 'rapidsai/ci-wheel:cuda${{ matrix.CUDA_VER }}-${{ matrix.LINUX_VER }}-py${{ matrix.PY_VER }}'
    env:
      RAPIDS_BUILD_TYPE: ${{ inputs.build_type }}
  steps:
    # Assume the AWS role named by the repository variables for 15 minutes.
    - uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: ${{ vars.AWS_REGION }}
        role-to-assume: ${{ vars.AWS_ROLE_ARN }}
        role-duration-seconds: 900

    # Do not leave the checkout token in the local git config.
    - name: checkout code repo
      uses: actions/checkout@v4
      with:
        persist-credentials: false

    - name: Standardize repository information
      uses: rapidsai/shared-actions/rapids-github-info@main

    - name: Build and repair the wheel
      # Login shell (-l) so the rc file is loaded and the compiler settings
      # it defines are available to the build script.
      shell: bash -leo pipefail {0}
      env:
        GH_TOKEN: ${{ github.token }}
        RAPIDS_BUILD_TYPE: ${{ inputs.build_type }}
      run: ci/build_wheel.sh
# Job (belongs under the workflow's top-level `jobs:` mapping).
# Runs ci/test_wheel.sh once per TEST_MATRIX entry on a GPU runner, after all
# build jobs have finished.
test:
  name: test-${{ matrix.CUDA_VER }}, ${{ matrix.ARCH }}, ${{ matrix.LINUX_VER }}, ${{ matrix.gpu }}
  needs:
    - compute-matrices
    - build
  strategy:
    # JSON produced by the compute-matrices job: {"include": [...]}.
    matrix: ${{ fromJSON(needs.compute-matrices.outputs.TEST_MATRIX) }}
  # Runner label is assembled from the entry's ARCH, gpu and driver fields.
  runs-on: 'linux-${{ matrix.ARCH }}-gpu-${{ matrix.gpu }}-${{ matrix.driver }}-1'
  container:
    image: 'rapidsai/citestwheel:cuda${{ matrix.CUDA_VER }}-${{ matrix.LINUX_VER }}-py${{ matrix.PY_VER }}'
    env:
      # GPU jobs must set this container env variable
      NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
      RAPIDS_BUILD_TYPE: ${{ inputs.build_type }}
  steps:
    # Assume the AWS role named by the repository variables for 15 minutes.
    - uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: ${{ vars.AWS_REGION }}
        role-to-assume: ${{ vars.AWS_ROLE_ARN }}
        role-duration-seconds: 900

    # Fail early, before checkout, if the GPU is not usable in the container.
    - name: Run nvidia-smi to make sure GPU is working
      run: nvidia-smi

    # Do not leave the checkout token in the local git config.
    - name: checkout code repo
      uses: actions/checkout@v4
      with:
        persist-credentials: false

    - name: Standardize repository information
      uses: rapidsai/shared-actions/rapids-github-info@main

    - name: Run tests
      env:
        GH_TOKEN: ${{ github.token }}
        RAPIDS_BUILD_TYPE: ${{ inputs.build_type }}
      run: ci/test_wheel.sh
# Job (belongs under the workflow's top-level `jobs:` mapping).
# Publishes the wheels with rapids-wheels-anaconda. Runs only when the caller
# passes build_type 'branch', and only after the test job has succeeded.
publish:
  if: ${{ inputs.build_type == 'branch' }}
  needs: test
  runs-on: linux-amd64-cpu4
  container:
    image: "rapidsai/ci-wheel:latest"
    env:
      RAPIDS_BUILD_TYPE: ${{ inputs.build_type }}
  steps:
    # Assume the AWS role named by the repository variables for 15 minutes.
    - uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: ${{ vars.AWS_ROLE_ARN }}
        aws-region: ${{ vars.AWS_REGION }}
        role-duration-seconds: 900
    # Do not leave the checkout token in the local git config.
    - name: checkout code repo
      uses: actions/checkout@v4
      with:
        persist-credentials: false
    # Exposes CURRENT_DATE (YYYY-MM-DD) to the later steps of this job.
    # The redirect target is quoted so the path can never be word-split or
    # glob-expanded, matching the quoted "${GITHUB_OUTPUT}" used by the
    # compute-matrices job.
    - name: Get current date
      id: date
      run: |
        echo "CURRENT_DATE=$(date --rfc-3339=date)" >> "${GITHUB_ENV}"
    - name: Standardize repository information
      uses: rapidsai/shared-actions/rapids-github-info@main
    - name: Download wheels from downloads.rapids.ai and publish to anaconda repository
      env:
        RAPIDS_CONDA_TOKEN: ${{ secrets.CONDA_RAPIDSAI_WHEELS_NIGHTLY_TOKEN }}
      run: |
        rapids-wheels-anaconda ucx cpp
23 changes: 3 additions & 20 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,10 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
cancel-in-progress: true

# TODO: I would love to not need RAPIDS shared workflows for these builds, but
# for getting things stood up quickly that's the fastest route.

jobs:
pr-builder:
needs:
- wheel-build-ucx
- wheel-tests-ucx
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
wheel-build-ucx:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
build_type: pull-request
script: ci/build_wheel.sh
wheel-tests-ucx:
needs: wheel-build-ucx
build_and_test:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06
uses: ./.github/workflows/build_and_test.yaml
with:
matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
build_type: pull-request
script: ci/test_wheel.sh
2 changes: 1 addition & 1 deletion ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ sed -i -E "s/^name = \"${package_name}(.*)?\"$/name = \"${package_name}${PACKAGE
python -m pip wheel "${package_dir}"/ -w "${package_dir}"/dist -vvv --no-deps --disable-pip-version-check

python -m auditwheel repair -w ${package_dir}/final_dist --exclude "libcuda.so.1" --exclude "libnvidia-ml.so.1" --exclude "libucm.so.0" --exclude "libuct.so.0" --exclude "libucs.so.0" --exclude "libucp.so.0" ${package_dir}/dist/*
RAPIDS_PY_WHEEL_NAME="${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/final_dist
RAPIDS_PY_WHEEL_NAME="ucx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/final_dist
4 changes: 2 additions & 2 deletions ci/test_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ package_name="libucx"

WHEELHOUSE="${PWD}/dist/"
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp "${WHEELHOUSE}"
RAPIDS_PY_WHEEL_NAME="ucx_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp "${WHEELHOUSE}"
python -m pip install "${package_name}-${RAPIDS_PY_CUDA_SUFFIX}" --find-links "${WHEELHOUSE}"
python -c "import libucx; libucx.load_library()"
python -c "import libucx; libucx.load_library(); print('Loaded libucx libraries successfully!')"

0 comments on commit d156258

Please sign in to comment.