-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add GPU-enabled CI * Update file reference * Remove other CI * Bump runner version * Try allocating 10 GB * Debug * Debug * Sync CUDA environment with DGL environment * Debug Torch/CUDA interaction * Try adding `pytorch-gpu` * Debug * tmp add print and trial test * check dgl * add torchdata package * add other torch packages required * Try bumping to newer DGL channel targeting PyTorch 2.1 * Add back `pytorch-gpu`? * Revert "tmp add print and trial test" This reverts commit 6c5f42c. * Revert more temporary changes, fix coverage * Syntax * Debug * Debug * Fix --------- Co-authored-by: Lily Wang <[email protected]>
- Loading branch information
1 parent
2bc95a0
commit fbcf4f2
Showing
5 changed files
with
111 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
# GitHub Actions workflow header: display name, triggers, and default shell. | ||
name: GPU-enabled CI | ||
# Triggers: manual dispatch, plus pull requests whose base branch is "main". | ||
on: | ||
workflow_dispatch: | ||
pull_request: | ||
branches: | ||
- "main" | ||
|
||
# Every `run:` step uses a login bash shell (`bash -l`). | ||
# Presumably so the conda/micromamba activation written to the shell profile is picked up - confirm. | ||
defaults: | ||
run: | ||
shell: bash -l {0} | ||
|
||
jobs: | ||
# Job 1 of 3: start a cloud runner on AWS and expose the `mapping` output of the | ||
# `aws-start` step as a job output so that a later job can stop the same instance(s). | ||
start-aws-runner: | ||
runs-on: ubuntu-latest | ||
# NOTE(review): `id-token: write` is presumably required for OIDC-based role assumption | ||
# by the configure-aws-credentials step below - confirm against that action's docs. | ||
permissions: | ||
id-token: write | ||
contents: read | ||
outputs: | ||
mapping: ${{ steps.aws-start.outputs.mapping }} | ||
steps: | ||
# Assume the runner-management IAM role in us-east-1. | ||
- name: Configure AWS credentials | ||
uses: aws-actions/configure-aws-credentials@v4 | ||
with: | ||
role-to-assume: arn:aws:iam::649715411074:role/gh-actions-runner-role | ||
aws-region: us-east-1 | ||
# NOTE(review): the `uses:` reference in this step looks mangled by e-mail obfuscation | ||
# in this page capture ("[email protected]"); restore the real `<action>@<version>` from the repository. | ||
- name: Create cloud runner | ||
id: aws-start | ||
uses: omsf-eco-infra/[email protected] | ||
with: | ||
provider: "aws" | ||
action: "start" | ||
aws_image_id: ami-0d5079d9be06933e5 | ||
aws_instance_type: g4dn.xlarge | ||
# Explicit root device size (presumably in GB - confirm units with the action's docs). | ||
# NOTE(review): the original comment read "IAM default might be 5 GB?"; it likely meant | ||
# the AMI's default root volume, which may be too small for the environment - confirm. | ||
aws_root_device_size: 125 | ||
aws_region_name: us-east-1 | ||
aws_home_dir: /home/ubuntu | ||
# Personal access token passed to the action via the environment. | ||
env: | ||
GH_PAT: ${{ secrets.GH_PAT }} | ||
# Job 2 of 3: build the environment, sanity-check the GPU stack, and run the test suite | ||
# on a self-hosted runner. It waits for start-aws-runner; presumably the runner it lands | ||
# on is the instance that job started - confirm. | ||
self-hosted-test: | ||
runs-on: self-hosted | ||
needs: | ||
- start-aws-runner | ||
steps: | ||
# fetch-depth 0 requests the full git history rather than a shallow clone. | ||
- uses: actions/checkout@v4 | ||
with: | ||
fetch-depth: 0 | ||
|
||
# Diagnostic only: report free disk space on the runner. | ||
- name: Print disk usage | ||
run: "df -h" | ||
|
||
# Diagnostic only: `|| true` keeps the step green when docker is missing or errors. | ||
- name: Print Docker details | ||
run: "docker version || true" | ||
|
||
# Create the environment from the CUDA-specific environment file. | ||
- name: Setup Conda Environment | ||
uses: mamba-org/setup-micromamba@v2 | ||
with: | ||
environment-file: devtools/conda-envs/test_cuda_env.yaml | ||
|
||
# --no-deps: pip installs only this package; dependencies are presumably | ||
# expected to come from the conda environment above - confirm. | ||
- name: Install Package | ||
run: python -m pip install . --no-deps | ||
|
||
# Fails the job early if the package cannot be imported. | ||
- name: Double-check local installation | ||
run: python -c "from openff.nagl import __version__; print(__version__)" | ||
|
||
# The assert makes this step fail when torch reports that CUDA is unavailable. | ||
- name: Check that PyTorch can see CUDA | ||
run: python -c "import torch; assert torch.cuda.is_available()" | ||
|
||
# Fails the job early if DGL cannot be imported. | ||
- name: Check we can see DGL | ||
run: python -c "import dgl; print(dgl.__version__)" | ||
|
||
# Runs the test suite with `-n 4` and writes an XML coverage report. | ||
# NOTE(review): no step in this job uploads the coverage XML - confirm whether that is intended. | ||
- name: Run tests | ||
run: python -m pytest -n 4 -v --cov=openff/nagl --cov-config=pyproject.toml --cov-append --cov-report=xml --color=yes openff/nagl/ | ||
|
||
# Job 3 of 3: stop the instance(s) recorded in the `mapping` output of start-aws-runner. | ||
# `if: always()` makes this job run regardless of the outcome of the jobs it needs, | ||
# so a failed test run does not skip the stop step. | ||
stop-aws-runner: | ||
runs-on: ubuntu-latest | ||
# NOTE(review): `id-token: write` is presumably required for OIDC-based role assumption | ||
# by the configure-aws-credentials step below - confirm against that action's docs. | ||
permissions: | ||
id-token: write | ||
contents: read | ||
needs: | ||
- start-aws-runner | ||
- self-hosted-test | ||
if: ${{ always() }} | ||
steps: | ||
# Assume the same IAM role that the start job assumes. | ||
- name: Configure AWS credentials | ||
uses: aws-actions/configure-aws-credentials@v4 | ||
with: | ||
role-to-assume: arn:aws:iam::649715411074:role/gh-actions-runner-role | ||
aws-region: us-east-1 | ||
# NOTE(review): the `uses:` reference in this step looks mangled by e-mail obfuscation | ||
# in this page capture ("[email protected]"); restore the real `<action>@<version>` from the repository. | ||
- name: Stop instances | ||
uses: omsf-eco-infra/[email protected] | ||
with: | ||
provider: "aws" | ||
action: "stop" | ||
# Instance mapping published by the start job's `aws-start` step. | ||
instance_mapping: ${{ needs.start-aws-runner.outputs.mapping }} | ||
aws_region_name: us-east-1 | ||
env: | ||
GH_PAT: ${{ secrets.GH_PAT }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters