Skip to content

Commit

Permalink
update release testing github action
Browse files (browse the repository at this point in the history)
  • Loading branch information
Alexandra Belousov authored and Alexandra Belousov committed Dec 11, 2024
1 parent a5c7569 commit 4382d35
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 56 deletions.
73 changes: 28 additions & 45 deletions .github/workflows/nightly_release_testing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,6 @@ jobs:
run: pytest --level release tests -k "not cluster" --detached
timeout-minutes: 60

- name: Teardown all clusters
if: always()
run: |
sky status
sky down --all -y
sky status
cluster-tests:
runs-on: ubuntu-latest
permissions:
Expand Down Expand Up @@ -74,16 +67,9 @@ jobs:
KITCHEN_TESTER_USERNAME: ${{ secrets.KITCHEN_TESTER_USERNAME }}
ORG_MEMBER_TOKEN: ${{ secrets.ORG_MEMBER_PROD_TOKEN }}
ORG_MEMBER_USERNAME: ${{ secrets.ORG_MEMBER_USERNAME }}
run: pytest --level release tests -k "clustertest and not ondemand" --detached
run: pytest --level release tests -k "cluster and not ondemand and not TestMultiNodeCluster" --detached
timeout-minutes: 60

- name: Teardown all cluster-tests clusters
if: always()
run: |
sky status
sky down --all -y
sky status
ondemand-aws-tests:
runs-on: ubuntu-latest
permissions:
Expand Down Expand Up @@ -119,13 +105,6 @@ jobs:
run: pytest --level release tests -k "ondemand_aws_https_cluster_with_auth" --detached
timeout-minutes: 60

- name: Teardown all ondemand-aws-tests clusters
if: always()
run: |
sky status
sky down --all -y
sky status
ondemand-aws-multinode-tests:
runs-on: ubuntu-latest
permissions:
Expand Down Expand Up @@ -158,14 +137,6 @@ jobs:
run: pytest --level release tests -k "TestMultiNodeCluster" --detached
timeout-minutes: 60

- name: Teardown all ondemand-aws-multinode clusters
if: always()
run: |
sky status
sky down --all -y
sky status
ondemand-gcp-tests:
runs-on: ubuntu-latest
permissions:
Expand Down Expand Up @@ -198,13 +169,6 @@ jobs:
run: pytest --level release tests -k "ondemand_gcp_cluster" --detached
timeout-minutes: 60

- name: Teardown all ondemand-gcp-tests clusters
if: always()
run: |
sky status
sky down --all -y
sky status
kubernetes-tests:
runs-on: ubuntu-latest
permissions:
Expand Down Expand Up @@ -237,22 +201,41 @@ jobs:
run: pytest --level release tests -k "ondemand_k8s_cluster" --detached
timeout-minutes: 60

- name: Teardown all kubernetes-tests clusters
if: always()
run: |
sky status
sky down --all -y
sky status
check-cluster-status:
teardown-clusters:
  # Single cleanup job for the workflow: it waits for every test job listed in
  # `needs` and runs regardless of their outcome (`if: always()`).
  if: always()
  runs-on: ubuntu-latest
  # Declared for consistency with the other jobs in this workflow, which set
  # `permissions` with `id-token: write`. This job passes AWS_OSS_ROLE_ARN to
  # the same setup action; presumably that action assumes the role via OIDC,
  # which needs `id-token: write` - TODO confirm against
  # setup_release_testing/action.yaml.
  # `contents: read` keeps the checkout step working once permissions are
  # restricted to an explicit list.
  permissions:
    id-token: write
    contents: read
  needs:
    - not-cluster-tests
    - cluster-tests
    - ondemand-aws-tests
    - ondemand-gcp-tests
    - kubernetes-tests
    - ondemand-aws-multinode-tests
  steps:
    - name: Check out repository code
      uses: actions/checkout@v3

    # Same local composite action the test jobs use, with the same credentials.
    - name: Setup Release Testing
      uses: ./.github/workflows/setup_release_testing
      with:
        KUBECONFIG: ${{ secrets.KUBECONFIG }}
        AWS_OSS_ROLE_ARN: ${{ secrets.AWS_OSS_ROLE_ARN }}
        DEV_AWS_ACCESS_KEY: ${{ secrets.DEV_AWS_ACCESS_KEY }}
        DEV_AWS_SECRET_KEY: ${{ secrets.DEV_AWS_SECRET_KEY }}
        GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}
        GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
        DEN_TESTER_TOKEN: ${{ secrets.DEN_TESTER_PROD_TOKEN }}
        DEN_TESTER_USERNAME: ${{ secrets.DEN_TESTER_USERNAME }}
        API_SERVER_URL: ${{ env.API_SERVER_URL }}
        EKS_ARN: ${{ secrets.EKS_ARN }}

    # NOTE(review): `-a -y` presumably means "all clusters" and "skip the
    # confirmation prompt" - verify against the runhouse CLI help.
    - name: Teardown clusters
      run: runhouse cluster down -a -y

check-cluster-status:
if: always()
needs:
- teardown-clusters
runs-on: ubuntu-latest
permissions:
id-token: write
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/setup_release_testing/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ runs:

- name: Install python packages & dependencies
run: |
pip install runhouse[aws,gcp,kubernetes]
pip install git+https://github.com/run-house/runhouse.git@temp-launcher-testing#egg=runhouse[aws,gcp,kubernetes]
pip install -r tests/requirements.txt
shell: bash

Expand Down
16 changes: 8 additions & 8 deletions tests/fixtures/on_demand_cluster_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,12 @@ def ondemand_aws_docker_cluster(request):


@pytest.fixture(scope="session")
def den_launched_ondemand_aws_docker_cluster(request):
def den_launched_ondemand_aws_docker_cluster(request, test_rns_folder):
"""
Note: Also used to test docker and default env with alternate Ray version.
"""
args = {
"name": "aws-cpu-den",
"name": f"{test_rns_folder}-aws-cpu-den",
"instance_type": "CPU:2+",
"provider": "aws",
"image_id": "docker:rayproject/ray:latest-py311-cpu",
Expand Down Expand Up @@ -167,14 +167,14 @@ def ondemand_k8s_cluster(request):


@pytest.fixture(scope="session")
def den_launched_ondemand_aws_k8s_cluster(request):
def den_launched_ondemand_aws_k8s_cluster(request, test_rns_folder):
kube_config_path = Path.home() / ".kube" / "config"

if not kube_config_path.exists():
pytest.skip("no kubeconfig found")

args = {
"name": "k8s-cpu-den",
"name": f"{test_rns_folder}-k8s-cpu-den",
"provider": "kubernetes",
"instance_type": "CPU:1",
"launcher": LauncherType.DEN,
Expand All @@ -185,14 +185,14 @@ def den_launched_ondemand_aws_k8s_cluster(request):


@pytest.fixture(scope="session")
def den_launched_ondemand_gcp_k8s_cluster(request):
def den_launched_ondemand_gcp_k8s_cluster(request, test_rns_folder):
kube_config_path = Path.home() / ".kube" / "config"

if not kube_config_path.exists():
pytest.skip("no kubeconfig found")

args = {
"name": "k8s-cpu-den",
"name": f"{test_rns_folder}-k8s-cpu-den",
"provider": "kubernetes",
"instance_type": "CPU:1",
"launcher": LauncherType.DEN,
Expand Down Expand Up @@ -233,9 +233,9 @@ def v100_gpu_cluster(request):


@pytest.fixture(scope="session")
def den_launcher_v100_gpu_cluster(request):
def den_launcher_v100_gpu_cluster(request, test_rns_folder):
args = {
"name": "rh-v100-den",
"name": f"{test_rns_folder}-rh-v100-den",
"instance_type": "V100:1",
"provider": "aws",
"launcher": LauncherType.DEN,
Expand Down
6 changes: 4 additions & 2 deletions tests/fixtures/static_cluster_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from runhouse.resources.hardware.utils import LauncherType

from tests.conftest import init_args
from tests.fixtures.resource_fixtures import create_folder_path
from tests.utils import test_env


Expand All @@ -21,8 +22,9 @@ def setup_static_cluster(
):
instance_type = "CPU:4" if compute_type == computeType.cpu else "g5.xlarge"
launcher = launcher if launcher else LauncherType.LOCAL
cluster_name = f"{create_folder_path()}-{launcher}-aws-{compute_type}-password"
cluster = rh.cluster(
f"{launcher}-aws-{compute_type}-password",
name=cluster_name,
instance_type=instance_type,
provider="aws",
launcher=launcher,
Expand Down Expand Up @@ -53,7 +55,7 @@ def setup_static_cluster(
"ssh_private_key": "~/.ssh/sky-key",
"password": "cluster-pass",
}
args = dict(name="static-cpu-password", host=[cluster.head_ip], ssh_creds=ssh_creds)
args = dict(name=cluster_name, host=[cluster.head_ip], ssh_creds=ssh_creds)
c = rh.cluster(**args).save()
c.restart_server(resync_rh=True)
init_args[id(c)] = args
Expand Down

0 comments on commit 4382d35

Please sign in to comment.