diff --git a/.github/workflows/nightly_release_testing.yaml b/.github/workflows/nightly_release_testing.yaml index 2f35fe692..8911dfd6e 100644 --- a/.github/workflows/nightly_release_testing.yaml +++ b/.github/workflows/nightly_release_testing.yaml @@ -41,9 +41,7 @@ jobs: - name: Teardown all clusters if: always() run: | - sky status - sky down --all -y - sky status + runhouse cluster down --all -y cluster-tests: runs-on: ubuntu-latest @@ -74,15 +72,13 @@ jobs: KITCHEN_TESTER_USERNAME: ${{ secrets.KITCHEN_TESTER_USERNAME }} ORG_MEMBER_TOKEN: ${{ secrets.ORG_MEMBER_PROD_TOKEN }} ORG_MEMBER_USERNAME: ${{ secrets.ORG_MEMBER_USERNAME }} - run: pytest --level release tests -k "clustertest and not ondemand" --detached + run: pytest --level release tests -k "cluster and not ondemand and not TestMultiNodeCluster" --detached timeout-minutes: 60 - name: Teardown all cluster-tests clusters if: always() run: | - sky status - sky down --all -y - sky status + runhouse cluster down --all -y ondemand-aws-tests: runs-on: ubuntu-latest @@ -122,9 +118,7 @@ jobs: - name: Teardown all ondemand-aws-tests clusters if: always() run: | - sky status - sky down --all -y - sky status + runhouse cluster down --all -y ondemand-aws-multinode-tests: runs-on: ubuntu-latest @@ -161,9 +155,7 @@ jobs: - name: Teardown all ondemand-aws-multinode clusters if: always() run: | - sky status - sky down --all -y - sky status + runhouse cluster down --all -y ondemand-gcp-tests: @@ -201,9 +193,7 @@ jobs: - name: Teardown all ondemand-gcp-tests clusters if: always() run: | - sky status - sky down --all -y - sky status + runhouse cluster down --all -y kubernetes-tests: runs-on: ubuntu-latest @@ -240,9 +230,7 @@ jobs: - name: Teardown all kubernetes-tests clusters if: always() run: | - sky status - sky down --all -y - sky status + runhouse cluster down --all -y check-cluster-status: if: always() diff --git a/tests/fixtures/on_demand_cluster_fixtures.py b/tests/fixtures/on_demand_cluster_fixtures.py index c0882d4a1..a7b6639e5 100644 --- a/tests/fixtures/on_demand_cluster_fixtures.py +++ b/tests/fixtures/on_demand_cluster_fixtures.py @@ -90,12 +90,12 @@ def ondemand_aws_docker_cluster(request): @pytest.fixture(scope="session") -def den_launched_ondemand_aws_docker_cluster(request): +def den_launched_ondemand_aws_docker_cluster(request, test_rns_folder): """ Note: Also used to test docker and default env with alternate Ray version. """ args = { - "name": "aws-cpu-den", + "name": f"{test_rns_folder}-aws-cpu-den", "instance_type": "CPU:2+", "provider": "aws", "image_id": "docker:rayproject/ray:latest-py311-cpu", @@ -167,14 +167,14 @@ def ondemand_k8s_cluster(request): @pytest.fixture(scope="session") -def den_launched_ondemand_aws_k8s_cluster(request): +def den_launched_ondemand_aws_k8s_cluster(request, test_rns_folder): kube_config_path = Path.home() / ".kube" / "config" if not kube_config_path.exists(): pytest.skip("no kubeconfig found") args = { - "name": "k8s-cpu-den", + "name": f"{test_rns_folder}-k8s-cpu-den", "provider": "kubernetes", "instance_type": "CPU:1", "launcher": LauncherType.DEN, @@ -185,14 +185,14 @@ def den_launched_ondemand_aws_k8s_cluster(request): @pytest.fixture(scope="session") -def den_launched_ondemand_gcp_k8s_cluster(request): +def den_launched_ondemand_gcp_k8s_cluster(request, test_rns_folder): kube_config_path = Path.home() / ".kube" / "config" if not kube_config_path.exists(): pytest.skip("no kubeconfig found") args = { - "name": "k8s-cpu-den", + "name": f"{test_rns_folder}-k8s-cpu-den", "provider": "kubernetes", "instance_type": "CPU:1", "launcher": LauncherType.DEN, @@ -233,9 +233,9 @@ def v100_gpu_cluster(request): @pytest.fixture(scope="session") -def den_launcher_v100_gpu_cluster(request): +def den_launcher_v100_gpu_cluster(request, test_rns_folder): args = { - "name": "rh-v100-den", + "name": f"{test_rns_folder}-rh-v100-den", "instance_type": "V100:1", "provider": "aws", "launcher": LauncherType.DEN, diff --git a/tests/fixtures/static_cluster_fixtures.py b/tests/fixtures/static_cluster_fixtures.py index fef9fd3d6..361975b5d 100644 --- a/tests/fixtures/static_cluster_fixtures.py +++ b/tests/fixtures/static_cluster_fixtures.py @@ -7,6 +7,7 @@ from runhouse.resources.hardware.utils import LauncherType from tests.conftest import init_args +from tests.fixtures.resource_fixtures import create_folder_path from tests.utils import test_env @@ -21,8 +22,9 @@ def setup_static_cluster( ): instance_type = "CPU:4" if compute_type == computeType.cpu else "g5.xlarge" launcher = launcher if launcher else LauncherType.LOCAL + cluster_name = f"{create_folder_path()}-{launcher}-aws-{compute_type}-password" cluster = rh.cluster( - f"{launcher}-aws-{compute_type}-password", + name=cluster_name, instance_type=instance_type, provider="aws", launcher=launcher, @@ -53,7 +55,7 @@ def setup_static_cluster( "ssh_private_key": "~/.ssh/sky-key", "password": "cluster-pass", } - args = dict(name="static-cpu-password", host=[cluster.head_ip], ssh_creds=ssh_creds) + args = dict(name=cluster_name, host=[cluster.head_ip], ssh_creds=ssh_creds) c = rh.cluster(**args).save() c.restart_server(resync_rh=True) init_args[id(c)] = args