Skip to content

Commit

Permalink
update cluster tests to use den launcher
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexandra Belousov authored and Alexandra Belousov committed Dec 10, 2024
1 parent 09f5e3e commit 2ec531c
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 8 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/nightly_release_testing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ jobs:
DEN_TESTER_TOKEN: ${{ secrets.DEN_TESTER_PROD_TOKEN }}
DEN_TESTER_USERNAME: ${{ secrets.DEN_TESTER_USERNAME }}
API_SERVER_URL: ${{ env.API_SERVER_URL }}
AWS_KUBERNETES_ARN: ${{ secrets.AWS_KUBERNETES_ARN }}

- name: Run cluster and not on-demand tests
env:
Expand Down Expand Up @@ -104,6 +105,7 @@ jobs:
DEN_TESTER_TOKEN: ${{ secrets.DEN_TESTER_PROD_TOKEN }}
DEN_TESTER_USERNAME: ${{ secrets.DEN_TESTER_USERNAME }}
API_SERVER_URL: ${{ env.API_SERVER_URL }}
AWS_KUBERNETES_ARN: ${{ secrets.AWS_KUBERNETES_ARN }}

- name: Run on-demand aws tests
env:
Expand Down Expand Up @@ -145,6 +147,7 @@ jobs:
DEN_TESTER_TOKEN: ${{ secrets.DEN_TESTER_PROD_TOKEN }}
DEN_TESTER_USERNAME: ${{ secrets.DEN_TESTER_USERNAME }}
API_SERVER_URL: ${{ env.API_SERVER_URL }}
AWS_KUBERNETES_ARN: ${{ secrets.AWS_KUBERNETES_ARN }}

- name: Run on-demand aws tests
env:
Expand Down Expand Up @@ -184,6 +187,7 @@ jobs:
DEN_TESTER_TOKEN: ${{ secrets.DEN_TESTER_PROD_TOKEN }}
DEN_TESTER_USERNAME: ${{ secrets.DEN_TESTER_USERNAME }}
API_SERVER_URL: ${{ env.API_SERVER_URL }}
AWS_KUBERNETES_ARN: ${{ secrets.AWS_KUBERNETES_ARN }}

- name: Run on-demand gcp tests
env:
Expand Down Expand Up @@ -222,6 +226,7 @@ jobs:
DEN_TESTER_TOKEN: ${{ secrets.DEN_TESTER_PROD_TOKEN }}
DEN_TESTER_USERNAME: ${{ secrets.DEN_TESTER_USERNAME }}
API_SERVER_URL: ${{ env.API_SERVER_URL }}
AWS_KUBERNETES_ARN: ${{ secrets.AWS_KUBERNETES_ARN }}

- name: Run kubernetes tests
env:
Expand Down Expand Up @@ -268,6 +273,7 @@ jobs:
DEN_TESTER_TOKEN: ${{ secrets.DEN_TESTER_PROD_TOKEN }}
DEN_TESTER_USERNAME: ${{ secrets.DEN_TESTER_USERNAME }}
API_SERVER_URL: ${{ env.API_SERVER_URL }}
AWS_KUBERNETES_ARN: ${{ secrets.AWS_KUBERNETES_ARN }}

- name: Wait to check cluster status
run: sleep 600 # 10 minutes
Expand Down
4 changes: 4 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,10 @@ def event_loop():

from tests.fixtures.on_demand_cluster_fixtures import (
a10g_gpu_cluster, # noqa: F401
den_launched_ondemand_aws_docker_cluster, # noqa: F401
den_launched_ondemand_aws_k8s_cluster, # noqa: F401
den_launched_ondemand_gcp_k8s_cluster, # noqa: F401
den_launcher_v100_gpu_cluster, # noqa: F401
k80_gpu_cluster, # noqa: F401
multinode_cpu_docker_conda_cluster, # noqa: F401
multinode_gpu_cluster, # noqa: F401
Expand Down
90 changes: 87 additions & 3 deletions tests/fixtures/on_demand_cluster_fixtures.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import os
from pathlib import Path

import pytest

import runhouse as rh

from runhouse.constants import DEFAULT_HTTPS_PORT
from runhouse.resources.hardware.utils import LauncherType
from runhouse.resources.images.image import Image
from tests.conftest import init_args

Expand Down Expand Up @@ -36,14 +38,30 @@ def setup_test_cluster(args, request, create_env=False):
@pytest.fixture(
    params=[
        "ondemand_aws_docker_cluster",
        "den_launched_ondemand_aws_docker_cluster",
        "ondemand_gcp_cluster",
        "ondemand_k8s_cluster",
        "ondemand_k8s_docker_cluster",
        "v100_gpu_cluster",
        "den_launcher_v100_gpu_cluster",
        "k80_gpu_cluster",
        "a10g_gpu_cluster",
        "den_launched_ondemand_aws_k8s_cluster",
        "den_launched_ondemand_gcp_k8s_cluster",
    ],
    # Exactly one id per entry in ``params``, in the same order. ``ids`` may
    # only be passed once; a second, shorter list would be a repeated keyword
    # argument and would not line up with ``params``.
    ids=[
        "aws_cpu",
        # The den-launched AWS docker fixture requests "CPU:2+", so it is
        # labelled as a CPU cluster rather than a GPU one.
        "aws_cpu_den_launcher",
        "gcp_cpu",
        "k8s_cpu",
        "k8s_docker_cpu",
        "v100",
        "v100_den_launcher",
        "k80",
        "a10g",
        "aws_k8_den_launcher",
        "gcp_k8_den_launcher",
    ],
)
def ondemand_cluster(request):
    """Return the on-demand cluster fixture selected by the current parameter.

    ``request.param`` holds the name of one of the fixtures listed in
    ``params``; it is resolved lazily through ``request.getfixturevalue`` so
    only the selected cluster fixture is instantiated.
    """
    return request.getfixturevalue(request.param)
Expand Down Expand Up @@ -71,6 +89,25 @@ def ondemand_aws_docker_cluster(request):
return cluster


@pytest.fixture(scope="session")
def den_launched_ondemand_aws_docker_cluster(request):
    """
    Session-scoped AWS CPU cluster on a Ray docker image, launched via Den.

    Note: pins ray==2.30.0 on the default image, so it also exercises docker
    together with an alternate Ray version.
    """
    # Default image with a pinned Ray version installed on top of it.
    pinned_ray_image = Image(name="default_image").install_packages(["ray==2.30.0"])

    cluster_args = dict(
        name="aws-cpu-den",
        instance_type="CPU:2+",
        provider="aws",
        image_id="docker:rayproject/ray:latest-py311-cpu",
        region="us-east-2",
        image=pinned_ray_image,
        sky_kwargs={"launch": {"retry_until_up": True}},
        launcher=LauncherType.DEN,
    )
    return setup_test_cluster(cluster_args, request, create_env=True)


@pytest.fixture(scope="session")
def ondemand_aws_https_cluster_with_auth(request, test_rns_folder):
args = {
Expand Down Expand Up @@ -129,6 +166,42 @@ def ondemand_k8s_cluster(request):
return cluster


@pytest.fixture(scope="session")
def den_launched_ondemand_aws_k8s_cluster(request):
    """
    Session-scoped Kubernetes CPU cluster launched via Den, using the kube
    context named by the ``AWS_KUBERNETES_ARN`` environment variable.

    Skipped when ``~/.kube/config`` does not exist or when
    ``AWS_KUBERNETES_ARN`` is unset or empty.
    """
    kube_config_path = Path.home() / ".kube" / "config"

    if not kube_config_path.exists():
        pytest.skip("no kubeconfig found")

    kube_context = os.getenv("AWS_KUBERNETES_ARN")
    if not kube_context:
        # os.getenv returns None when the variable is missing; passing that
        # through as the context would no longer pin this fixture to the AWS
        # cluster, so skip instead of launching against an unspecified context.
        pytest.skip("AWS_KUBERNETES_ARN is not set")

    args = {
        # Kept distinct from the name used by
        # den_launched_ondemand_gcp_k8s_cluster: both are session fixtures in
        # the same test lists and must not share one cluster name.
        "name": "aws-k8s-cpu-den",
        "provider": "kubernetes",
        "instance_type": "CPU:1",
        "launcher": LauncherType.DEN,
        "context": kube_context,
    }
    cluster = setup_test_cluster(args, request)
    return cluster


@pytest.fixture(scope="session")
def den_launched_ondemand_gcp_k8s_cluster(request):
    """
    Session-scoped Kubernetes CPU cluster launched via Den, using the
    ``gke_testing`` kube context.

    Skipped when ``~/.kube/config`` does not exist.
    """
    kube_config_path = Path.home() / ".kube" / "config"

    if not kube_config_path.exists():
        pytest.skip("no kubeconfig found")

    args = {
        # Kept distinct from the name used by
        # den_launched_ondemand_aws_k8s_cluster: both are session fixtures in
        # the same test lists and must not share one cluster name.
        "name": "gcp-k8s-cpu-den",
        "provider": "kubernetes",
        "instance_type": "CPU:1",
        "launcher": LauncherType.DEN,
        "context": "gke_testing",
    }
    cluster = setup_test_cluster(args, request)
    return cluster


@pytest.fixture(scope="session")
def ondemand_k8s_docker_cluster(request):
kube_config_path = Path.home() / ".kube" / "config"
Expand All @@ -140,7 +213,6 @@ def ondemand_k8s_docker_cluster(request):
"name": "k8s-docker-cpu",
"provider": "kubernetes",
"instance_type": "CPU:1",
"memory": ".2",
"image": Image(name="default_image").from_docker(
"rayproject/ray:latest-py311-cpu"
),
Expand All @@ -156,7 +228,19 @@ def v100_gpu_cluster(request):
"instance_type": "V100:1",
"provider": "aws",
}
cluster = setup_test_cluster(args, request)
cluster = setup_test_cluster(args, request, create_env=True)
return cluster


@pytest.fixture(scope="session")
def den_launcher_v100_gpu_cluster(request):
    """Session-scoped single-V100 AWS cluster launched through the Den launcher."""
    cluster_args = dict(
        name="rh-v100-den",
        instance_type="V100:1",
        provider="aws",
        launcher=LauncherType.DEN,
    )
    return setup_test_cluster(cluster_args, request, create_env=True)


Expand Down
3 changes: 2 additions & 1 deletion tests/fixtures/static_cluster_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ def setup_static_cluster(
compute_type: computeType = computeType.cpu,
):
instance_type = "CPU:4" if compute_type == computeType.cpu else "g5.xlarge"
launcher = launcher if launcher else LauncherType.LOCAL
cluster = rh.cluster(
f"aws-{compute_type}-password",
f"{launcher}-aws-{compute_type}-password",
instance_type=instance_type,
provider="aws",
launcher=launcher,
Expand Down
6 changes: 2 additions & 4 deletions tests/test_resources/test_clusters/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
set_output_env_vars,
)


""" TODO:
1) In subclasses, test factory methods create same type as parent
2) In subclasses, use monkeypatching to make sure `up()` is called for various methods if the server is not up
Expand Down Expand Up @@ -131,9 +130,7 @@ class TestCluster(tests.test_resources.test_resource.TestResource):
}
MINIMAL = {"cluster": ["static_cpu_pwd_cluster"]}
RELEASE = {
"cluster": [
"static_cpu_pwd_cluster",
]
"cluster": ["static_cpu_pwd_cluster", "static_cpu_pwd_cluster_den_launcher"]
}
MAXIMAL = {
"cluster": [
Expand All @@ -142,6 +139,7 @@ class TestCluster(tests.test_resources.test_resource.TestResource):
"docker_cluster_pwd_ssh_no_auth",
"static_cpu_pwd_cluster",
"multinode_cpu_docker_conda_cluster",
"static_gpu_pwd_cluster_den_launcher",
]
}

Expand Down
4 changes: 4 additions & 0 deletions tests/test_resources/test_clusters/test_on_demand_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,13 @@ class TestOnDemandCluster(tests.test_resources.test_clusters.test_cluster.TestCl
RELEASE = {
"cluster": [
"ondemand_aws_docker_cluster",
"den_launched_ondemand_aws_docker_cluster",
"ondemand_gcp_cluster",
"ondemand_aws_https_cluster_with_auth",
"ondemand_k8s_cluster",
"ondemand_k8s_docker_cluster",
"den_launched_ondemand_aws_k8s_cluster",
"den_launched_ondemand_gcp_k8s_cluster",
]
}
MAXIMAL = {
Expand All @@ -101,6 +104,7 @@ class TestOnDemandCluster(tests.test_resources.test_clusters.test_cluster.TestCl
"ondemand_k8s_docker_cluster",
"ondemand_aws_https_cluster_with_auth",
"v100_gpu_cluster",
"den_launcher_v100_gpu_cluster",
"k80_gpu_cluster",
"a10g_gpu_cluster",
"static_cpu_pwd_cluster",
Expand Down

0 comments on commit 2ec531c

Please sign in to comment.