Skip to content

Commit

Permalink
update cluster tests to use den launcher
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexandra Belousov authored and Alexandra Belousov committed Dec 10, 2024
1 parent 09f5e3e commit db48488
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 7 deletions.
4 changes: 4 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,10 @@ def event_loop():

from tests.fixtures.on_demand_cluster_fixtures import (
a10g_gpu_cluster, # noqa: F401
den_launched_ondemand_aws_docker_cluster, # noqa: F401
den_launched_ondemand_aws_k8s_cluster, # noqa: F401
den_launched_ondemand_gcp_k8s_cluster, # noqa: F401
den_launcher_v100_gpu_cluster, # noqa: F401
k80_gpu_cluster, # noqa: F401
multinode_cpu_docker_conda_cluster, # noqa: F401
multinode_gpu_cluster, # noqa: F401
Expand Down
90 changes: 88 additions & 2 deletions tests/fixtures/on_demand_cluster_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import runhouse as rh

from runhouse.constants import DEFAULT_HTTPS_PORT
from runhouse.resources.hardware.utils import LauncherType
from runhouse.resources.images.image import Image
from tests.conftest import init_args

Expand Down Expand Up @@ -36,14 +37,30 @@ def setup_test_cluster(args, request, create_env=False):
@pytest.fixture(
    # Names of the cluster fixtures this fixture fans out over. `ids` below is
    # positional: keep it in the same order, one id per param.
    params=[
        "ondemand_aws_docker_cluster",
        "den_launched_ondemand_aws_docker_cluster",
        "ondemand_gcp_cluster",
        "ondemand_k8s_cluster",
        "ondemand_k8s_docker_cluster",
        "v100_gpu_cluster",
        "den_launcher_v100_gpu_cluster",
        "k80_gpu_cluster",
        "a10g_gpu_cluster",
        "den_launched_ondemand_aws_k8s_cluster",
        "den_launched_ondemand_gcp_k8s_cluster",
    ],
    ids=[
        "aws_cpu",
        # The den-launched docker cluster requests "CPU:2+", so it is labelled
        # as a CPU cluster, mirroring "aws_cpu" above.
        "aws_cpu_den_launcher",
        "gcp_cpu",
        "k8s_cpu",
        "k8s_docker_cpu",
        "v100",
        "v100_den_launcher",
        "k80",
        "a10g",
        "aws_k8_den_launcher",
        "gcp_k8_den_launcher",
    ],
)
def ondemand_cluster(request):
    """Return the cluster fixture selected by the current parametrization.

    Args:
        request: pytest's fixture request object; ``request.param`` holds the
            name of one of the fixtures listed in ``params``.

    Returns:
        The value of the named fixture, looked up with
        ``request.getfixturevalue`` so that only the selected cluster fixture
        is requested.
    """
    return request.getfixturevalue(request.param)
Expand Down Expand Up @@ -71,6 +88,25 @@ def ondemand_aws_docker_cluster(request):
return cluster


@pytest.fixture(scope="session")
def den_launched_ondemand_aws_docker_cluster(request):
    """
    Session-scoped AWS CPU cluster on a Ray docker image, configured with the Den launcher.

    Note: Also used to test docker and default env with alternate Ray version.
    """
    # Default image that pins the alternate Ray version mentioned above.
    pinned_ray_image = Image(name="default_image").install_packages(["ray==2.30.0"])

    return setup_test_cluster(
        {
            "name": "aws-cpu-den",
            "instance_type": "CPU:2+",
            "provider": "aws",
            "image_id": "docker:rayproject/ray:latest-py311-cpu",
            "region": "us-east-2",
            "image": pinned_ray_image,
            "sky_kwargs": {"launch": {"retry_until_up": True}},
            "launcher": LauncherType.DEN,
        },
        request,
        create_env=True,
    )


@pytest.fixture(scope="session")
def ondemand_aws_https_cluster_with_auth(request, test_rns_folder):
args = {
Expand Down Expand Up @@ -129,6 +165,44 @@ def ondemand_k8s_cluster(request):
return cluster


@pytest.fixture(scope="session")
def den_launched_ondemand_aws_k8s_cluster(request):
    """Session-scoped Kubernetes CPU cluster on the AWS EKS context, configured with the Den launcher.

    Skips the requesting test when ``~/.kube/config`` does not exist.
    """
    if not Path.home().joinpath(".kube", "config").exists():
        pytest.skip("no kubeconfig found")

    cluster_args = {
        "name": "k8s-cpu-den",
        "provider": "kubernetes",
        "instance_type": "CPU:1",
        "memory": ".2",
        "launcher": LauncherType.DEN,
        "context": "arn:aws:eks:us-east-1:172657097474:cluster/runhouse-k8s",
    }
    return setup_test_cluster(cluster_args, request)


@pytest.fixture(scope="session")
def den_launched_ondemand_gcp_k8s_cluster(request):
    """Session-scoped Kubernetes CPU cluster on the GKE context, configured with the Den launcher.

    Skips the requesting test when ``~/.kube/config`` does not exist.

    Args:
        request: pytest's fixture request object, forwarded to
            ``setup_test_cluster``.

    Returns:
        The cluster object produced by ``setup_test_cluster``.
    """
    kube_config_path = Path.home() / ".kube" / "config"

    if not kube_config_path.exists():
        pytest.skip("no kubeconfig found")

    args = {
        # Distinct from the AWS k8s fixture's "k8s-cpu-den": both fixtures are
        # session-scoped and can be live in the same run, so sharing one name
        # across two kube contexts would make them collide.
        # NOTE(review): confirm nothing else looks this cluster up by the old name.
        "name": "gcp-k8s-cpu-den",
        "provider": "kubernetes",
        "instance_type": "CPU:1",
        "memory": ".2",
        "launcher": LauncherType.DEN,
        "context": "gke_testing",
    }
    cluster = setup_test_cluster(args, request)
    return cluster


@pytest.fixture(scope="session")
def ondemand_k8s_docker_cluster(request):
kube_config_path = Path.home() / ".kube" / "config"
Expand Down Expand Up @@ -156,7 +230,19 @@ def v100_gpu_cluster(request):
"instance_type": "V100:1",
"provider": "aws",
}
cluster = setup_test_cluster(args, request)
cluster = setup_test_cluster(args, request, create_env=True)
return cluster


@pytest.fixture(scope="session")
def den_launcher_v100_gpu_cluster(request):
    """Session-scoped AWS cluster requesting one V100 GPU, configured with the Den launcher."""
    return setup_test_cluster(
        {
            "name": "rh-v100-den",
            "instance_type": "V100:1",
            "provider": "aws",
            "launcher": LauncherType.DEN,
        },
        request,
        create_env=True,
    )


Expand Down
3 changes: 2 additions & 1 deletion tests/fixtures/static_cluster_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ def setup_static_cluster(
compute_type: computeType = computeType.cpu,
):
instance_type = "CPU:4" if compute_type == computeType.cpu else "g5.xlarge"
launcher = launcher if launcher else LauncherType.LOCAL
cluster = rh.cluster(
f"aws-{compute_type}-password",
f"{launcher}-aws-{compute_type}-password",
instance_type=instance_type,
provider="aws",
launcher=launcher,
Expand Down
6 changes: 2 additions & 4 deletions tests/test_resources/test_clusters/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
set_output_env_vars,
)


""" TODO:
1) In subclasses, test factory methods create same type as parent
2) In subclasses, use monkeypatching to make sure `up()` is called for various methods if the server is not up
Expand Down Expand Up @@ -131,9 +130,7 @@ class TestCluster(tests.test_resources.test_resource.TestResource):
}
MINIMAL = {"cluster": ["static_cpu_pwd_cluster"]}
RELEASE = {
"cluster": [
"static_cpu_pwd_cluster",
]
"cluster": ["static_cpu_pwd_cluster", "static_cpu_pwd_cluster_den_launcher"]
}
MAXIMAL = {
"cluster": [
Expand All @@ -142,6 +139,7 @@ class TestCluster(tests.test_resources.test_resource.TestResource):
"docker_cluster_pwd_ssh_no_auth",
"static_cpu_pwd_cluster",
"multinode_cpu_docker_conda_cluster",
"static_gpu_pwd_cluster_den_launcher",
]
}

Expand Down
5 changes: 5 additions & 0 deletions tests/test_resources/test_clusters/test_on_demand_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,14 @@ class TestOnDemandCluster(tests.test_resources.test_clusters.test_cluster.TestCl
RELEASE = {
"cluster": [
"ondemand_aws_docker_cluster",
"den_launched_ondemand_aws_docker_cluster",
"ondemand_gcp_cluster",
"ondemand_aws_https_cluster_with_auth",
"ondemand_k8s_cluster",
"ondemand_k8s_docker_cluster",
# TODO [SB]: uncomment once launching k8s clusters via den is fully supported
# "den_launched_ondemand_aws_k8s_cluster",
# "den_launched_ondemand_gcp_k8s_cluster"
]
}
MAXIMAL = {
Expand All @@ -101,6 +105,7 @@ class TestOnDemandCluster(tests.test_resources.test_clusters.test_cluster.TestCl
"ondemand_k8s_docker_cluster",
"ondemand_aws_https_cluster_with_auth",
"v100_gpu_cluster",
"den_launcher_v100_gpu_cluster",
"k80_gpu_cluster",
"a10g_gpu_cluster",
"static_cpu_pwd_cluster",
Expand Down

0 comments on commit db48488

Please sign in to comment.