From c53543b544d6eedca0f9b88125ffc9f644f273ba Mon Sep 17 00:00:00 2001
From: Alexandra Belousov
Date: Sun, 1 Dec 2024 18:14:04 +0200
Subject: [PATCH] update cluster tests to use den launcher

---
Notes (ignored by git am):
 * Adds Den-launched (LauncherType.DEN) variants of the on-demand AWS docker,
   V100 GPU and Kubernetes cluster fixtures and registers them in conftest.py.
 * The parametrized `ondemand_cluster` fixture gains the new fixtures; its
   `ids` list is positional, so each id must describe the param at the same
   index (the Den AWS docker cluster is a CPU cluster: "aws_cpu_den_launcher").
 * The existing `v100_gpu_cluster` fixture now calls setup_test_cluster with
   create_env=True.
 * setup_static_cluster defaults launcher_type to LauncherType.LOCAL and
   prefixes the cluster name with the launcher type.
 * The Den-launched Kubernetes fixtures stay commented out of the RELEASE
   level of TestOnDemandCluster (see the TODO in that hunk).

 tests/conftest.py                             |  4 +
 tests/fixtures/on_demand_cluster_fixtures.py  | 90 ++++++++++++++++++-
 tests/fixtures/static_cluster_fixtures.py     |  3 +-
 .../test_clusters/test_cluster.py             |  5 +-
 .../test_clusters/test_on_demand_cluster.py   |  5 ++
 5 files changed, 101 insertions(+), 6 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index af9260d70..3fcff08d7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -235,6 +235,10 @@ def event_loop():
 
 from tests.fixtures.on_demand_cluster_fixtures import (
     a10g_gpu_cluster,  # noqa: F401
+    den_launched_ondemand_aws_docker_cluster,  # noqa: F401
+    den_launched_ondemand_aws_k8s_cluster,  # noqa: F401
+    den_launched_ondemand_gcp_k8s_cluster,  # noqa: F401
+    den_launcher_v100_gpu_cluster,  # noqa: F401
     k80_gpu_cluster,  # noqa: F401
     multinode_cpu_docker_conda_cluster,  # noqa: F401
     multinode_gpu_cluster,  # noqa: F401
diff --git a/tests/fixtures/on_demand_cluster_fixtures.py b/tests/fixtures/on_demand_cluster_fixtures.py
index aa04bb6f8..9c0c3c654 100644
--- a/tests/fixtures/on_demand_cluster_fixtures.py
+++ b/tests/fixtures/on_demand_cluster_fixtures.py
@@ -5,6 +5,7 @@ import runhouse as rh
 
 from runhouse.constants import DEFAULT_HTTPS_PORT
+from runhouse.resources.hardware.utils import LauncherType
 from runhouse.resources.images.image import Image
 
 from tests.conftest import init_args
 
@@ -36,14 +37,30 @@ def setup_test_cluster(args, request, create_env=False):
 @pytest.fixture(
     params=[
         "ondemand_aws_docker_cluster",
+        "den_launched_ondemand_aws_docker_cluster",
         "ondemand_gcp_cluster",
         "ondemand_k8s_cluster",
         "ondemand_k8s_docker_cluster",
         "v100_gpu_cluster",
+        "den_launcher_v100_gpu_cluster",
         "k80_gpu_cluster",
         "a10g_gpu_cluster",
+        "den_launched_ondemand_aws_k8s_cluster",
+        "den_launched_ondemand_gcp_k8s_cluster",
+    ],
+    ids=[
+        "aws_cpu",
+        "aws_cpu_den_launcher",
+        "gcp_cpu",
+        "k8s_cpu",
+        "k8s_docker_cpu",
+        "v100",
+        "v100_den_launcher",
+        "k80",
+        "a10g",
+        "aws_k8_den_launcher",
+        "gcp_k8_den_launcher",
     ],
-    ids=["aws_cpu", "gcp_cpu", "k8s_cpu", "k8s_docker_cpu", "v100", "k80", "a10g"],
 )
 def ondemand_cluster(request):
     return request.getfixturevalue(request.param)
@@ -71,6 +88,25 @@ def ondemand_aws_docker_cluster(request):
     return cluster
 
 
+@pytest.fixture(scope="session")
+def den_launched_ondemand_aws_docker_cluster(request):
+    """
+    Note: Also used to test docker and default env with alternate Ray version.
+    """
+    args = {
+        "name": "aws-cpu-den",
+        "instance_type": "CPU:2+",
+        "provider": "aws",
+        "image_id": "docker:rayproject/ray:latest-py311-cpu",
+        "region": "us-east-2",
+        "image": Image(name="default_image").install_packages(["ray==2.30.0"]),
+        "sky_kwargs": {"launch": {"retry_until_up": True}},
+        "launcher_type": LauncherType.DEN,
+    }
+    cluster = setup_test_cluster(args, request, create_env=True)
+    return cluster
+
+
 @pytest.fixture(scope="session")
 def ondemand_aws_https_cluster_with_auth(request, test_rns_folder):
     args = {
@@ -129,6 +165,44 @@ def ondemand_k8s_cluster(request):
     return cluster
 
 
+@pytest.fixture(scope="session")
+def den_launched_ondemand_aws_k8s_cluster(request):
+    kube_config_path = Path.home() / ".kube" / "config"
+
+    if not kube_config_path.exists():
+        pytest.skip("no kubeconfig found")
+
+    args = {
+        "name": "k8s-cpu-den",
+        "provider": "kubernetes",
+        "instance_type": "CPU:1",
+        "memory": ".2",
+        "launcher_type": LauncherType.DEN,
+        "context": "arn:aws:eks:us-east-1:172657097474:cluster/runhouse-k8s",
+    }
+    cluster = setup_test_cluster(args, request)
+    return cluster
+
+
+@pytest.fixture(scope="session")
+def den_launched_ondemand_gcp_k8s_cluster(request):
+    kube_config_path = Path.home() / ".kube" / "config"
+
+    if not kube_config_path.exists():
+        pytest.skip("no kubeconfig found")
+
+    args = {
+        "name": "k8s-cpu-den",
+        "provider": "kubernetes",
+        "instance_type": "CPU:1",
+        "memory": ".2",
+        "launcher_type": LauncherType.DEN,
+        "context": "gke_testing",
+    }
+    cluster = setup_test_cluster(args, request)
+    return cluster
+
+
 @pytest.fixture(scope="session")
 def ondemand_k8s_docker_cluster(request):
     kube_config_path = Path.home() / ".kube" / "config"
@@ -156,7 +230,19 @@ def v100_gpu_cluster(request):
         "instance_type": "V100:1",
         "provider": "aws",
     }
-    cluster = setup_test_cluster(args, request)
+    cluster = setup_test_cluster(args, request, create_env=True)
+    return cluster
+
+
+@pytest.fixture(scope="session")
+def den_launcher_v100_gpu_cluster(request):
+    args = {
+        "name": "rh-v100-den",
+        "instance_type": "V100:1",
+        "provider": "aws",
+        "launcher_type": LauncherType.DEN,
+    }
+    cluster = setup_test_cluster(args, request, create_env=True)
     return cluster
 
 
diff --git a/tests/fixtures/static_cluster_fixtures.py b/tests/fixtures/static_cluster_fixtures.py
index 4e63c8f22..48fcf6cb7 100644
--- a/tests/fixtures/static_cluster_fixtures.py
+++ b/tests/fixtures/static_cluster_fixtures.py
@@ -20,8 +20,9 @@ def setup_static_cluster(
     compute_type: computeType = computeType.cpu,
 ):
     instance_type = "CPU:4" if compute_type == computeType.cpu else "g5.xlarge"
+    launcher_type = launcher_type if launcher_type else LauncherType.LOCAL
     cluster = rh.cluster(
-        f"aws-{compute_type}-password",
+        f"{launcher_type}-aws-{compute_type}-password",
         instance_type=instance_type,
         provider="aws",
         launcher_type=launcher_type,
diff --git a/tests/test_resources/test_clusters/test_cluster.py b/tests/test_resources/test_clusters/test_cluster.py
index a8bb9773c..b3b8e69e4 100644
--- a/tests/test_resources/test_clusters/test_cluster.py
+++ b/tests/test_resources/test_clusters/test_cluster.py
@@ -130,9 +130,7 @@ class TestCluster(tests.test_resources.test_resource.TestResource):
     }
     MINIMAL = {"cluster": ["static_cpu_pwd_cluster"]}
     RELEASE = {
-        "cluster": [
-            "static_cpu_pwd_cluster",
-        ]
+        "cluster": ["static_cpu_pwd_cluster", "static_cpu_pwd_cluster_den_launcher"]
     }
     MAXIMAL = {
         "cluster": [
@@ -141,6 +139,7 @@ class TestCluster(tests.test_resources.test_resource.TestResource):
             "docker_cluster_pwd_ssh_no_auth",
             "static_cpu_pwd_cluster",
             "multinode_cpu_docker_conda_cluster",
+            "static_gpu_pwd_cluster_den_launcher",
         ]
     }
 
diff --git a/tests/test_resources/test_clusters/test_on_demand_cluster.py b/tests/test_resources/test_clusters/test_on_demand_cluster.py
index 5c6372a54..d9d771bd6 100644
--- a/tests/test_resources/test_clusters/test_on_demand_cluster.py
+++ b/tests/test_resources/test_clusters/test_on_demand_cluster.py
@@ -87,10 +87,14 @@ class TestOnDemandCluster(tests.test_resources.test_clusters.test_cluster.TestCl
     RELEASE = {
         "cluster": [
             "ondemand_aws_docker_cluster",
+            "den_launched_ondemand_aws_docker_cluster",
             "ondemand_gcp_cluster",
             "ondemand_aws_https_cluster_with_auth",
             "ondemand_k8s_cluster",
             "ondemand_k8s_docker_cluster",
+            # TODO [SB]: un-mark once launching k8 cluster via den is fully supported
+            # "den_launched_ondemand_aws_k8s_cluster",
+            # "den_launched_ondemand_gcp_k8s_cluster"
         ]
     }
     MAXIMAL = {
@@ -101,6 +105,7 @@ class TestOnDemandCluster(tests.test_resources.test_clusters.test_cluster.TestCl
             "ondemand_k8s_docker_cluster",
             "ondemand_aws_https_cluster_with_auth",
             "v100_gpu_cluster",
+            "den_launcher_v100_gpu_cluster",
             "k80_gpu_cluster",
             "a10g_gpu_cluster",
             "static_cpu_pwd_cluster",