Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update cluster tests to use den launcher #1493

Merged
merged 1 commit into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/nightly_release_testing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ jobs:
DEN_TESTER_TOKEN: ${{ secrets.DEN_TESTER_PROD_TOKEN }}
DEN_TESTER_USERNAME: ${{ secrets.DEN_TESTER_USERNAME }}
API_SERVER_URL: ${{ env.API_SERVER_URL }}
EKS_ARN: ${{ secrets.EKS_ARN }}

- name: Run cluster and not on-demand tests
env:
Expand Down Expand Up @@ -104,6 +105,7 @@ jobs:
DEN_TESTER_TOKEN: ${{ secrets.DEN_TESTER_PROD_TOKEN }}
DEN_TESTER_USERNAME: ${{ secrets.DEN_TESTER_USERNAME }}
API_SERVER_URL: ${{ env.API_SERVER_URL }}
EKS_ARN: ${{ secrets.EKS_ARN }}

- name: Run on-demand aws tests
env:
Expand Down Expand Up @@ -145,6 +147,7 @@ jobs:
DEN_TESTER_TOKEN: ${{ secrets.DEN_TESTER_PROD_TOKEN }}
DEN_TESTER_USERNAME: ${{ secrets.DEN_TESTER_USERNAME }}
API_SERVER_URL: ${{ env.API_SERVER_URL }}
EKS_ARN: ${{ secrets.EKS_ARN }}

- name: Run on-demand aws tests
env:
Expand Down Expand Up @@ -184,6 +187,7 @@ jobs:
DEN_TESTER_TOKEN: ${{ secrets.DEN_TESTER_PROD_TOKEN }}
DEN_TESTER_USERNAME: ${{ secrets.DEN_TESTER_USERNAME }}
API_SERVER_URL: ${{ env.API_SERVER_URL }}
EKS_ARN: ${{ secrets.EKS_ARN }}

- name: Run on-demand gcp tests
env:
Expand Down Expand Up @@ -222,6 +226,7 @@ jobs:
DEN_TESTER_TOKEN: ${{ secrets.DEN_TESTER_PROD_TOKEN }}
DEN_TESTER_USERNAME: ${{ secrets.DEN_TESTER_USERNAME }}
API_SERVER_URL: ${{ env.API_SERVER_URL }}
EKS_ARN: ${{ secrets.EKS_ARN }}

- name: Run kubernetes tests
env:
Expand Down Expand Up @@ -268,6 +273,7 @@ jobs:
DEN_TESTER_TOKEN: ${{ secrets.DEN_TESTER_PROD_TOKEN }}
DEN_TESTER_USERNAME: ${{ secrets.DEN_TESTER_USERNAME }}
API_SERVER_URL: ${{ env.API_SERVER_URL }}
EKS_ARN: ${{ secrets.EKS_ARN }}

- name: Wait to check cluster status
run: sleep 600 # 10 minutes
Expand Down
4 changes: 4 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,10 @@ def event_loop():

from tests.fixtures.on_demand_cluster_fixtures import (
a10g_gpu_cluster, # noqa: F401
den_launched_ondemand_aws_docker_cluster, # noqa: F401
den_launched_ondemand_aws_k8s_cluster, # noqa: F401
den_launched_ondemand_gcp_k8s_cluster, # noqa: F401
den_launcher_v100_gpu_cluster, # noqa: F401
k80_gpu_cluster, # noqa: F401
multinode_cpu_docker_conda_cluster, # noqa: F401
multinode_gpu_cluster, # noqa: F401
Expand Down
90 changes: 87 additions & 3 deletions tests/fixtures/on_demand_cluster_fixtures.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import os
from pathlib import Path

import pytest

import runhouse as rh

from runhouse.constants import DEFAULT_HTTPS_PORT
from runhouse.resources.hardware.utils import LauncherType
from runhouse.resources.images.image import Image
from tests.conftest import init_args

Expand Down Expand Up @@ -39,14 +41,30 @@ def setup_test_cluster(args, request, test_rns_folder, setup_base=False):
@pytest.fixture(
    # `params` and `ids` are parallel lists: entry i of `ids` labels entry i
    # of `params`. Keep both in the same order when adding a cluster.
    params=[
        "ondemand_aws_docker_cluster",
        "den_launched_ondemand_aws_docker_cluster",
        "ondemand_gcp_cluster",
        "ondemand_k8s_cluster",
        "ondemand_k8s_docker_cluster",
        "v100_gpu_cluster",
        "den_launcher_v100_gpu_cluster",
        "k80_gpu_cluster",
        "a10g_gpu_cluster",
        "den_launched_ondemand_aws_k8s_cluster",
        "den_launched_ondemand_gcp_k8s_cluster",
    ],
    ids=[
        "aws_cpu",
        # The den-launched AWS docker fixture requests "CPU:2+", so it is
        # labelled as a CPU cluster rather than a GPU one.
        "aws_cpu_den_launcher",
        "gcp_cpu",
        "k8s_cpu",
        "k8s_docker_cpu",
        "v100",
        "v100_den_launcher",
        "k80",
        "a10g",
        "aws_k8_den_launcher",
        "gcp_k8_den_launcher",
    ],
)
def ondemand_cluster(request):
    """Return the on-demand cluster fixture selected by the current param.

    Each param is the *name* of another fixture; it is resolved lazily with
    ``request.getfixturevalue`` so only the selected cluster is set up.
    """
    return request.getfixturevalue(request.param)
Expand Down Expand Up @@ -74,6 +92,25 @@ def ondemand_aws_docker_cluster(request):
return cluster


@pytest.fixture(scope="session")
def den_launched_ondemand_aws_docker_cluster(request):
    """
    Session-scoped AWS CPU cluster started with the Den launcher.

    Note: Also used to test docker and default env with alternate Ray version.
    """
    args = {
        "name": "aws-cpu-den",
        "instance_type": "CPU:2+",
        "provider": "aws",
        "image_id": "docker:rayproject/ray:latest-py311-cpu",
        "region": "us-east-2",
        "image": Image(name="default_image").install_packages(["ray==2.30.0"]),
        "sky_kwargs": {"launch": {"retry_until_up": True}},
        "launcher": LauncherType.DEN,
    }
    # NOTE(review): the only visible signature of setup_test_cluster is
    # (args, request, test_rns_folder, setup_base=False); confirm that the
    # current helper really accepts `create_env`.
    cluster = setup_test_cluster(args, request, create_env=True)
    return cluster


@pytest.fixture(scope="session")
def ondemand_aws_https_cluster_with_auth(request, test_rns_folder):
args = {
Expand Down Expand Up @@ -132,6 +169,42 @@ def ondemand_k8s_cluster(request):
return cluster


@pytest.fixture(scope="session")
def den_launched_ondemand_aws_k8s_cluster(request):
    """Session-scoped Kubernetes CPU cluster on EKS, started with the Den launcher.

    The kube context is read from the ``EKS_ARN`` environment variable. The
    fixture is skipped when ``~/.kube/config`` does not exist or when
    ``EKS_ARN`` is unset or empty.
    """
    kube_config_path = Path.home() / ".kube" / "config"

    if not kube_config_path.exists():
        pytest.skip("no kubeconfig found")

    # Without this guard a missing secret would pass None (or "") as the
    # context instead of the intended EKS cluster.
    eks_context = os.getenv("EKS_ARN")
    if not eks_context:
        pytest.skip("EKS_ARN environment variable is not set")

    args = {
        "name": "k8s-cpu-den",
        "provider": "kubernetes",
        "instance_type": "CPU:1",
        "launcher": LauncherType.DEN,
        "context": eks_context,
    }
    cluster = setup_test_cluster(args, request)
    return cluster


@pytest.fixture(scope="session")
def den_launched_ondemand_gcp_k8s_cluster(request):
    """Session-scoped Kubernetes CPU cluster on GKE, started with the Den launcher.

    Uses the ``gke_testing`` kube context and is skipped when
    ``~/.kube/config`` does not exist.
    """
    kube_config_path = Path.home() / ".kube" / "config"

    if not kube_config_path.exists():
        pytest.skip("no kubeconfig found")

    args = {
        # Distinct from the EKS fixture's "k8s-cpu-den": both fixtures are
        # session-scoped and can be requested in the same run, so a shared
        # name would make them refer to the same cluster.
        # NOTE(review): confirm nothing outside this file looks the GKE
        # cluster up by the old name.
        "name": "gke-cpu-den",
        "provider": "kubernetes",
        "instance_type": "CPU:1",
        "launcher": LauncherType.DEN,
        "context": "gke_testing",
    }
    cluster = setup_test_cluster(args, request)
    return cluster


@pytest.fixture(scope="session")
def ondemand_k8s_docker_cluster(request):
kube_config_path = Path.home() / ".kube" / "config"
Expand All @@ -143,7 +216,6 @@ def ondemand_k8s_docker_cluster(request):
"name": "k8s-docker-cpu",
"provider": "kubernetes",
"instance_type": "CPU:1",
"memory": ".2",
"image": Image(name="default_image").from_docker(
"rayproject/ray:latest-py311-cpu"
),
Expand All @@ -159,7 +231,19 @@ def v100_gpu_cluster(request):
"instance_type": "V100:1",
"provider": "aws",
}
cluster = setup_test_cluster(args, request)
cluster = setup_test_cluster(args, request, create_env=True)
return cluster


@pytest.fixture(scope="session")
def den_launcher_v100_gpu_cluster(request):
    """Session-scoped single-V100 AWS cluster started with the Den launcher."""
    cluster_args = dict(
        name="rh-v100-den",
        instance_type="V100:1",
        provider="aws",
        launcher=LauncherType.DEN,
    )
    return setup_test_cluster(cluster_args, request, create_env=True)


Expand Down
3 changes: 2 additions & 1 deletion tests/fixtures/static_cluster_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ def setup_static_cluster(
f"{create_folder_path()}-{rh.constants.SSH_SKY_SECRET_NAME}"
)
instance_type = "CPU:4" if compute_type == computeType.cpu else "g5.xlarge"
launcher = launcher if launcher else LauncherType.LOCAL
cluster = rh.cluster(
f"aws-{compute_type}-password",
f"{launcher}-aws-{compute_type}-password",
instance_type=instance_type,
provider="aws",
launcher=launcher,
Expand Down
6 changes: 2 additions & 4 deletions tests/test_resources/test_clusters/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
set_output_env_vars,
)


""" TODO:
1) In subclasses, test factory methods create same type as parent
2) In subclasses, use monkeypatching to make sure `up()` is called for various methods if the server is not up
Expand Down Expand Up @@ -131,9 +130,7 @@ class TestCluster(tests.test_resources.test_resource.TestResource):
}
MINIMAL = {"cluster": ["static_cpu_pwd_cluster"]}
RELEASE = {
"cluster": [
"static_cpu_pwd_cluster",
]
"cluster": ["static_cpu_pwd_cluster", "static_cpu_pwd_cluster_den_launcher"]
}
MAXIMAL = {
"cluster": [
Expand All @@ -142,6 +139,7 @@ class TestCluster(tests.test_resources.test_resource.TestResource):
"docker_cluster_pwd_ssh_no_auth",
"static_cpu_pwd_cluster",
"multinode_cpu_docker_conda_cluster",
"static_gpu_pwd_cluster_den_launcher",
]
}

Expand Down
4 changes: 4 additions & 0 deletions tests/test_resources/test_clusters/test_on_demand_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,13 @@ class TestOnDemandCluster(tests.test_resources.test_clusters.test_cluster.TestCl
RELEASE = {
"cluster": [
"ondemand_aws_docker_cluster",
"den_launched_ondemand_aws_docker_cluster",
"ondemand_gcp_cluster",
"ondemand_aws_https_cluster_with_auth",
"ondemand_k8s_cluster",
"ondemand_k8s_docker_cluster",
"den_launched_ondemand_aws_k8s_cluster",
"den_launched_ondemand_gcp_k8s_cluster",
]
}
MAXIMAL = {
Expand All @@ -105,6 +108,7 @@ class TestOnDemandCluster(tests.test_resources.test_clusters.test_cluster.TestCl
"ondemand_k8s_docker_cluster",
"ondemand_aws_https_cluster_with_auth",
"v100_gpu_cluster",
"den_launcher_v100_gpu_cluster",
"k80_gpu_cluster",
"a10g_gpu_cluster",
"static_cpu_pwd_cluster",
Expand Down
Loading