Skip to content

Commit

Permalink
Add Nebius exclusions to smoke tests
Browse files Browse the repository at this point in the history
Mark Nebius as unsupported for various test cases due to its lack of support for specific GPUs, features, or configurations. This ensures accurate test coverage and avoids unnecessary failures on incompatible platforms.
  • Loading branch information
SalikovAlex committed Jan 22, 2025
1 parent c75f9d7 commit 9b7ecf3
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
5 changes: 5 additions & 0 deletions tests/smoke_tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def test_launch_fast(generic_cloud: str):
@pytest.mark.no_lambda_cloud
@pytest.mark.no_ibm
@pytest.mark.no_kubernetes
@pytest.mark.no_nebius
def test_launch_fast_with_autostop(generic_cloud: str):
name = smoke_tests_utils.get_cluster_name()
# Azure takes ~ 7m15s (435s) to autostop a VM, so here we use 600 to ensure
Expand Down Expand Up @@ -423,6 +424,7 @@ def test_load_dump_yaml_config_equivalent(self):
@pytest.mark.no_fluidstack # Fluidstack does not support K80 gpus for now
@pytest.mark.no_paperspace # Paperspace does not support K80 gpus
@pytest.mark.no_do # DO does not support K80s
@pytest.mark.no_nebius # Nebius does not support K80s
def test_multiple_accelerators_ordered():
name = smoke_tests_utils.get_cluster_name()
test = smoke_tests_utils.Test(
Expand All @@ -440,6 +442,7 @@ def test_multiple_accelerators_ordered():
@pytest.mark.no_fluidstack # Fluidstack has low availability for T4 GPUs
@pytest.mark.no_paperspace # Paperspace does not support T4 GPUs
@pytest.mark.no_do # DO does not have multiple accelerators
@pytest.mark.no_nebius # Nebius does not support T4 GPUs
def test_multiple_accelerators_ordered_with_default():
name = smoke_tests_utils.get_cluster_name()
test = smoke_tests_utils.Test(
Expand All @@ -457,6 +460,7 @@ def test_multiple_accelerators_ordered_with_default():
@pytest.mark.no_fluidstack # Fluidstack has low availability for T4 GPUs
@pytest.mark.no_paperspace # Paperspace does not support T4 GPUs
@pytest.mark.no_do # DO does not have multiple accelerators
@pytest.mark.no_nebius # Nebius does not support T4 GPUs
def test_multiple_accelerators_unordered():
name = smoke_tests_utils.get_cluster_name()
test = smoke_tests_utils.Test(
Expand All @@ -473,6 +477,7 @@ def test_multiple_accelerators_unordered():
@pytest.mark.no_fluidstack # Fluidstack has low availability for T4 GPUs
@pytest.mark.no_paperspace # Paperspace does not support T4 GPUs
@pytest.mark.no_do # DO does not support multiple accelerators
@pytest.mark.no_nebius # Nebius does not support T4 GPUs
def test_multiple_accelerators_unordered_with_default():
name = smoke_tests_utils.get_cluster_name()
test = smoke_tests_utils.Test(
Expand Down
12 changes: 11 additions & 1 deletion tests/smoke_tests/test_cluster_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
@pytest.mark.no_scp # SCP does not have T4 gpus. Run test_scp_job_queue instead
@pytest.mark.no_paperspace # Paperspace does not have T4 gpus.
@pytest.mark.no_oci # OCI does not have T4 gpus
@pytest.mark.no_nebius # Nebius does not support T4 GPUs
@pytest.mark.parametrize('accelerator', [{'do': 'H100'}])
def test_job_queue(generic_cloud: str, accelerator: Dict[str, str]):
accelerator = accelerator.get(generic_cloud, 'T4')
Expand Down Expand Up @@ -80,6 +81,7 @@ def test_job_queue(generic_cloud: str, accelerator: Dict[str, str]):
@pytest.mark.no_scp # Doesn't support SCP for now
@pytest.mark.no_oci # Doesn't support OCI for now
@pytest.mark.no_kubernetes # Doesn't support Kubernetes for now
@pytest.mark.no_nebius # Doesn't support Nebius for now
@pytest.mark.parametrize('accelerator', [{'do': 'H100'}])
@pytest.mark.parametrize(
'image_id',
Expand Down Expand Up @@ -220,6 +222,7 @@ def test_scp_job_queue():
@pytest.mark.no_scp # SCP does not support num_nodes > 1 yet
@pytest.mark.no_oci # OCI Cloud does not have T4 gpus.
@pytest.mark.no_kubernetes # Kubernetes does not support num_nodes > 1 yet
@pytest.mark.no_nebius # Nebius Cloud does not have T4 gpus.
@pytest.mark.parametrize('accelerator', [{'do': 'H100'}])
def test_job_queue_multinode(generic_cloud: str, accelerator: Dict[str, str]):
accelerator = accelerator.get(generic_cloud, 'T4')
Expand Down Expand Up @@ -370,6 +373,7 @@ def test_ibm_job_queue_multinode():
@pytest.mark.no_scp # Doesn't support SCP for now
@pytest.mark.no_oci # Doesn't support OCI for now
@pytest.mark.no_kubernetes # Doesn't support Kubernetes for now
@pytest.mark.no_nebius # Doesn't support Nebius for now
# TODO(zhwu): we should fix this for kubernetes
def test_docker_preinstalled_package(generic_cloud: str):
name = smoke_tests_utils.get_cluster_name()
Expand All @@ -394,6 +398,7 @@ def test_docker_preinstalled_package(generic_cloud: str):
@pytest.mark.no_scp # SCP does not support num_nodes > 1 yet
@pytest.mark.no_oci # OCI Cloud does not have T4 gpus
@pytest.mark.no_do # DO does not have T4 gpus
@pytest.mark.no_nebius # Nebius does not have T4 gpus
def test_multi_echo(generic_cloud: str):
name = smoke_tests_utils.get_cluster_name()
test = smoke_tests_utils.Test(
Expand Down Expand Up @@ -436,6 +441,7 @@ def test_multi_echo(generic_cloud: str):
@pytest.mark.no_lambda_cloud # Lambda Cloud does not have V100 gpus
@pytest.mark.no_ibm # IBM cloud currently doesn't provide public image with CUDA
@pytest.mark.no_scp # SCP does not have V100 (16GB) GPUs. Run test_scp_huggingface instead.
@pytest.mark.no_nebius # Nebius does not have T4 gpus for now
@pytest.mark.parametrize('accelerator', [{'do': 'H100'}])
def test_huggingface(generic_cloud: str, accelerator: Dict[str, str]):
accelerator = accelerator.get(generic_cloud, 'T4')
Expand Down Expand Up @@ -966,7 +972,8 @@ def test_container_logs_two_simultaneous_jobs_kubernetes():
@pytest.mark.no_lambda_cloud # Lambda Cloud does not have V100 gpus
@pytest.mark.no_ibm # IBM cloud currently doesn't provide public image with CUDA
@pytest.mark.no_scp # SCP does not support num_nodes > 1 yet
@pytest.mark.no_dos # DO does not have V100 gpus
@pytest.mark.no_do # DO does not have V100 gpus
@pytest.mark.no_nebius # Nebius does not have V100 gpus
@pytest.mark.skip(
reason=
'The resnet_distributed_tf_app is flaky, due to it failing to detect GPUs.')
Expand Down Expand Up @@ -1121,6 +1128,7 @@ def test_autostop(generic_cloud: str):
# ---------- Testing Autodowning ----------
@pytest.mark.no_fluidstack # FluidStack does not support stopping in SkyPilot implementation
@pytest.mark.no_scp # SCP does not support num_nodes > 1 yet. Run test_scp_autodown instead.
@pytest.mark.no_nebius # Nebius does not support stopping in SkyPilot implementation
def test_autodown(generic_cloud: str):
name = smoke_tests_utils.get_cluster_name()
# Azure takes ~ 13m30s (810s) to autodown a VM, so here we use 900 to ensure
Expand Down Expand Up @@ -1242,6 +1250,7 @@ def test_cancel_azure():
@pytest.mark.no_ibm # IBM cloud currently doesn't provide public image with CUDA
@pytest.mark.no_paperspace # Paperspace has `gnome-shell` on nvidia-smi
@pytest.mark.no_scp # SCP does not support num_nodes > 1 yet
@pytest.mark.no_nebius # Nebius Cloud does not have T4 gpus
@pytest.mark.parametrize('accelerator', [{'do': 'H100'}])
def test_cancel_pytorch(generic_cloud: str, accelerator: Dict[str, str]):
accelerator = accelerator.get(generic_cloud, 'T4')
Expand Down Expand Up @@ -1299,6 +1308,7 @@ def test_cancel_ibm():
@pytest.mark.no_ibm # IBM Cloud does not support spot instances
@pytest.mark.no_scp # SCP does not support spot instances
@pytest.mark.no_kubernetes # Kubernetes does not have a notion of spot instances
@pytest.mark.no_nebius # Nebius does not support spot instances
@pytest.mark.no_do
def test_use_spot(generic_cloud: str):
"""Test use-spot and sky exec."""
Expand Down

0 comments on commit 9b7ecf3

Please sign in to comment.