Skip to content

Commit

Permalink
Infer cluster from FQDN
Browse files Browse the repository at this point in the history
* Added `gempyor.info._infer_cluster_from_fqdn` internal utility.
* `gempyor.info.get_cluster_info` now accepts `None` for `name`.
* `gempyor.batch._click_submit` accepts `None` for `--cluster` when
  using `--slurm`.
* Minor testing fixes.
  • Loading branch information
TimothyWillard committed Nov 14, 2024
1 parent 0490895 commit 7e7efd5
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 42 deletions.
2 changes: 0 additions & 2 deletions flepimop/gempyor_pkg/src/gempyor/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,8 +786,6 @@ def _submit_scenario_job(
)
elif batch_system == BatchSystem.SLURM:
# Cluster info
if kwargs["cluster"] is None:
raise ValueError("When submitting a batch job to slurm a cluster is required.")
cluster = get_cluster_info(kwargs["cluster"])
if verbosity is not None:
logger.info(
Expand Down
32 changes: 29 additions & 3 deletions flepimop/gempyor_pkg/src/gempyor/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

import os
from pathlib import Path
from typing import TypeVar
import re
from socket import getfqdn
from typing import Pattern, TypeVar

from pydantic import BaseModel
import yaml
Expand All @@ -29,6 +31,12 @@ class Cluster(BaseModel):
T = TypeVar("T", bound=BaseModel)


# Ordered (cluster name, compiled FQDN pattern) pairs. Every pattern is anchored
# with `^` and `$`, and the pairs are kept in a tuple so lookups that walk them
# see a stable order.
_CLUSTER_FQDN_REGEXES: tuple[tuple[str, Pattern], ...] = tuple(
    (cluster_name, re.compile(fqdn_pattern))
    for cluster_name, fqdn_pattern in (
        ("longleaf", r"^longleaf\-login[0-9]+\.its\.unc\.edu$"),
        ("rockfish", r"^login[0-9]+\.cm\.cluster$"),
    )
)


def _get_info(
category: str, name: str, model: type[T], flepi_path: os.PathLike | None
) -> T:
Expand Down Expand Up @@ -61,17 +69,35 @@ def _get_info(
return model.model_validate(yaml.safe_load(info.read_text()))


def get_cluster_info(name: str | None, flepi_path: os.PathLike | None = None) -> Cluster:
    """
    Get cluster specific info.

    Args:
        name: The name of the cluster to pull information for. Currently only
            'longleaf' and 'rockfish' are supported. Pass `None` to infer the
            cluster name from the FQDN.
        flepi_path: Either a path-like that determines the directory to look for
            the info directory in or `None` to use the `FLEPI_PATH` environment
            variable.

    Returns:
        An object containing the information about the `name` cluster.

    Raises:
        ValueError: If `name` is `None` and the FQDN does not match any of the
            expected clusters.
    """
    if name is None:
        # No explicit cluster requested, so fall back to FQDN-based detection.
        name = _infer_cluster_from_fqdn()
    return _get_info("cluster", name, Cluster, flepi_path)


def _infer_cluster_from_fqdn() -> str:
    """
    Work out the cluster name from this host's FQDN.

    Returns:
        The name paired with the first pattern in `_CLUSTER_FQDN_REGEXES` that
        matches the value of `socket.getfqdn()`.

    Raises:
        ValueError: If no pattern in `_CLUSTER_FQDN_REGEXES` matches the FQDN.
    """
    fqdn = getfqdn()
    # Lazily scan the pairs in order and stop at the first pattern that matches.
    inferred = next(
        (name for name, pattern in _CLUSTER_FQDN_REGEXES if pattern.match(fqdn)),
        None,
    )
    if inferred is None:
        raise ValueError(f"The fqdn, '{fqdn}', does not match any of the expected clusters.")
    return inferred
32 changes: 4 additions & 28 deletions flepimop/gempyor_pkg/tests/batch/test__click_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ def test_batch_system_aws_not_implemented_error(tmp_path: Path) -> None:
"1",
"--blocks",
"1",
"--flepi-path",
str(tmp_path.absolute()),
"--project-path",
str(tmp_path.absolute()),
str(config_file.absolute()),
],
)
Expand All @@ -35,31 +39,3 @@ def test_batch_system_aws_not_implemented_error(tmp_path: Path) -> None:
str(result.exception)
== "The `flepimop submit` CLI does not support batch submission to AWS yet."
)


def test_cluster_required_for_slurm_value_error(tmp_path: Path) -> None:
    """Invoking the submit CLI with `--slurm` and no cluster fails with a `ValueError`."""
    # NOTE(review): the 28 deletions reported for this test module line up with
    # this test, and the `cluster is None` guard it exercises also looks removed
    # by the same commit -- confirm whether this test is still wanted.
    config_path = tmp_path / "config.yml"
    config = {"name": "foobar", "inference": {"method": "emcee"}}
    with config_path.open(mode="w") as stream:
        yaml.dump(config, stream)

    cli_args = [
        "--slurm",
        "--simulations",
        "1",
        "--jobs",
        "1",
        "--blocks",
        "1",
        str(config_path.absolute()),
    ]
    outcome = CliRunner().invoke(_click_submit, cli_args)

    assert outcome.exit_code == 1
    error = outcome.exception
    assert isinstance(error, ValueError)
    assert str(error) == "When submitting a batch job to slurm a cluster is required."
37 changes: 37 additions & 0 deletions flepimop/gempyor_pkg/tests/info/test__infer_cluster_from_fqdn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from unittest.mock import patch

import pytest

from gempyor.info import _infer_cluster_from_fqdn


@pytest.mark.parametrize("fqdn", ("new.cluster.com", "unsupported.cluster"))
def test_no_matching_fqdn_found_value_error(fqdn: str) -> None:
    """An FQDN that matches none of the known cluster patterns raises a `ValueError`."""
    # Imported locally so this test does not rely on a module-level `re` import.
    import re

    expected_message = (
        f"The fqdn, '{fqdn}', does not match any of the expected clusters."
    )
    with patch("gempyor.info.getfqdn", return_value=fqdn) as socket_fqdn_patch:
        # `match` is treated as a regular expression, so the literal message is
        # escaped; otherwise every '.' in the FQDN would match any character.
        with pytest.raises(ValueError, match=f"^{re.escape(expected_message)}$"):
            _infer_cluster_from_fqdn()
        # Checked outside the `raises` block so the assertion actually runs.
        socket_fqdn_patch.assert_called_once()


@pytest.mark.parametrize(
    ("fqdn", "expected"),
    (
        ("login01.cm.cluster", "rockfish"),
        ("login3.cm.cluster", "rockfish"),
        ("longleaf-login1.its.unc.edu", "longleaf"),
        ("longleaf-login07.its.unc.edu", "longleaf"),
    ),
)
def test_exact_results_for_select_values(fqdn: str, expected: str) -> None:
    """Each sample FQDN is resolved to the cluster name it is paired with."""
    # Stand in for `getfqdn` as imported by `gempyor.info` so the host running
    # the tests does not influence the result.
    with patch("gempyor.info.getfqdn", return_value=fqdn) as socket_fqdn_patch:
        inferred = _infer_cluster_from_fqdn()
    assert inferred == expected
    socket_fqdn_patch.assert_called_once()
20 changes: 19 additions & 1 deletion flepimop/gempyor_pkg/tests/info/test_get_cluster_info.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from unittest.mock import patch

import pytest

Expand All @@ -10,6 +11,23 @@
os.getenv("FLEPI_PATH") is None,
reason="The $FLEPI_PATH environment variable is not set.",
)
def test_output_validation(name: str) -> None:
def test_exact_results_given_cluster_name(name: str) -> None:
cluster = get_cluster_info(name)
assert isinstance(cluster, Cluster)


@pytest.mark.parametrize("name", ("longleaf", "rockfish"))
@pytest.mark.skipif(
    os.getenv("FLEPI_PATH") is None,
    reason="The $FLEPI_PATH environment variable is not set.",
)
def test_exact_results_when_inferred_from_fqdn(name: str) -> None:
    """Passing `None` to `get_cluster_info` goes through the FQDN inference helper."""
    # The helper is replaced so the inferred name is the parametrized one,
    # whatever host the tests run on.
    with patch(
        "gempyor.info._infer_cluster_from_fqdn", return_value=name
    ) as infer_cluster_from_fqdn_patch:
        cluster = get_cluster_info(None)
    assert isinstance(cluster, Cluster)
    infer_cluster_from_fqdn_patch.assert_called_once()
15 changes: 7 additions & 8 deletions flepimop/gempyor_pkg/tests/templates/test_cluster_setup_bash.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from typing import Any

import pytest
Expand All @@ -6,15 +7,13 @@
from gempyor.info import Cluster, Module, PathExport, get_cluster_info


@pytest.mark.parametrize(
"cluster",
(
None,
get_cluster_info("longleaf").model_dump(),
get_cluster_info("rockfish").model_dump(),
),
@pytest.mark.parametrize("cluster", (None, "longleaf", "rockfish"))
@pytest.mark.skipif(
os.getenv("FLEPI_PATH") is None,
reason="The $FLEPI_PATH environment variable is not set.",
)
def test_output_validation(cluster: dict[str, Any]) -> None:
def test_output_validation(cluster: str | None) -> None:
cluster = cluster if cluster is None else get_cluster_info(cluster).model_dump()
rendered_template = _render_template("cluster_setup.bash.j2", {"cluster": cluster})
lines = rendered_template.split("\n")
assert "module purge" in lines
Expand Down

0 comments on commit 7e7efd5

Please sign in to comment.