diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 42f9128..f492728 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -11,10 +11,22 @@ jobs:
     strategy:
       matrix:
         python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ]
-        torch-version: [ '1.13', '2.0.1' ]
+        torch-version: [ '1.13', '2.0.1', '2.5.0' ]
         exclude:
-          - torch-version: '1.13'
-            python-version: '3.12'
+          - python-version: '3.8'
+            torch-version: '2.5.0'
+          - python-version: '3.9'
+            torch-version: '2.5.0'
+          - python-version: '3.10'
+            torch-version: '2.5.0'
+          - python-version: '3.11'
+            torch-version: '1.13'
+          - python-version: '3.11'
+            torch-version: '2.5.0'
+          - python-version: '3.12'
+            torch-version: '1.13'
+          - python-version: '3.12'
+            torch-version: '2.0.1'

     steps:
       - uses: actions/checkout@v3
@@ -23,16 +35,35 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}

-      - name: Build the package
+      - name: Check Python version  # https://github.com/python/cpython/issues/95299
+        id: check-version
+        run: |
+          python_version=$(python --version | awk '{print $2}')
+          major=$(echo $python_version | cut -d'.' -f1)
+          minor=$(echo $python_version | cut -d'.' -f2)
+          if ([ "$major" -eq 3 ] && [ "$minor" -ge 12 ]); then
+            echo "setuptools_present=false" >> $GITHUB_ENV
+          else
+            echo "setuptools_present=true" >> $GITHUB_ENV
+          fi
+
+      - name: Build the package (python >= 3.12)
+        if: env.setuptools_present == 'false'
+        run: |
+          python -m pip install build
+          python -m build
+
+      - name: Build the package (python < 3.12)
+        if: env.setuptools_present == 'true'
         run: |
           python setup.py sdist

       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          python -m pip install -e .
           python -m pip install -r tests/dev-requirements.txt
           python -m pip install torch==${{ matrix.torch-version }}
+          python -m pip install -e .
           cd tests
           export MODULE_PARENT=$(python -c "import $MODULE_NAME, os; print(os.path.dirname($MODULE_NAME.__path__[0]))")
           export MODULE_PARENT=${MODULE_PARENT%"/"}
diff --git a/docs/cli/index.md b/docs/cli/index.md
index c643515..da887aa 100644
--- a/docs/cli/index.md
+++ b/docs/cli/index.md
@@ -53,7 +53,7 @@ The command shown above will run SLURM job with 4 CPUs and 100G of RAM.
 ### Predefined run configs

 You can predefine run configs to avoid reentering the same flags.
-Create `~/.config/thunder/backends.yml` (you can run `thunder show` in your terminal,
+Create `~/.config/thunder/backends.yml` (you can run `thunder backend list` in your terminal,
 required path will be at the title of the table) in you home directory.
 Now you can specify config name and its parameters:
 ```yaml
diff --git a/docs/examples/mnist.md b/docs/examples/mnist.md
index 303bd30..28901d1 100644
--- a/docs/examples/mnist.md
+++ b/docs/examples/mnist.md
@@ -31,10 +31,17 @@ module = ThunderModule(
     architecture, nn.CrossEntropyLoss(), optimizer=torch.optim.Adam(architecture.parameters())
 )

+# Preparing metrics
+# 'y' and 'x' are a single label and
+# the model prediction for a single image,
+# hence the 'np.argmax(x)' for extracting
+# the predicted label.
+group_accuracy = {lambda y, x: (y, np.argmax(x)): accuracy_score}
+
 # Initialize a trainer
 trainer = Trainer(
     callbacks=[ModelCheckpoint(save_last=True),
-               MetricMonitor(group_metrics={lambda y, x: (np.argmax(y), x): accuracy_score})],
+               MetricMonitor(group_metrics=group_accuracy)],
     accelerator="auto",
     devices=1,
     max_epochs=100,
diff --git a/requirements.txt b/requirements.txt
index 59f9dce..eac5fb5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 lightning>=2.0.0,<3.0.0
 lazycon>=0.6.3,<1.0.0
 typer>=0.9.0,<1.0.0
-pydantic<2.0.0
+pydantic<3.0.0
 click
 torch
 toolz
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 9d6a7f0..5d0ceb7 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -63,11 +63,11 @@ def test_build(temp_dir, mock_backend):
     with cleanup(experiment):
         result = invoke('build', config, experiment, '-u', 'c=3')
-        assert result.exit_code != 0
+        assert result.exit_code != 0, result.output
         assert 'are missing from the config' in str(result.exception)

         result = invoke('build', config, experiment, '-u', 'a=10')
-        assert result.exit_code == 0
+        assert result.exit_code == 0, result.output
         assert Config.load(experiment / 'experiment.config').a == 10

     with cleanup(experiment):
@@ -111,7 +111,7 @@ def test_build_overwrite(temp_dir):
     config.write_text('b = 2')

     result = invoke('build', config, experiment, "--overwrite")
-    assert result.exit_code == 0
+    assert result.exit_code == 0, result.output
     assert not hasattr(read_config(experiment / "experiment.config"), "a")
     assert read_config(experiment / "experiment.config").b == 2

@@ -187,6 +187,11 @@ def test_backend_add(temp_dir, mock_backend):
     local = load_backend_configs()
     assert "new_config" in local and "new_config_2" in local

+    invoke("backend", "add", "new_config_3", "backend=cli", "n_workers=8")
+    local = load_backend_configs()
+    assert "new_config" in local and "new_config_2" in local
+    assert "new_config_3" in local
+

 def test_backend_list(temp_dir, mock_backend):
     # language=yaml
@@ -208,10 +213,11 @@ def test_backend_list(temp_dir, mock_backend):


 def test_backend_set(temp_dir, mock_backend):
-    assert invoke("backend", "add", "config", "backend=slurm", "ram=100G", "--force").exit_code == 0
+    result = invoke("backend", "add", "config", "backend=slurm", "ram=100G", "--force")
+    assert result.exit_code == 0, result.output

     result = invoke("backend", "set", "config")
-    assert result.exit_code == 0
+    assert result.exit_code == 0, result.output

     local = load_backend_configs()
     assert local[local["meta"].default].config.ram == "100G"
diff --git a/thunder/backend/interface.py b/thunder/backend/interface.py
index 1fac2a9..f0f35ab 100644
--- a/thunder/backend/interface.py
+++ b/thunder/backend/interface.py
@@ -1,14 +1,14 @@
 from pathlib import Path
 from typing import Dict, Optional, Sequence, Type

-from pydantic import BaseModel, Extra, validator
+from pydantic import BaseModel

 from ..layout import Node
+from ..pydantic_compat import PYDANTIC_MAJOR, NoExtra, field_validator, model_validate


-class BackendConfig(BaseModel):
-    class Config:
-        extra = Extra.ignore
+class BackendConfig(NoExtra):
+    """Backend Parameters"""


 class Backend:
@@ -19,24 +19,33 @@
     def run(config: BackendConfig, experiment: Path, nodes: Optional[Sequence[Node]]):
         """Start running the given `nodes` of an experiment located at the given path"""


-class BackendEntryConfig(BaseModel):
+class BackendEntryConfig(NoExtra):
     backend: str
     config: BackendConfig

-    @validator('config', pre=True)
+    @field_validator("config", mode="before")
     def _val_config(cls, v, values):
-        val = backends[values['backend']]
-        return val.Config.parse_obj(v)
+        return parse_backend_config(v, values)

     @property
     def backend_cls(self):
         return backends[self.backend]

-    class Config:
-        extra = Extra.ignore
+
+if PYDANTIC_MAJOR == 2:
+    def parse_backend_config(v, values):
+        val = backends[values.data["backend"]]
+        return model_validate(val.Config, v)
+else:
+    def parse_backend_config(v, values):
+        val = backends[values["backend"]]
+        return model_validate(val.Config, v)


 class MetaEntry(BaseModel):
+    """
+    Default backend set by `thunder backend set`
+    """
     default: str
diff --git a/thunder/backend/slurm.py b/thunder/backend/slurm.py
index 83326f8..a3bd32e 100644
--- a/thunder/backend/slurm.py
+++ b/thunder/backend/slurm.py
@@ -8,12 +8,12 @@
 from typing import Optional, Sequence

 from deli import save
-from pydantic import validator
 from pytimeparse.timeparse import timeparse
 from typer import Option
 from typing_extensions import Annotated

 from ..layout import Node
+from ..pydantic_compat import field_validator
 from .interface import Backend, BackendConfig, backends


@@ -26,45 +26,45 @@ class Slurm(Backend):

     class Config(BackendConfig):
-        ram: Annotated[str, Option(
-            ..., '-r', '--ram', '--mem',
+        ram: Annotated[Optional[str], Option(
+            None, '-r', '--ram', '--mem',
             help='The amount of RAM required per node. Default units are megabytes. '
                  'Different units can be specified using the suffix [K|M|G|T].'
         )] = None
-        cpu: Annotated[int, Option(
-            ..., '-c', '--cpu', '--cpus-per-task', show_default=False,
+        cpu: Annotated[Optional[int], Option(
+            None, '-c', '--cpu', '--cpus-per-task', show_default=False,
             help='Number of CPU cores to allocate. Default to 1'
         )] = None
-        gpu: Annotated[int, Option(
-            ..., '-g', '--gpu', '--gpus-per-node',
+        gpu: Annotated[Optional[int], Option(
+            None, '-g', '--gpu', '--gpus-per-node',
             help='Number of GPUs to allocate'
         )] = None
-        partition: Annotated[str, Option(
-            ..., '-p', '--partition',
+        partition: Annotated[Optional[str], Option(
+            None, '-p', '--partition',
             help='Request a specific partition for the resource allocation'
         )] = None
-        nodelist: Annotated[str, Option(
-            ...,
+        nodelist: Annotated[Optional[str], Option(
+            None,
             help='Request a specific list of hosts. The list may be specified as a comma-separated '
                  'list of hosts, a range of hosts (host[1-5,7,...] for example).'
         )] = None
-        time: Annotated[str, Option(
-            ..., '-t', '--time',
+        time: Annotated[Optional[str], Option(
+            None, '-t', '--time',
             help='Set a limit on the total run time of the job allocation. When the time limit is reached, '
                  'each task in each job step is sent SIGTERM followed by SIGKILL.'
         )] = None
-        limit: Annotated[int, Option(
-            ...,
+        limit: Annotated[Optional[int], Option(
+            None,
             help='Limit the number of jobs that are simultaneously running during the experiment',
         )] = None

-        @validator('time')
+        @field_validator("time")
         def val_time(cls, v):
             if v is None:
                 return
             return parse_duration(v)

-        @validator('limit')
+        @field_validator("limit")
         def val_limit(cls, v):
             assert v is None or v > 0, 'The jobs limit, if specified, must be positive'
             return v
diff --git a/thunder/cli/backend.py b/thunder/cli/backend.py
index d396f65..593eec3 100644
--- a/thunder/cli/backend.py
+++ b/thunder/cli/backend.py
@@ -13,6 +13,7 @@
 from typer.models import ParamMeta

 from ..backend import BackendEntryConfig, MetaEntry, backends
+from ..pydantic_compat import model_validate, resolve_pydantic_major
 from .app import app


@@ -92,23 +93,39 @@ def populate(backend_name):
                 show_default=False,
             ),
         )]
-        for field in entry.backend_cls.Config.__fields__.values():
-            annotation = field.outer_type_
-            # TODO: https://stackoverflow.com/a/68337036
-            if not hasattr(annotation, '__metadata__') or not hasattr(annotation, '__origin__'):
-                raise ValueError('Please use the `Annotated` syntax to annotate you backend config')
-
-            # TODO
-            default, = annotation.__metadata__
-            default = copy.deepcopy(default)
-            default.default = getattr(entry.config, field.name)
-            default.help = f'[{backend_name} backend] {default.help}'
-            backend_params.append(ParamMeta(
-                name=field.name, default=default, annotation=annotation.__origin__,
-            ))
+        backend_params.extend(_collect_backend_params(entry, backend_name))
         return backend_params


+if resolve_pydantic_major() >= 2:
+    def _collect_backend_params(entry, backend_name):
+        """
+        Config annotation handling depends on the pydantic version.
+        """
+        for field_name, field in entry.backend_cls.Config.model_fields.items():
+            field_clone = copy.deepcopy(field)
+            field_clone.default = getattr(entry.config, field_name)
+            yield ParamMeta(
+                name=field_name, default=field_clone.default, annotation=field.annotation,
+            )
+else:
+    def _collect_backend_params(entry, backend_name):
+        for field in entry.backend_cls.Config.__fields__.values():
+            annotation = field.outer_type_
+            # TODO: https://stackoverflow.com/a/68337036
+            if not hasattr(annotation, '__metadata__') or not hasattr(annotation, '__origin__'):
+                raise ValueError('Please use the `Annotated` syntax to annotate your backend config')
+
+            # TODO
+            default, = annotation.__metadata__
+            default = copy.deepcopy(default)
+            default.default = getattr(entry.config, field.name)
+            default.help = f'[{backend_name} backend] {default.help}'
+            yield ParamMeta(
+                name=field.name, default=default, annotation=annotation.__origin__,
+            )
+
+
 def collect_backends() -> ChainMap:
     """
     Collects backend for each config.
@@ -144,7 +161,7 @@ def collect_configs() -> Tuple[ChainMap, Union[MetaEntry, None]]:
 def load_backend_configs() -> Dict[str, Union[BackendEntryConfig, MetaEntry]]:
     path = BACKENDS_CONFIG_PATH
     if not path.exists():
-        # print(path, flush=True)
+        # TODO: return Optional[Dict]
         return {}

     with path.open('r') as file:
@@ -153,5 +170,5 @@ def load_backend_configs() -> Dict[str, Union[BackendEntryConfig, MetaEntry]]:
         return {}
     # FIXME
     assert isinstance(local, dict), type(local)
-    return {k: BackendEntryConfig.parse_obj(v)
-            if k != "meta" else MetaEntry.parse_obj(v) for k, v in local.items()}
+    return {k: model_validate(BackendEntryConfig, v)
+            if k != "meta" else model_validate(MetaEntry, v) for k, v in local.items()}
diff --git a/thunder/cli/backend_cli.py b/thunder/cli/backend_cli.py
index 8d05cae..c5ec526 100644
--- a/thunder/cli/backend_cli.py
+++ b/thunder/cli/backend_cli.py
@@ -1,4 +1,4 @@
-from typing import List
+from typing import Dict, List, Union

 import yaml
 from rich.console import Console
@@ -7,6 +7,7 @@
 from typing_extensions import Annotated

 from ..backend import MetaEntry
+from ..pydantic_compat import model_dump, model_validate
 from .backend import BACKENDS_CONFIG_PATH, BackendEntryConfig, load_backend_configs


@@ -21,6 +22,9 @@
 ForceAddArg = Annotated[bool, Option(
     "--force", "-f", help="Forces overwriting of the same backend in .yml file."
 )]
+CreateYMLArg = Annotated[bool, Option(
+    "--create", "-c", help="Creates the .yml file that stores the backends."
+)]

 console = Console()
 backend_app = Typer(name="backend", help="Commands for managing your backends.")
@@ -35,13 +39,14 @@ def _set(name: BackendNameArg):
                       f"available configs: {sorted(local)}`")
         raise Abort(1)
-    local["meta"] = MetaEntry.parse_obj({"default": name})
+    local["meta"] = model_validate(MetaEntry, {"default": name})
     with BACKENDS_CONFIG_PATH.open("w") as stream:
-        yaml.safe_dump({k: v.dict() for k, v in local.items()}, stream)
+        yaml.safe_dump({k: _dump_backend_entry(v) for k, v in local.items()}, stream)

 @backend_app.command()
-def add(name: BackendNameArg, params: BackendParamsArg, force: ForceAddArg = False):
+def add(name: BackendNameArg, params: BackendParamsArg, force: ForceAddArg = False,
+        create_yml: CreateYMLArg = False):
     """
     Add run config to the list of available configs.
""" local = load_backend_configs() if name in local and not force: @@ -52,9 +57,19 @@ def add(name: BackendNameArg, params: BackendParamsArg, force: ForceAddArg = Fal kwargs = dict(map(lambda p: p.split("="), params)) config = {"backend": kwargs.pop("backend", "cli"), "config": kwargs} - local.update({name: BackendEntryConfig.parse_obj(config)}) + local.update({name: model_validate(BackendEntryConfig, config)}) + + if not BACKENDS_CONFIG_PATH.parent.exists() and not create_yml: + path = str(BACKENDS_CONFIG_PATH) + console.print(f"Backends storage {path} does not exist, " + f"you can create it by adding --create to the command.") + raise Abort(1) + + if create_yml: + BACKENDS_CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True) + with BACKENDS_CONFIG_PATH.open("w") as stream: - yaml.safe_dump({k: v.dict() for k, v in local.items()}, stream) + yaml.safe_dump({k: _dump_backend_entry(v) for k, v in local.items()}, stream) @backend_app.command() @@ -67,7 +82,7 @@ def remove(name: BackendNameArg): local.pop(name) with BACKENDS_CONFIG_PATH.open("w") as stream: - yaml.safe_dump({k: v.dict() for k, v in local.items()}, stream) + yaml.safe_dump({k: _dump_backend_entry(v) for k, v in local.items()}, stream) @backend_app.command(name="list") @@ -78,15 +93,25 @@ def _list(names: BackendNamesArg = None): table = Table("Name", "Backend", "Parameters", title=f"Configs at {str(BACKENDS_CONFIG_PATH.resolve())}") + if names is None: + names = local.copy() + extra = set(names) - set(local) if extra: console.print("These names are not among your configs:", extra) for name in sorted(set(names if names else local) - extra.union({"meta"})): - entry = local[name].dict() + entry = _dump_backend_entry(local[name]) table.add_row(*map(str, [name, entry.get("backend", None), entry.get("config", None)])) console.print(table) if "meta" in local: console.print(f"[italic green]Default is [/italic green]{local['meta'].default}") + + +def _dump_backend_entry(backend: BackendEntryConfig) -> Dict[str, Union[str, Dict]]: + entry = model_dump(backend) + if hasattr(backend, "config"): + entry["config"] = model_dump(backend.config) + return entry diff --git a/thunder/cli/main.py b/thunder/cli/main.py index 34ef17f..2651bb2 100644 --- a/thunder/cli/main.py +++ b/thunder/cli/main.py @@ -1,3 +1,4 @@ +import functools import shutil from io import StringIO from pathlib import Path @@ -13,6 +14,7 @@ from ..config import log_hyperparam from ..layout import Layout, Node, Single +from ..pydantic_compat import model_validate from ..torch.utils import last_checkpoint from ..utils import chdir from .app import app @@ -191,7 +193,8 @@ def load_nodes(experiment: Path): if not nodes.exists(): return {} # TODO: check uniqueness - return {x.name: x for x in map(Node.parse_obj, load(nodes))} + parse_obj = functools.partial(model_validate, Node) + return {x.name: x for x in map(parse_obj, load(nodes))} def get_nodes(experiment: Path, names: Optional[Sequence[str]]): diff --git a/thunder/layout/interface.py b/thunder/layout/interface.py index 12cb231..6b4a65c 100644 --- a/thunder/layout/interface.py +++ b/thunder/layout/interface.py @@ -5,18 +5,16 @@ from typing import Any, Dict, Iterable, Optional, Tuple from lazycon import Config -from pydantic import BaseModel, Extra +from ..pydantic_compat import NoExtra -class Node(BaseModel): + +class Node(NoExtra): name: str # TODO: no layouts with parents so far # parents: Sequence[Node] = () - class Config: - extra = Extra.forbid - class Layout(ABC): @abstractmethod diff --git a/thunder/policy.py 
index c7464dc..6af6bc3 100644
--- a/thunder/policy.py
+++ b/thunder/policy.py
@@ -7,7 +7,12 @@
 from more_itertools import zip_equal
 from toolz import juxt
 from torch.optim import Optimizer
-from torch.optim.lr_scheduler import _LRScheduler as LRScheduler
+
+
+try:
+    from torch.optim.lr_scheduler import LRScheduler
+except ImportError:
+    from torch.optim.lr_scheduler import _LRScheduler as LRScheduler


 class Policy(LRScheduler, metaclass=ABCMeta):
diff --git a/thunder/pydantic_compat.py b/thunder/pydantic_compat.py
new file mode 100644
index 0000000..47e0108
--- /dev/null
+++ b/thunder/pydantic_compat.py
@@ -0,0 +1,60 @@
+def resolve_pydantic_major() -> int:
+    import pydantic
+    return int(pydantic.__version__.split(".")[0])
+
+
+PYDANTIC_MAJOR = resolve_pydantic_major()
+
+if PYDANTIC_MAJOR == 2:
+    from pydantic import BaseModel, field_validator as _field_validator
+
+    def field_validator(*args, always=None, **kwargs):
+        # we just ignore `always`
+        return _field_validator(*args, **kwargs)
+
+    def model_validate(cls, data):
+        return cls.model_validate(data)
+
+    def model_dump(obj, **kwargs):
+        return obj.model_dump(**kwargs)
+
+    def model_copy(cls, **kwargs):
+        return cls.model_copy(**kwargs)
+
+    class NoExtra(BaseModel):
+        model_config = {
+            'extra': 'forbid'
+        }
+
+elif PYDANTIC_MAJOR == 1:
+    from pydantic import BaseModel, root_validator, validator as _field_validator
+
+    # we don't use this with pydantic==1 anyway
+    core_schema = None
+
+    def model_validator(mode: str):
+        assert mode == 'before'
+        return root_validator(pre=True)
+
+    def field_validator(*args, mode: str = 'after', **kwargs):
+        # we just ignore `always`
+        assert mode in ('before', 'after')
+        if mode == 'before':
+            kwargs['pre'] = True
+        return _field_validator(*args, **kwargs)
+
+    def model_validate(cls, data):
+        return cls.parse_obj(data)
+
+    def model_dump(obj, **kwargs):
+        return obj.dict(**kwargs)
+
+    def model_copy(cls, **kwargs):
+        return cls.copy(**kwargs)
+
+    class NoExtra(BaseModel):
+        class Config:
+            extra = 'forbid'
+else:
+    import pydantic
+    raise RuntimeError(f"Expected pydantic<3.0.0, got {pydantic.__version__}")
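
Not part of the patch above — a minimal usage sketch of the new `thunder/pydantic_compat.py` shim, with a hypothetical `Entry` model invented purely for illustration, assuming either pydantic 1.x or 2.x is installed:

```python
from thunder.pydantic_compat import NoExtra, field_validator, model_dump, model_validate


class Entry(NoExtra):
    # hypothetical model, only to exercise the shim
    name: str
    ram: str = "1G"

    @field_validator("ram", mode="before")
    def _normalize(cls, v):
        # runs before field parsing under both pydantic majors
        return str(v).upper()


entry = model_validate(Entry, {"name": "local", "ram": "100g"})
assert model_dump(entry) == {"name": "local", "ram": "100G"}

# unknown keys are rejected under both majors, because NoExtra forbids extras:
# model_validate(Entry, {"name": "local", "oops": 1})  # -> ValidationError
```

This is the same pattern `BackendEntryConfig` and the CLI rely on, so call sites only branch on the pydantic major where validator semantics actually differ (as in `parse_backend_config`).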