From baa67af262894b6256a2c24a01e17cddd98f1473 Mon Sep 17 00:00:00 2001 From: Daniel Saelid Date: Fri, 22 Mar 2024 16:46:12 -0700 Subject: [PATCH 01/11] added base config class --- cyto_dl/api/config/__init__.py | 1 + cyto_dl/api/config/cyto_dl_config.py | 45 +++++++++++++++++++ .../api/config/segmentation_plugin_config.py | 11 +++++ cyto_dl/api/model.py | 3 ++ 4 files changed, 60 insertions(+) create mode 100644 cyto_dl/api/config/__init__.py create mode 100644 cyto_dl/api/config/cyto_dl_config.py create mode 100644 cyto_dl/api/config/segmentation_plugin_config.py diff --git a/cyto_dl/api/config/__init__.py b/cyto_dl/api/config/__init__.py new file mode 100644 index 000000000..d2c4c03ef --- /dev/null +++ b/cyto_dl/api/config/__init__.py @@ -0,0 +1 @@ +from .cyto_dl_config import CytoDLConfig \ No newline at end of file diff --git a/cyto_dl/api/config/cyto_dl_config.py b/cyto_dl/api/config/cyto_dl_config.py new file mode 100644 index 000000000..a54969756 --- /dev/null +++ b/cyto_dl/api/config/cyto_dl_config.py @@ -0,0 +1,45 @@ +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Optional, Dict, Any +from cyto_dl.api.data import * + + +class CytoDLConfig(ABC): + """ + A CytoDLConfig represents the configuration for a CytoDLModel. Can be + passed to CytoDLModel on initialization to create a fully-functional model. + """ + # Note: internally, Config classes will just be used to store overrides and + # possibly a config filepath, while CytoDLModel will handle actually generating + # the full config + @abstractmethod + def __init__(self, config_filepath: Optional[Path]=None, train: bool=True): + """ + :param config_filepath: path to a .yaml config file that will be used as the basis + for this CytoDLConfig. If None, a default config will be used instead. + :param train: indicates whether this config will be used for training or prediction + """ + self._config_filepath: str = config_filepath + self._overrides: Dict[str, Any] = {} + self._overrides["train"] = train + self._overrides["test"] = train + + @abstractmethod + def get_experiment_type(self) -> ExperimentType: + """ + Return experiment type for this config (e.g. segmentation_plugin, gan, etc) + """ + pass + + def get_config_filepath(self) -> Optional[Path]: + return self._config_filepath + + def set_hardware_type(self, hardware_type: HardwareType) -> None: + self._overrides["trainer.accelerator"] = hardware_type.value + + def set_experiment_name(self, name: str) -> None: + self._overrides["experiment_name"] = name + + def set_ckpt_path(self, ckpt_path: Path) -> None: + self._overrides["ckpt_path"] = str(ckpt_path.resolve()) + diff --git a/cyto_dl/api/config/segmentation_plugin_config.py b/cyto_dl/api/config/segmentation_plugin_config.py new file mode 100644 index 000000000..8799e4838 --- /dev/null +++ b/cyto_dl/api/config/segmentation_plugin_config.py @@ -0,0 +1,11 @@ +from typing import Optional +from pathlib import Path +from cyto_dl.api.config import CytoDLConfig +from cyto_dl.api.data import * + +class SegmentationPluginConfig(CytoDLConfig): + def __init__(self, config_filepath: Optional[Path]=None): + super().__init__(config_filepath) + + def get_experiment_type(self) -> ExperimentType: + return ExperimentType.SEGMENTATION_PLUGIN \ No newline at end of file diff --git a/cyto_dl/api/model.py b/cyto_dl/api/model.py index d9010a7f8..4be1eb79c 100644 --- a/cyto_dl/api/model.py +++ b/cyto_dl/api/model.py @@ -14,6 +14,9 @@ class CytoDLModel: + # TODO: add optional CytoDLConfig param to init--if client passes a + # CytoDLConfig subtype, config will be initialized in constructor and + # calls to train/predict can be run immediately def __init__(self): self.cfg = None self._training = False From 45b1a5a877407868e1a388a86a683e08cf5c256d Mon Sep 17 00:00:00 2001 From: Daniel Saelid Date: Mon, 25 Mar 2024 16:58:47 -0700 Subject: [PATCH 02/11] added lots of config methods, WIP --- cyto_dl/api/config/cyto_dl_config.py | 102 +++++++++++++++--- .../api/config/segmentation_plugin_config.py | 63 +++++++++-- 2 files changed, 144 insertions(+), 21 deletions(-) diff --git a/cyto_dl/api/config/cyto_dl_config.py b/cyto_dl/api/config/cyto_dl_config.py index a54969756..17517b9d2 100644 --- a/cyto_dl/api/config/cyto_dl_config.py +++ b/cyto_dl/api/config/cyto_dl_config.py @@ -1,6 +1,11 @@ from abc import ABC, abstractmethod from pathlib import Path -from typing import Optional, Dict, Any +from typing import Optional, List, Any +from hydra import compose, initialize_config_dir +from hydra.core.global_hydra import GlobalHydra +from omegaconf import OmegaConf, open_dict, DictConfig +import pyrootutils +from copy import deepcopy from cyto_dl.api.data import * @@ -9,9 +14,6 @@ class CytoDLConfig(ABC): A CytoDLConfig represents the configuration for a CytoDLModel. Can be passed to CytoDLModel on initialization to create a fully-functional model. """ - # Note: internally, Config classes will just be used to store overrides and - # possibly a config filepath, while CytoDLModel will handle actually generating - # the full config @abstractmethod def __init__(self, config_filepath: Optional[Path]=None, train: bool=True): """ @@ -20,26 +22,96 @@ def __init__(self, config_filepath: Optional[Path]=None, train: bool=True): :param train: indicates whether this config will be used for training or prediction """ self._config_filepath: str = config_filepath - self._overrides: Dict[str, Any] = {} - self._overrides["train"] = train - self._overrides["test"] = train + self._train: bool = train + self._cfg: DictConfig = OmegaConf.load(config_filepath) if config_filepath else self._generate_default_config() + OmegaConf.update(self._cfg, "train", train) + OmegaConf.update(self._cfg, "test", train) + # afaik, task_name isn't used outside of template_utils.py - do we need to support this? + OmegaConf.update(self._cfg, "task_name", "train" if train else "predict") + + # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs, + # do we need to support this? + def _generate_default_config(self) -> DictConfig: + cfg_dir: Path = pyrootutils.find_root(search_from=__file__, indicator=("pyproject.toml", "README.md")) / "configs" + GlobalHydra.instance().clear() + with initialize_config_dir(version_base="1.2", config_dir=str(cfg_dir)): + cfg: DictConfig = compose( + config_name="train.yaml" if self._train else "eval.yaml", + return_hydra_config=True, + overrides=[f"experiment=im2im/{self._get_experiment_type().name}"], + ) + with open_dict(cfg): + del cfg["hydra"] + cfg.extras.enforce_tags = False + cfg.extras.print_config = False + return cfg + @abstractmethod - def get_experiment_type(self) -> ExperimentType: + def _get_experiment_type(self) -> ExperimentType: """ Return experiment type for this config (e.g. segmentation_plugin, gan, etc) """ pass - - def get_config_filepath(self) -> Optional[Path]: - return self._config_filepath - def set_hardware_type(self, hardware_type: HardwareType) -> None: - self._overrides["trainer.accelerator"] = hardware_type.value + def _key_exists(self, k: str) -> bool: + keys: List[str] = k.split(".") + curr_dict: DictConfig = self._cfg + while keys: + key: str = keys.pop(0) + if not key in curr_dict: + return False + curr_dict = curr_dict[key] + return True + + def _set_cfg(self, k: str, v: Any) -> None: + if not self._key_exists(k): + raise KeyError(f"{k} not found in config dict") + OmegaConf.update(self._cfg, k, v) + + def _get_cfg(self, k: str) -> Any: + if not self._key_exists(k): + raise KeyError(f"{k} not found in config dict") + return OmegaConf.select(self._cfg, k) def set_experiment_name(self, name: str) -> None: - self._overrides["experiment_name"] = name + self._set_cfg("experiment_name", name) + def get_experiment_name(self) -> str: + return self._get_cfg("experiment_name") + def set_ckpt_path(self, ckpt_path: Path) -> None: - self._overrides["ckpt_path"] = str(ckpt_path.resolve()) + self._set_cfg("ckpt_path", str(ckpt_path.resolve())) + + def get_ckpt_path(self) -> Path: + return Path(self._get_cfg("ckpt_path")) + + def set_hardware_type(self, hardware_type: HardwareType) -> None: + self._set_cfg("trainer.accelerator", hardware_type.value) + + def get_hardware_type(self) -> HardwareType: + return HardwareType(self._get_cfg("trainer.accelerator")) + + def set_max_epochs(self, max_epochs: int) -> None: + self._set_cfg("trainer.max_epochs", max_epochs) + + def get_max_epochs(self) -> int: + return self._get_cfg("trainer.max_epochs") + + def set_output_dir(self, output_dir: Path) -> None: + self._set_cfg("paths.output_dir", str(output_dir)) + + def get_output_dir(self) -> Path: + return Path(self._get_cfg("paths.output_dir")) + + # I can't find where this is actually used in cyto_dl, do we need to support this? + def set_work_dir(self, work_dir: Path) -> None: + self._set_cfg("paths.work_dir", str(work_dir)) + + def get_work_dir(self) -> Path: + return Path(self._get_cfg("paths.work_dir")) + + def get_config(self) -> DictConfig: + return deepcopy(self._cfg) + diff --git a/cyto_dl/api/config/segmentation_plugin_config.py b/cyto_dl/api/config/segmentation_plugin_config.py index 8799e4838..6186386ca 100644 --- a/cyto_dl/api/config/segmentation_plugin_config.py +++ b/cyto_dl/api/config/segmentation_plugin_config.py @@ -1,11 +1,62 @@ -from typing import Optional +from typing import Optional, Tuple, List from pathlib import Path from cyto_dl.api.config import CytoDLConfig from cyto_dl.api.data import * - +# TODO: get rid of using Path or any non-primitive data type since there are format strings in config sometimes... class SegmentationPluginConfig(CytoDLConfig): - def __init__(self, config_filepath: Optional[Path]=None): - super().__init__(config_filepath) + def __init__(self, config_filepath: Optional[Path] = None, train: bool = True): + super().__init__(config_filepath, train) - def get_experiment_type(self) -> ExperimentType: - return ExperimentType.SEGMENTATION_PLUGIN \ No newline at end of file + def _get_experiment_type(self) -> ExperimentType: + return ExperimentType.SEGMENTATION_PLUGIN + + # a lot of these keys are duplicated across the im2im experiment types (but not present in top-level + # train.yaml or eval.yaml) - should we move these into the top-level configs and move these setters and + # getters accordingly? + def set_spatial_dims(self, spatial_dims: int) -> None: + self._set_cfg("spatial_dims", spatial_dims) + + def get_spatial_dims(self) -> int: + return self._get_cfg("spatial_dims") + + def set_input_channel(self, input_channel: int) -> None: + self._set_cfg("input_channel", input_channel) + + def get_input_channel(self) -> int: + return self._get_cfg("input_channel") + + def set_raw_image_channels(self, channels: int) -> None: + self._set_cfg("raw_im_channels", channels) + + def get_raw_image_channels(self) -> int: + return self._get_cfg("raw_im_channels") + + def set_data_path(self, data_path: Path) -> None: + self._set_cfg("data.path", str(data_path)) + + def get_data_path(self) -> Path: + return Path(self._get_cfg("data.path")) + + # TODO: is there a better way to deal with column names + split columns? + def set_manifest_column_names(self, source: str, target1: str, target2: str, merge_mask: str, exclude_mask: str, base_image: str) -> None: + self._set_cfg("source_col", source) + self._set_cfg("target_col1", target1) + self._set_cfg("target_col2", target2) + self._set_cfg("merge_mask_col", merge_mask) + self._set_cfg("exclude_mask_col", exclude_mask) + self._set_cfg("base_image_col", base_image) + + def get_manifest_column_names(self) -> Tuple[str, str, str, str, str, str]: + return self._get_cfg("source_col"), self._get_cfg("target_col1"), self._get_cfg("target_col2"), self._get_cfg("merge_mask_col"), self._get_cfg("exclude_mask_col"), self._get_cfg("base_image_col") + + def set_split_column(self, split_column: str) -> None: + self._set_cfg("data.split_column", split_column) + # need to also add it to the list of columns + existing_cols: List[str] = self._get_cfg("data.columns") + if len(existing_cols) > len(self.get_manifest_column_names()): + existing_cols[-1] = split_column + else: + existing_cols.append(split_column) + + def get_split_column(self) -> str: + self._get_cfg("data.split_column") From d645db92c8533899b1e5a1a2e656253588329610 Mon Sep 17 00:00:00 2001 From: Daniel Saelid Date: Wed, 27 Mar 2024 14:54:42 -0700 Subject: [PATCH 03/11] move setters/getters into subclass --- cyto_dl/api/config/cyto_dl_config.py | 34 ++--------------- .../api/config/segmentation_plugin_config.py | 37 +++++++++++++++---- 2 files changed, 34 insertions(+), 37 deletions(-) diff --git a/cyto_dl/api/config/cyto_dl_config.py b/cyto_dl/api/config/cyto_dl_config.py index 17517b9d2..0dfc7847f 100644 --- a/cyto_dl/api/config/cyto_dl_config.py +++ b/cyto_dl/api/config/cyto_dl_config.py @@ -14,7 +14,6 @@ class CytoDLConfig(ABC): A CytoDLConfig represents the configuration for a CytoDLModel. Can be passed to CytoDLModel on initialization to create a fully-functional model. """ - @abstractmethod def __init__(self, config_filepath: Optional[Path]=None, train: bool=True): """ :param config_filepath: path to a .yaml config file that will be used as the basis @@ -24,10 +23,10 @@ def __init__(self, config_filepath: Optional[Path]=None, train: bool=True): self._config_filepath: str = config_filepath self._train: bool = train self._cfg: DictConfig = OmegaConf.load(config_filepath) if config_filepath else self._generate_default_config() - OmegaConf.update(self._cfg, "train", train) - OmegaConf.update(self._cfg, "test", train) + self._set_cfg("train", train) + self._set_cfg("test", train) # afaik, task_name isn't used outside of template_utils.py - do we need to support this? - OmegaConf.update(self._cfg, "task_name", "train" if train else "predict") + self._set_cfg("task_name", "train" if train else "predict") # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs, # do we need to support this? @@ -37,7 +36,7 @@ def _generate_default_config(self) -> DictConfig: GlobalHydra.instance().clear() with initialize_config_dir(version_base="1.2", config_dir=str(cfg_dir)): cfg: DictConfig = compose( - config_name="train.yaml" if self._train else "eval.yaml", + config_name="train.yaml", # only using train.yaml after conversation w/ Benji return_hydra_config=True, overrides=[f"experiment=im2im/{self._get_experiment_type().name}"], ) @@ -85,31 +84,6 @@ def set_ckpt_path(self, ckpt_path: Path) -> None: def get_ckpt_path(self) -> Path: return Path(self._get_cfg("ckpt_path")) - - def set_hardware_type(self, hardware_type: HardwareType) -> None: - self._set_cfg("trainer.accelerator", hardware_type.value) - - def get_hardware_type(self) -> HardwareType: - return HardwareType(self._get_cfg("trainer.accelerator")) - - def set_max_epochs(self, max_epochs: int) -> None: - self._set_cfg("trainer.max_epochs", max_epochs) - - def get_max_epochs(self) -> int: - return self._get_cfg("trainer.max_epochs") - - def set_output_dir(self, output_dir: Path) -> None: - self._set_cfg("paths.output_dir", str(output_dir)) - - def get_output_dir(self) -> Path: - return Path(self._get_cfg("paths.output_dir")) - - # I can't find where this is actually used in cyto_dl, do we need to support this? - def set_work_dir(self, work_dir: Path) -> None: - self._set_cfg("paths.work_dir", str(work_dir)) - - def get_work_dir(self) -> Path: - return Path(self._get_cfg("paths.work_dir")) def get_config(self) -> DictConfig: return deepcopy(self._cfg) diff --git a/cyto_dl/api/config/segmentation_plugin_config.py b/cyto_dl/api/config/segmentation_plugin_config.py index 6186386ca..d4137b9c6 100644 --- a/cyto_dl/api/config/segmentation_plugin_config.py +++ b/cyto_dl/api/config/segmentation_plugin_config.py @@ -1,11 +1,9 @@ -from typing import Optional, Tuple, List +from typing import Tuple, List, Union from pathlib import Path from cyto_dl.api.config import CytoDLConfig from cyto_dl.api.data import * -# TODO: get rid of using Path or any non-primitive data type since there are format strings in config sometimes... + class SegmentationPluginConfig(CytoDLConfig): - def __init__(self, config_filepath: Optional[Path] = None, train: bool = True): - super().__init__(config_filepath, train) def _get_experiment_type(self) -> ExperimentType: return ExperimentType.SEGMENTATION_PLUGIN @@ -31,11 +29,11 @@ def set_raw_image_channels(self, channels: int) -> None: def get_raw_image_channels(self) -> int: return self._get_cfg("raw_im_channels") - def set_data_path(self, data_path: Path) -> None: + def set_data_path(self, data_path: Union[str, Path]) -> None: self._set_cfg("data.path", str(data_path)) - def get_data_path(self) -> Path: - return Path(self._get_cfg("data.path")) + def get_data_path(self) -> str: + return self._get_cfg("data.path") # TODO: is there a better way to deal with column names + split columns? def set_manifest_column_names(self, source: str, target1: str, target2: str, merge_mask: str, exclude_mask: str, base_image: str) -> None: @@ -60,3 +58,28 @@ def set_split_column(self, split_column: str) -> None: def get_split_column(self) -> str: self._get_cfg("data.split_column") + + def set_hardware_type(self, hardware_type: HardwareType) -> None: + self._set_cfg("trainer.accelerator", hardware_type.value) + + def get_hardware_type(self) -> HardwareType: + return HardwareType(self._get_cfg("trainer.accelerator")) + + def set_max_epochs(self, max_epochs: int) -> None: + self._set_cfg("trainer.max_epochs", max_epochs) + + def get_max_epochs(self) -> int: + return self._get_cfg("trainer.max_epochs") + + def set_output_dir(self, output_dir: Union[str, Path]) -> None: + self._set_cfg("paths.output_dir", str(output_dir)) + + def get_output_dir(self) -> str: + return self._get_cfg("paths.output_dir") + + # I can't find where this is actually used in cyto_dl, do we need to support this? + def set_work_dir(self, work_dir: Union[str, Path]) -> None: + self._set_cfg("paths.work_dir", str(work_dir)) + + def get_work_dir(self) -> str: + return self._get_cfg("paths.work_dir") From 4a14fd4f3141645d79c75a936f12cd9e7a8fae64 Mon Sep 17 00:00:00 2001 From: Daniel Saelid Date: Wed, 27 Mar 2024 16:09:39 -0700 Subject: [PATCH 04/11] refactored config classes to model classes --- cyto_dl/api/config/__init__.py | 1 - cyto_dl/api/cyto_dl_model/__init__.py | 2 + .../cyto_dl_base_model.py} | 70 ++++++++++++++----- .../segmentation_plugin_model.py} | 43 ++++-------- 4 files changed, 68 insertions(+), 48 deletions(-) delete mode 100644 cyto_dl/api/config/__init__.py create mode 100644 cyto_dl/api/cyto_dl_model/__init__.py rename cyto_dl/api/{config/cyto_dl_config.py => cyto_dl_model/cyto_dl_base_model.py} (58%) rename cyto_dl/api/{config/segmentation_plugin_config.py => cyto_dl_model/segmentation_plugin_model.py} (75%) diff --git a/cyto_dl/api/config/__init__.py b/cyto_dl/api/config/__init__.py deleted file mode 100644 index d2c4c03ef..000000000 --- a/cyto_dl/api/config/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .cyto_dl_config import CytoDLConfig \ No newline at end of file diff --git a/cyto_dl/api/cyto_dl_model/__init__.py b/cyto_dl/api/cyto_dl_model/__init__.py new file mode 100644 index 000000000..398554b63 --- /dev/null +++ b/cyto_dl/api/cyto_dl_model/__init__.py @@ -0,0 +1,2 @@ +from .cyto_dl_base_model import CytoDLBaseModel +from .segmentation_plugin_model import SegmentationPluginModel \ No newline at end of file diff --git a/cyto_dl/api/config/cyto_dl_config.py b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py similarity index 58% rename from cyto_dl/api/config/cyto_dl_config.py rename to cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py index 0dfc7847f..d95c1ae7b 100644 --- a/cyto_dl/api/config/cyto_dl_config.py +++ b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py @@ -1,35 +1,27 @@ from abc import ABC, abstractmethod from pathlib import Path -from typing import Optional, List, Any +from typing import Optional, Union, List, Any from hydra import compose, initialize_config_dir from hydra.core.global_hydra import GlobalHydra from omegaconf import OmegaConf, open_dict, DictConfig import pyrootutils from copy import deepcopy from cyto_dl.api.data import * +from cyto_dl.eval import evaluate as evaluate_model +from cyto_dl.train import train as train_model -class CytoDLConfig(ABC): +class CytoDLBaseModel(ABC): """ - A CytoDLConfig represents the configuration for a CytoDLModel. Can be - passed to CytoDLModel on initialization to create a fully-functional model. + A CytoDLBaseModel is used to configure, train, and run predictions on a cyto-dl model. """ - def __init__(self, config_filepath: Optional[Path]=None, train: bool=True): + def __init__(self, config_filepath: Optional[Path]=None): """ :param config_filepath: path to a .yaml config file that will be used as the basis - for this CytoDLConfig. If None, a default config will be used instead. - :param train: indicates whether this config will be used for training or prediction + for this CytoDLBaseModel (must be generated by the CytoDLBaseModel subclass that wants + to use it). If None, a default config will be used instead. """ - self._config_filepath: str = config_filepath - self._train: bool = train self._cfg: DictConfig = OmegaConf.load(config_filepath) if config_filepath else self._generate_default_config() - self._set_cfg("train", train) - self._set_cfg("test", train) - # afaik, task_name isn't used outside of template_utils.py - do we need to support this? - self._set_cfg("task_name", "train" if train else "predict") - - # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs, - # do we need to support this? def _generate_default_config(self) -> DictConfig: cfg_dir: Path = pyrootutils.find_root(search_from=__file__, indicator=("pyproject.toml", "README.md")) / "configs" @@ -73,19 +65,59 @@ def _get_cfg(self, k: str) -> Any: raise KeyError(f"{k} not found in config dict") return OmegaConf.select(self._cfg, k) + def _set_training_config(self, train: bool): + self._set_cfg("train", train) + self._set_cfg("test", train) + # afaik, task_name isn't used outside of template_utils.py - do we need to support this? + self._set_cfg("task_name", "train" if train else "predict") + + @abstractmethod + def _set_max_epochs(self, max_epochs: int) -> None: + pass + + # is 'manifest' a good word for the path to the directory (or CSV?) with list of images + # to use for training, evaluation, etc? + @abstractmethod + def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None: + pass + + @abstractmethod + def _set_output_dir(self, output_dir: Union[str, Path]) -> None: + pass + def set_experiment_name(self, name: str) -> None: self._set_cfg("experiment_name", name) def get_experiment_name(self) -> str: return self._get_cfg("experiment_name") - def set_ckpt_path(self, ckpt_path: Path) -> None: + def set_ckpt_path(self, ckpt_path: Union[str, Path]) -> None: self._set_cfg("ckpt_path", str(ckpt_path.resolve())) - def get_ckpt_path(self) -> Path: - return Path(self._get_cfg("ckpt_path")) + def get_ckpt_path(self) -> str: + return self._get_cfg("ckpt_path") def get_config(self) -> DictConfig: return deepcopy(self._cfg) + + def save_config(self, path: Path) -> None: + OmegaConf.save(self._cfg, path) + + def train(self, max_epochs: int, manifest_path: Union[str, Path], output_dir: Union[str, Path]) -> None: + self._set_training_config(True) + self._set_max_epochs(max_epochs) + self._set_manifest_path(manifest_path) + self._set_output_dir(output_dir) + train_model(self._cfg) + + def predict(self, manifest_path: Union[str, Path], output_dir: Union[str, Path]) -> None: + if self.get_ckpt_path() is None: + raise RuntimeError("Cannot make predictions when checkpoint unspecified") + + self._set_training_config(False) + self._set_manifest_path(manifest_path) + self._set_output_dir(output_dir) + evaluate_model(self._cfg) + diff --git a/cyto_dl/api/config/segmentation_plugin_config.py b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py similarity index 75% rename from cyto_dl/api/config/segmentation_plugin_config.py rename to cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py index d4137b9c6..3ae724537 100644 --- a/cyto_dl/api/config/segmentation_plugin_config.py +++ b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py @@ -1,13 +1,25 @@ from typing import Tuple, List, Union from pathlib import Path -from cyto_dl.api.config import CytoDLConfig +from cyto_dl.api.cyto_dl_model import CytoDLBaseModel from cyto_dl.api.data import * -class SegmentationPluginConfig(CytoDLConfig): - +class SegmentationPluginModel(CytoDLBaseModel): + # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs, + # do we need to support this? def _get_experiment_type(self) -> ExperimentType: return ExperimentType.SEGMENTATION_PLUGIN + def _set_max_epochs(self, max_epochs: int) -> None: + self._set_cfg("trainer.max_epochs", max_epochs) + + def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None: + self._set_cfg("data.path", str(manifest_path)) + + def _set_output_dir(self, output_dir: Union[str, Path]) -> None: + self._set_cfg("paths.output_dir", str(output_dir)) + # I can't find where work_dir is actually used in cyto_dl, do we need to support this? + self._set_cfg("paths.work_dir", str(output_dir)) + # a lot of these keys are duplicated across the im2im experiment types (but not present in top-level # train.yaml or eval.yaml) - should we move these into the top-level configs and move these setters and # getters accordingly? @@ -29,12 +41,6 @@ def set_raw_image_channels(self, channels: int) -> None: def get_raw_image_channels(self) -> int: return self._get_cfg("raw_im_channels") - def set_data_path(self, data_path: Union[str, Path]) -> None: - self._set_cfg("data.path", str(data_path)) - - def get_data_path(self) -> str: - return self._get_cfg("data.path") - # TODO: is there a better way to deal with column names + split columns? def set_manifest_column_names(self, source: str, target1: str, target2: str, merge_mask: str, exclude_mask: str, base_image: str) -> None: self._set_cfg("source_col", source) @@ -64,22 +70,3 @@ def set_hardware_type(self, hardware_type: HardwareType) -> None: def get_hardware_type(self) -> HardwareType: return HardwareType(self._get_cfg("trainer.accelerator")) - - def set_max_epochs(self, max_epochs: int) -> None: - self._set_cfg("trainer.max_epochs", max_epochs) - - def get_max_epochs(self) -> int: - return self._get_cfg("trainer.max_epochs") - - def set_output_dir(self, output_dir: Union[str, Path]) -> None: - self._set_cfg("paths.output_dir", str(output_dir)) - - def get_output_dir(self) -> str: - return self._get_cfg("paths.output_dir") - - # I can't find where this is actually used in cyto_dl, do we need to support this? - def set_work_dir(self, work_dir: Union[str, Path]) -> None: - self._set_cfg("paths.work_dir", str(work_dir)) - - def get_work_dir(self) -> str: - return self._get_cfg("paths.work_dir") From afe7069669e321004b30b2742f458939228206ae Mon Sep 17 00:00:00 2001 From: Daniel Saelid Date: Wed, 27 Mar 2024 16:27:10 -0700 Subject: [PATCH 05/11] I think all currently-used overrides are accounted for now --- .../api/cyto_dl_model/segmentation_plugin_model.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py index 3ae724537..ece0dd67a 100644 --- a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py +++ b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py @@ -1,6 +1,7 @@ -from typing import Tuple, List, Union +from typing import Tuple, List, Union, Optional from pathlib import Path from cyto_dl.api.cyto_dl_model import CytoDLBaseModel +from omegaconf import ListConfig from cyto_dl.api.data import * class SegmentationPluginModel(CytoDLBaseModel): @@ -65,6 +66,16 @@ def set_split_column(self, split_column: str) -> None: def get_split_column(self) -> str: self._get_cfg("data.split_column") + # is patch_shape required in order to run training/prediction? + # if so, it should be an argument to train/predict/__init__, or a default + # should be set in the config + def set_patch_size(self, patch_size: PatchSize) -> None: + self._set_cfg("data._aux.patch_shape", patch_size.value) + + def get_patch_size(self) -> Optional[PatchSize]: + p_shape: ListConfig = self._get_cfg("data._aux.patch_shape") + return PatchSize(list(p_shape)) if p_shape else None + def set_hardware_type(self, hardware_type: HardwareType) -> None: self._set_cfg("trainer.accelerator", hardware_type.value) From 58d35986490b3445a9a0632a841f9d16b69b254f Mon Sep 17 00:00:00 2001 From: Daniel Saelid Date: Wed, 27 Mar 2024 16:32:25 -0700 Subject: [PATCH 06/11] pre-commit --- cyto_dl/api/cyto_dl_model/__init__.py | 2 +- .../api/cyto_dl_model/cyto_dl_base_model.py | 61 ++++++++++--------- .../segmentation_plugin_model.py | 60 +++++++++++------- 3 files changed, 72 insertions(+), 51 deletions(-) diff --git a/cyto_dl/api/cyto_dl_model/__init__.py b/cyto_dl/api/cyto_dl_model/__init__.py index 398554b63..c7a32124c 100644 --- a/cyto_dl/api/cyto_dl_model/__init__.py +++ b/cyto_dl/api/cyto_dl_model/__init__.py @@ -1,2 +1,2 @@ from .cyto_dl_base_model import CytoDLBaseModel -from .segmentation_plugin_model import SegmentationPluginModel \ No newline at end of file +from .segmentation_plugin_model import SegmentationPluginModel diff --git a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py index d95c1ae7b..4363a9b84 100644 --- a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py +++ b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py @@ -1,34 +1,40 @@ from abc import ABC, abstractmethod +from copy import deepcopy from pathlib import Path -from typing import Optional, Union, List, Any +from typing import Any, List, Optional, Union + +import pyrootutils from hydra import compose, initialize_config_dir from hydra.core.global_hydra import GlobalHydra -from omegaconf import OmegaConf, open_dict, DictConfig -import pyrootutils -from copy import deepcopy +from omegaconf import DictConfig, OmegaConf, open_dict + from cyto_dl.api.data import * from cyto_dl.eval import evaluate as evaluate_model from cyto_dl.train import train as train_model class CytoDLBaseModel(ABC): - """ - A CytoDLBaseModel is used to configure, train, and run predictions on a cyto-dl model. - """ - def __init__(self, config_filepath: Optional[Path]=None): + """A CytoDLBaseModel is used to configure, train, and run predictions on a cyto-dl model.""" + + def __init__(self, config_filepath: Optional[Path] = None): """ :param config_filepath: path to a .yaml config file that will be used as the basis for this CytoDLBaseModel (must be generated by the CytoDLBaseModel subclass that wants to use it). If None, a default config will be used instead. """ - self._cfg: DictConfig = OmegaConf.load(config_filepath) if config_filepath else self._generate_default_config() - + self._cfg: DictConfig = ( + OmegaConf.load(config_filepath) if config_filepath else self._generate_default_config() + ) + def _generate_default_config(self) -> DictConfig: - cfg_dir: Path = pyrootutils.find_root(search_from=__file__, indicator=("pyproject.toml", "README.md")) / "configs" + cfg_dir: Path = ( + pyrootutils.find_root(search_from=__file__, indicator=("pyproject.toml", "README.md")) + / "configs" + ) GlobalHydra.instance().clear() with initialize_config_dir(version_base="1.2", config_dir=str(cfg_dir)): cfg: DictConfig = compose( - config_name="train.yaml", # only using train.yaml after conversation w/ Benji + config_name="train.yaml", # only using train.yaml after conversation w/ Benji return_hydra_config=True, overrides=[f"experiment=im2im/{self._get_experiment_type().name}"], ) @@ -40,11 +46,9 @@ def _generate_default_config(self) -> DictConfig: @abstractmethod def _get_experiment_type(self) -> ExperimentType: - """ - Return experiment type for this config (e.g. segmentation_plugin, gan, etc) - """ + """Return experiment type for this config (e.g. segmentation_plugin, gan, etc)""" pass - + def _key_exists(self, k: str) -> bool: keys: List[str] = k.split(".") curr_dict: DictConfig = self._cfg @@ -54,7 +58,7 @@ def _key_exists(self, k: str) -> bool: return False curr_dict = curr_dict[key] return True - + def _set_cfg(self, k: str, v: Any) -> None: if not self._key_exists(k): raise KeyError(f"{k} not found in config dict") @@ -64,13 +68,13 @@ def _get_cfg(self, k: str) -> Any: if not self._key_exists(k): raise KeyError(f"{k} not found in config dict") return OmegaConf.select(self._cfg, k) - + def _set_training_config(self, train: bool): self._set_cfg("train", train) self._set_cfg("test", train) # afaik, task_name isn't used outside of template_utils.py - do we need to support this? self._set_cfg("task_name", "train" if train else "predict") - + @abstractmethod def _set_max_epochs(self, max_epochs: int) -> None: pass @@ -87,29 +91,31 @@ def _set_output_dir(self, output_dir: Union[str, Path]) -> None: def set_experiment_name(self, name: str) -> None: self._set_cfg("experiment_name", name) - + def get_experiment_name(self) -> str: return self._get_cfg("experiment_name") def set_ckpt_path(self, ckpt_path: Union[str, Path]) -> None: self._set_cfg("ckpt_path", str(ckpt_path.resolve())) - + def get_ckpt_path(self) -> str: return self._get_cfg("ckpt_path") - + def get_config(self) -> DictConfig: return deepcopy(self._cfg) - + def save_config(self, path: Path) -> None: OmegaConf.save(self._cfg, path) - - def train(self, max_epochs: int, manifest_path: Union[str, Path], output_dir: Union[str, Path]) -> None: + + def train( + self, max_epochs: int, manifest_path: Union[str, Path], output_dir: Union[str, Path] + ) -> None: self._set_training_config(True) self._set_max_epochs(max_epochs) self._set_manifest_path(manifest_path) self._set_output_dir(output_dir) train_model(self._cfg) - + def predict(self, manifest_path: Union[str, Path], output_dir: Union[str, Path]) -> None: if self.get_ckpt_path() is None: raise RuntimeError("Cannot make predictions when checkpoint unspecified") @@ -118,6 +124,3 @@ def predict(self, manifest_path: Union[str, Path], output_dir: Union[str, Path]) self._set_manifest_path(manifest_path) self._set_output_dir(output_dir) evaluate_model(self._cfg) - - - diff --git a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py index ece0dd67a..59cb6da0c 100644 --- a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py +++ b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py @@ -1,21 +1,24 @@ -from typing import Tuple, List, Union, Optional from pathlib import Path -from cyto_dl.api.cyto_dl_model import CytoDLBaseModel +from typing import List, Optional, Tuple, Union + from omegaconf import ListConfig + +from cyto_dl.api.cyto_dl_model import CytoDLBaseModel from cyto_dl.api.data import * + class SegmentationPluginModel(CytoDLBaseModel): - # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs, + # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs, # do we need to support this? def _get_experiment_type(self) -> ExperimentType: return ExperimentType.SEGMENTATION_PLUGIN - + def _set_max_epochs(self, max_epochs: int) -> None: self._set_cfg("trainer.max_epochs", max_epochs) def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None: self._set_cfg("data.path", str(manifest_path)) - + def _set_output_dir(self, output_dir: Union[str, Path]) -> None: self._set_cfg("paths.output_dir", str(output_dir)) # I can't find where work_dir is actually used in cyto_dl, do we need to support this? @@ -23,37 +26,52 @@ def _set_output_dir(self, output_dir: Union[str, Path]) -> None: # a lot of these keys are duplicated across the im2im experiment types (but not present in top-level # train.yaml or eval.yaml) - should we move these into the top-level configs and move these setters and - # getters accordingly? + # getters accordingly? def set_spatial_dims(self, spatial_dims: int) -> None: self._set_cfg("spatial_dims", spatial_dims) - + def get_spatial_dims(self) -> int: return self._get_cfg("spatial_dims") - + def set_input_channel(self, input_channel: int) -> None: self._set_cfg("input_channel", input_channel) - + def get_input_channel(self) -> int: return self._get_cfg("input_channel") - + def set_raw_image_channels(self, channels: int) -> None: self._set_cfg("raw_im_channels", channels) - + def get_raw_image_channels(self) -> int: return self._get_cfg("raw_im_channels") - + # TODO: is there a better way to deal with column names + split columns? - def set_manifest_column_names(self, source: str, target1: str, target2: str, merge_mask: str, exclude_mask: str, base_image: str) -> None: + def set_manifest_column_names( + self, + source: str, + target1: str, + target2: str, + merge_mask: str, + exclude_mask: str, + base_image: str, + ) -> None: self._set_cfg("source_col", source) self._set_cfg("target_col1", target1) self._set_cfg("target_col2", target2) self._set_cfg("merge_mask_col", merge_mask) self._set_cfg("exclude_mask_col", exclude_mask) self._set_cfg("base_image_col", base_image) - + def get_manifest_column_names(self) -> Tuple[str, str, str, str, str, str]: - return self._get_cfg("source_col"), self._get_cfg("target_col1"), self._get_cfg("target_col2"), self._get_cfg("merge_mask_col"), self._get_cfg("exclude_mask_col"), self._get_cfg("base_image_col") - + return ( + self._get_cfg("source_col"), + self._get_cfg("target_col1"), + self._get_cfg("target_col2"), + self._get_cfg("merge_mask_col"), + self._get_cfg("exclude_mask_col"), + self._get_cfg("base_image_col"), + ) + def set_split_column(self, split_column: str) -> None: self._set_cfg("data.split_column", split_column) # need to also add it to the list of columns @@ -62,22 +80,22 @@ def set_split_column(self, split_column: str) -> None: existing_cols[-1] = split_column else: existing_cols.append(split_column) - + def get_split_column(self) -> str: self._get_cfg("data.split_column") - + # is patch_shape required in order to run training/prediction? # if so, it should be an argument to train/predict/__init__, or a default # should be set in the config def set_patch_size(self, patch_size: PatchSize) -> None: self._set_cfg("data._aux.patch_shape", patch_size.value) - + def get_patch_size(self) -> Optional[PatchSize]: p_shape: ListConfig = self._get_cfg("data._aux.patch_shape") return PatchSize(list(p_shape)) if p_shape else None - + def set_hardware_type(self, hardware_type: HardwareType) -> None: self._set_cfg("trainer.accelerator", hardware_type.value) - + def get_hardware_type(self) -> HardwareType: return HardwareType(self._get_cfg("trainer.accelerator")) From 12d613669d4b812c88478266274855a8d4215cce Mon Sep 17 00:00:00 2001 From: Daniel Saelid Date: Fri, 29 Mar 2024 16:05:42 -0700 Subject: [PATCH 07/11] added tests for seg plugin --- .../api/cyto_dl_model/cyto_dl_base_model.py | 33 +++-- .../segmentation_plugin_model.py | 24 +++- tests/api/cyto_dl_model/bad_config.yaml | 5 + .../test_segmentation_plugin_model.py | 134 ++++++++++++++++++ 4 files changed, 179 insertions(+), 17 deletions(-) create mode 100644 tests/api/cyto_dl_model/bad_config.yaml create mode 100644 tests/api/cyto_dl_model/test_segmentation_plugin_model.py diff --git a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py index 4363a9b84..542d87201 100644 --- a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py +++ b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py @@ -33,10 +33,16 @@ def _generate_default_config(self) -> DictConfig: ) GlobalHydra.instance().clear() with initialize_config_dir(version_base="1.2", config_dir=str(cfg_dir)): + # the overrides for 'paths.' are necessary to make later overrides work (for some reason I don't fully understand) + # I'd prefer to do this in experiment configs instead of the overrides here cfg: DictConfig = compose( config_name="train.yaml", # only using train.yaml after conversation w/ Benji return_hydra_config=True, - overrides=[f"experiment=im2im/{self._get_experiment_type().name}"], + overrides=[ + f"experiment=im2im/{self._get_experiment_type().name}", + "paths.output_dir=PLACEHOLDER", + "paths.work_dir=PLACEHOLDER", + ], ) with open_dict(cfg): del cfg["hydra"] @@ -89,18 +95,16 @@ def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None: def _set_output_dir(self, output_dir: Union[str, Path]) -> None: pass + def _set_ckpt(self, ckpt: Optional[Path]) -> None: + self._set_cfg("ckpt_path", str(ckpt.resolve()) if ckpt else ckpt) + + # does experiment name have any effect? def set_experiment_name(self, name: str) -> None: self._set_cfg("experiment_name", name) def get_experiment_name(self) -> str: return self._get_cfg("experiment_name") - def set_ckpt_path(self, ckpt_path: Union[str, Path]) -> None: - self._set_cfg("ckpt_path", str(ckpt_path.resolve())) - - def get_ckpt_path(self) -> str: - return self._get_cfg("ckpt_path") - def get_config(self) -> DictConfig: return deepcopy(self._cfg) @@ -108,19 +112,24 @@ def save_config(self, path: Path) -> None: OmegaConf.save(self._cfg, path) def train( - self, max_epochs: int, manifest_path: Union[str, Path], output_dir: Union[str, Path] + self, + max_epochs: int, + manifest_path: Union[str, Path], + output_dir: Union[str, Path], + checkpoint: Optional[Path] = None, ) -> None: self._set_training_config(True) self._set_max_epochs(max_epochs) self._set_manifest_path(manifest_path) self._set_output_dir(output_dir) + self._set_ckpt(checkpoint) train_model(self._cfg) - def predict(self, manifest_path: Union[str, Path], output_dir: Union[str, Path]) -> None: - if self.get_ckpt_path() is None: - raise RuntimeError("Cannot make predictions when checkpoint unspecified") - + def predict( + self, manifest_path: Union[str, Path], output_dir: Union[str, Path], checkpoint: Path + ) -> None: self._set_training_config(False) self._set_manifest_path(manifest_path) self._set_output_dir(output_dir) + self._set_ckpt(checkpoint) evaluate_model(self._cfg) diff --git a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py index 59cb6da0c..783876d49 100644 --- a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py +++ b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py @@ -8,6 +8,13 @@ class SegmentationPluginModel(CytoDLBaseModel): + """A SegmentationPluginModel handles configuration, training, and prediction using the default + segmentation_plugin experiment from CytoDL.""" + + def __init__(self, config_filepath: Optional[Path] = None): + super().__init__(config_filepath) + self._has_split_column = False + # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs, # do we need to support this? def _get_experiment_type(self) -> ExperimentType: @@ -74,15 +81,22 @@ def get_manifest_column_names(self) -> Tuple[str, str, str, str, str, str]: def set_split_column(self, split_column: str) -> None: self._set_cfg("data.split_column", split_column) - # need to also add it to the list of columns - existing_cols: List[str] = self._get_cfg("data.columns") - if len(existing_cols) > len(self.get_manifest_column_names()): + existing_cols: ListConfig = self._get_cfg("data.columns") + if self._has_split_column: existing_cols[-1] = split_column else: existing_cols.append(split_column) + self._has_split_column = True + + def get_split_column(self) -> Optional[str]: + return self._get_cfg("data.split_column") - def get_split_column(self) -> str: - self._get_cfg("data.split_column") + def remove_split_column(self) -> None: + if self._has_split_column: + self._set_cfg("data.split_column", None) + existing_cols: ListConfig = self._get_cfg("data.columns") + del existing_cols[-1] + self._has_split_column = False # is patch_shape required in order to run training/prediction? # if so, it should be an argument to train/predict/__init__, or a default diff --git a/tests/api/cyto_dl_model/bad_config.yaml b/tests/api/cyto_dl_model/bad_config.yaml new file mode 100644 index 000000000..24122169c --- /dev/null +++ b/tests/api/cyto_dl_model/bad_config.yaml @@ -0,0 +1,5 @@ +this: 1 +is: + a: 2 +bad: 3 +config: 4 diff --git a/tests/api/cyto_dl_model/test_segmentation_plugin_model.py b/tests/api/cyto_dl_model/test_segmentation_plugin_model.py new file mode 100644 index 000000000..8a76111e9 --- /dev/null +++ b/tests/api/cyto_dl_model/test_segmentation_plugin_model.py @@ -0,0 +1,134 @@ +from pathlib import Path +from unittest.mock import Mock, patch + +import pytest + +from cyto_dl.api.cyto_dl_model import SegmentationPluginModel + + +# Taken together, these tests verify that the SegmentationPluginModel aligns with the +# default segmentation_plugin config file. It does this by a) verifying that methods which +# attempt to change config items on a bad (fake) config throw exceptions because those config +# items do not exist and b) verifying that those methods do not throw exceptions when called on +# a model that uses the default config file +# +# Note that these tests do not test whether or not the config variables set by SegmentationPluginModel +# are actually used how we expect by Cyto-DL, just that they exist in the default config. +@pytest.fixture +def model_with_default_config() -> SegmentationPluginModel: + return SegmentationPluginModel() + + +@pytest.fixture +def model_with_bad_config() -> SegmentationPluginModel: + return SegmentationPluginModel(config_filepath=Path(__file__).parent / "bad_config.yaml") + + +class TestDefaultConfig: + @patch("cyto_dl.api.cyto_dl_model.cyto_dl_base_model.train_model") + def test_train_no_ckpt( + self, train_model_mock: Mock, model_with_default_config: SegmentationPluginModel + ): + model_with_default_config.train(1, "manifest", "output_dir") + train_model_mock.assert_called_once() + + @patch("cyto_dl.api.cyto_dl_model.cyto_dl_base_model.train_model") + def test_train_with_ckpt( + self, train_model_mock: Mock, model_with_default_config: SegmentationPluginModel + ): + model_with_default_config.train(1, "manifest", "output_dir", checkpoint=Path("ckpt")) + train_model_mock.assert_called_once() + + @patch("cyto_dl.api.cyto_dl_model.cyto_dl_base_model.evaluate_model") + def test_predict( + self, evaluate_model_mock: Mock, model_with_default_config: SegmentationPluginModel + ): + model_with_default_config.predict("manifest", "output_dir", Path("ckpt")) + evaluate_model_mock.assert_called_once() + + def test_spatial_dims(self, model_with_default_config: SegmentationPluginModel): + assert model_with_default_config.get_spatial_dims() is not None + model_with_default_config.set_spatial_dims(24) + assert model_with_default_config.get_spatial_dims() == 24 + + def test_input_channel(self, model_with_default_config: SegmentationPluginModel): + assert model_with_default_config.get_input_channel() is not None + model_with_default_config.set_input_channel(785) + assert model_with_default_config.get_input_channel() == 785 + + def test_raw_image_channels(self, model_with_default_config: SegmentationPluginModel): + assert model_with_default_config.get_raw_image_channels() is not None + model_with_default_config.set_raw_image_channels(35) + assert model_with_default_config.get_raw_image_channels() == 35 + + def test_manifest_column_names(self, model_with_default_config: SegmentationPluginModel): + assert len(model_with_default_config.get_manifest_column_names()) == 6 + model_with_default_config.set_manifest_column_names("a", "b", "c", "d", "e", "f") + assert model_with_default_config.get_manifest_column_names() == ( + "a", + "b", + "c", + "d", + "e", + "f", + ) + + def test_split_column(self, model_with_default_config: SegmentationPluginModel): + assert model_with_default_config.get_split_column() is None + model_with_default_config.set_split_column("foo") + assert model_with_default_config.get_split_column() == "foo" + model_with_default_config.remove_split_column() + assert model_with_default_config.get_split_column() is None + + +class TestBadConfig: + @patch("cyto_dl.api.cyto_dl_model.cyto_dl_base_model.train_model") + def test_train_no_ckpt( + self, train_model_mock: Mock, model_with_bad_config: SegmentationPluginModel + ): + with pytest.raises(KeyError): + model_with_bad_config.train(1, "manifest", "output_dir") + + @patch("cyto_dl.api.cyto_dl_model.cyto_dl_base_model.train_model") + def test_train_with_ckpt( + self, train_model_mock: Mock, model_with_bad_config: SegmentationPluginModel + ): + with pytest.raises(KeyError): + model_with_bad_config.train(1, "manifest", "output_dir", checkpoint=Path("ckpt")) + + @patch("cyto_dl.api.cyto_dl_model.cyto_dl_base_model.evaluate_model") + def test_predict( + self, evaluate_model_mock: Mock, model_with_bad_config: SegmentationPluginModel + ): + with pytest.raises(KeyError): + model_with_bad_config.predict("manifest", "output_dir", Path("ckpt")) + + def test_spatial_dims(self, model_with_bad_config: SegmentationPluginModel): + with pytest.raises(KeyError): + model_with_bad_config.get_spatial_dims() + with pytest.raises(KeyError): + model_with_bad_config.set_spatial_dims(24) + + def test_input_channel(self, model_with_bad_config: SegmentationPluginModel): + with pytest.raises(KeyError): + model_with_bad_config.get_input_channel() + with pytest.raises(KeyError): + model_with_bad_config.set_input_channel(785) + + def test_raw_image_channels(self, model_with_bad_config: SegmentationPluginModel): + with pytest.raises(KeyError): + model_with_bad_config.get_raw_image_channels() + with pytest.raises(KeyError): + model_with_bad_config.set_raw_image_channels(35) + + def test_manifest_column_names(self, model_with_bad_config: SegmentationPluginModel): + with pytest.raises(KeyError): + model_with_bad_config.get_manifest_column_names() + with pytest.raises(KeyError): + model_with_bad_config.set_manifest_column_names("a", "b", "c", "d", "e", "f") + + def test_split_column(self, model_with_bad_config: SegmentationPluginModel): + with pytest.raises(KeyError): + model_with_bad_config.get_split_column() + with pytest.raises(KeyError): + model_with_bad_config.set_split_column("foo") From 33bee7ac8833c56b8aed77f5a21c39bc15bbaeeb Mon Sep 17 00:00:00 2001 From: Daniel Saelid Date: Fri, 12 Apr 2024 11:50:39 -0700 Subject: [PATCH 08/11] group abstract methods together --- .../api/cyto_dl_model/cyto_dl_base_model.py | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py index 542d87201..dbc2bb5d1 100644 --- a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py +++ b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py @@ -26,6 +26,23 @@ def __init__(self, config_filepath: Optional[Path] = None): OmegaConf.load(config_filepath) if config_filepath else self._generate_default_config() ) + @abstractmethod + def _get_experiment_type(self) -> ExperimentType: + """Return experiment type for this config (e.g. segmentation_plugin, gan, etc)""" + pass + + @abstractmethod + def _set_max_epochs(self, max_epochs: int) -> None: + pass + + @abstractmethod + def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None: + pass + + @abstractmethod + def _set_output_dir(self, output_dir: Union[str, Path]) -> None: + pass + def _generate_default_config(self) -> DictConfig: cfg_dir: Path = ( pyrootutils.find_root(search_from=__file__, indicator=("pyproject.toml", "README.md")) @@ -50,11 +67,6 @@ def _generate_default_config(self) -> DictConfig: cfg.extras.print_config = False return cfg - @abstractmethod - def _get_experiment_type(self) -> ExperimentType: - """Return experiment type for this config (e.g. segmentation_plugin, gan, etc)""" - pass - def _key_exists(self, k: str) -> bool: keys: List[str] = k.split(".") curr_dict: DictConfig = self._cfg @@ -81,20 +93,6 @@ def _set_training_config(self, train: bool): # afaik, task_name isn't used outside of template_utils.py - do we need to support this? self._set_cfg("task_name", "train" if train else "predict") - @abstractmethod - def _set_max_epochs(self, max_epochs: int) -> None: - pass - - # is 'manifest' a good word for the path to the directory (or CSV?) with list of images - # to use for training, evaluation, etc? - @abstractmethod - def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None: - pass - - @abstractmethod - def _set_output_dir(self, output_dir: Union[str, Path]) -> None: - pass - def _set_ckpt(self, ckpt: Optional[Path]) -> None: self._set_cfg("ckpt_path", str(ckpt.resolve()) if ckpt else ckpt) From 8fc070da84f40bae93effd9e53e07236f777ad00 Mon Sep 17 00:00:00 2001 From: Daniel Saelid Date: Fri, 12 Apr 2024 13:18:01 -0700 Subject: [PATCH 09/11] lint + lowercase experiment type --- cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py index dbc2bb5d1..be8c1bea2 100644 --- a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py +++ b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py @@ -30,7 +30,7 @@ def __init__(self, config_filepath: Optional[Path] = None): def _get_experiment_type(self) -> ExperimentType: """Return experiment type for this config (e.g. segmentation_plugin, gan, etc)""" pass - + @abstractmethod def _set_max_epochs(self, max_epochs: int) -> None: pass @@ -56,7 +56,7 @@ def _generate_default_config(self) -> DictConfig: config_name="train.yaml", # only using train.yaml after conversation w/ Benji return_hydra_config=True, overrides=[ - f"experiment=im2im/{self._get_experiment_type().name}", + f"experiment=im2im/{self._get_experiment_type().name.lower()}", "paths.output_dir=PLACEHOLDER", "paths.work_dir=PLACEHOLDER", ], From 889775c5b15fcbf38de32cce0dee57cade5afe53 Mon Sep 17 00:00:00 2001 From: Daniel Saelid Date: Fri, 12 Apr 2024 13:23:23 -0700 Subject: [PATCH 10/11] flake --- cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py | 4 ++-- cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py index be8c1bea2..8986da834 100644 --- a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py +++ b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py @@ -8,7 +8,7 @@ from hydra.core.global_hydra import GlobalHydra from omegaconf import DictConfig, OmegaConf, open_dict -from cyto_dl.api.data import * +from cyto_dl.api.data import ExperimentType from cyto_dl.eval import evaluate as evaluate_model from cyto_dl.train import train as train_model @@ -72,7 +72,7 @@ def _key_exists(self, k: str) -> bool: curr_dict: DictConfig = self._cfg while keys: key: str = keys.pop(0) - if not key in curr_dict: + if key not in curr_dict: return False curr_dict = curr_dict[key] return True diff --git a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py index 783876d49..68a0b9deb 100644 --- a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py +++ b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py @@ -4,7 +4,7 @@ from omegaconf import ListConfig from cyto_dl.api.cyto_dl_model import CytoDLBaseModel -from cyto_dl.api.data import * +from cyto_dl.api.data import ExperimentType, HardwareType, PatchSize class SegmentationPluginModel(CytoDLBaseModel): From 73ef86cf49f6a4ecfb36d5920f33a4fd0eee8bb9 Mon Sep 17 00:00:00 2001 From: Daniel Saelid Date: Mon, 15 Apr 2024 11:49:01 -0700 Subject: [PATCH 11/11] move overrides to top-level --- .../experiment/im2im/segmentation_plugin.yaml | 34 +++++----- .../api/cyto_dl_model/cyto_dl_base_model.py | 66 +++++++++++-------- .../segmentation_plugin_model.py | 24 ++----- .../test_segmentation_plugin_model.py | 15 +---- 4 files changed, 65 insertions(+), 74 deletions(-) diff --git a/configs/experiment/im2im/segmentation_plugin.yaml b/configs/experiment/im2im/segmentation_plugin.yaml index c69274c39..bff9895c9 100644 --- a/configs/experiment/im2im/segmentation_plugin.yaml +++ b/configs/experiment/im2im/segmentation_plugin.yaml @@ -6,16 +6,21 @@ defaults: - override /model: im2im/segmentation_plugin.yaml - override /callbacks: default.yaml - override /trainer: gpu.yaml - - override /logger: mlflow.yaml + - override /logger: csv.yaml # all parameters below will be merged with parameters from default configurations set above # this allows you to overwrite only specified parameters +# parameters with value MUST_OVERRIDE must be overridden before using this config, all other +# parameters have a reasonable default value + tags: ["dev"] seed: 12345 -experiment_name: YOUR_EXP_NAME -run_name: YOUR_RUN_NAME +ckpt_path: null # must override for prediction + +experiment_name: experiment_name +run_name: run_name # manifest columns source_col: raw @@ -26,24 +31,21 @@ exclude_mask_col: exclude_mask base_image_col: base_image # data params -spatial_dims: 3 +spatial_dims: MUST_OVERRIDE # int value, req for first training, should not change after input_channel: 0 raw_im_channels: 1 trainer: - max_epochs: 100 + max_epochs: 1 # must override for training + accelerator: gpu data: - path: ${paths.data_dir}/example_experiment_data/s3_data - cache_dir: ${paths.data_dir}/example_experiment_data/cache + path: MUST_OVERRIDE # string path to manifest + split_column: null batch_size: 1 _aux: - patch_shape: -# small, medium, large -# 32 pix, 64 pix, 128 pix - -# OVERRIDE: -# data._aux.patch_shape -# model._aux.strides -# model._aux.kernel_size -# model._aux.upsample_kernel_size + patch_shape: [16, 32, 32] + +paths: + output_dir: MUST_OVERRIDE + work_dir: ${paths.output_dir} # it's unclear to me if this is necessary or used diff --git a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py index 8986da834..2261b951d 100644 --- a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py +++ b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py @@ -12,60 +12,74 @@ from cyto_dl.eval import evaluate as evaluate_model from cyto_dl.train import train as train_model +# TODO: encapsulate experiment management (file system) details here, will require passing output_dir +# into the factory methods, maybe + class CytoDLBaseModel(ABC): """A CytoDLBaseModel is used to configure, train, and run predictions on a cyto-dl model.""" - def __init__(self, config_filepath: Optional[Path] = None): - """ - :param config_filepath: path to a .yaml config file that will be used as the basis - for this CytoDLBaseModel (must be generated by the CytoDLBaseModel subclass that wants - to use it). If None, a default config will be used instead. + def __init__(self, cfg: DictConfig): + """Not intended for direct use by clients. + + Please see the classmethod factory methods instead. """ - self._cfg: DictConfig = ( - OmegaConf.load(config_filepath) if config_filepath else self._generate_default_config() - ) + self._cfg: DictConfig = cfg + @classmethod @abstractmethod - def _get_experiment_type(self) -> ExperimentType: + def _get_experiment_type(cls) -> ExperimentType: """Return experiment type for this config (e.g. segmentation_plugin, gan, etc)""" pass - @abstractmethod - def _set_max_epochs(self, max_epochs: int) -> None: - pass + @classmethod + def from_existing_config(cls, config_filepath: Path): + """Returns a model from an existing config. - @abstractmethod - def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None: - pass + :param config_filepath: path to a .yaml config file that will be used as the basis + for this CytoDLBaseModel (must be generated by the CytoDLBaseModel subclass that wants + to use it). + """ + return cls(OmegaConf.load(config_filepath)) - @abstractmethod - def _set_output_dir(self, output_dir: Union[str, Path]) -> None: - pass + # TODO: if spatial_dims is only ever 2 or 3, create an enum for it + @classmethod + def from_default_config(cls, spatial_dims: int): + """Returns a model from the default config. - def _generate_default_config(self) -> DictConfig: + :param spatial_dims: dimensions for the model (e.g. 2) + """ cfg_dir: Path = ( pyrootutils.find_root(search_from=__file__, indicator=("pyproject.toml", "README.md")) / "configs" ) GlobalHydra.instance().clear() with initialize_config_dir(version_base="1.2", config_dir=str(cfg_dir)): - # the overrides for 'paths.' are necessary to make later overrides work (for some reason I don't fully understand) - # I'd prefer to do this in experiment configs instead of the overrides here cfg: DictConfig = compose( - config_name="train.yaml", # only using train.yaml after conversation w/ Benji + config_name="train.yaml", # train.yaml can work for prediction too return_hydra_config=True, overrides=[ - f"experiment=im2im/{self._get_experiment_type().name.lower()}", - "paths.output_dir=PLACEHOLDER", - "paths.work_dir=PLACEHOLDER", + f"experiment=im2im/{cls._get_experiment_type().name.lower()}", + f"spatial_dims={spatial_dims}", ], ) with open_dict(cfg): del cfg["hydra"] cfg.extras.enforce_tags = False cfg.extras.print_config = False - return cfg + return cls(cfg) + + @abstractmethod + def _set_max_epochs(self, max_epochs: int) -> None: + pass + + @abstractmethod + def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None: + pass + + @abstractmethod + def _set_output_dir(self, output_dir: Union[str, Path]) -> None: + pass def _key_exists(self, k: str) -> bool: keys: List[str] = k.split(".") diff --git a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py index 68a0b9deb..0e014dbae 100644 --- a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py +++ b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py @@ -1,7 +1,7 @@ from pathlib import Path from typing import List, Optional, Tuple, Union -from omegaconf import ListConfig +from omegaconf import DictConfig, ListConfig from cyto_dl.api.cyto_dl_model import CytoDLBaseModel from cyto_dl.api.data import ExperimentType, HardwareType, PatchSize @@ -11,13 +11,12 @@ class SegmentationPluginModel(CytoDLBaseModel): """A SegmentationPluginModel handles configuration, training, and prediction using the default segmentation_plugin experiment from CytoDL.""" - def __init__(self, config_filepath: Optional[Path] = None): - super().__init__(config_filepath) + def __init__(self, cfg: DictConfig): + super().__init__(cfg) self._has_split_column = False - # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs, - # do we need to support this? - def _get_experiment_type(self) -> ExperimentType: + @classmethod + def _get_experiment_type(cls) -> ExperimentType: return ExperimentType.SEGMENTATION_PLUGIN def _set_max_epochs(self, max_epochs: int) -> None: @@ -28,18 +27,8 @@ def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None: def _set_output_dir(self, output_dir: Union[str, Path]) -> None: self._set_cfg("paths.output_dir", str(output_dir)) - # I can't find where work_dir is actually used in cyto_dl, do we need to support this? self._set_cfg("paths.work_dir", str(output_dir)) - # a lot of these keys are duplicated across the im2im experiment types (but not present in top-level - # train.yaml or eval.yaml) - should we move these into the top-level configs and move these setters and - # getters accordingly? - def set_spatial_dims(self, spatial_dims: int) -> None: - self._set_cfg("spatial_dims", spatial_dims) - - def get_spatial_dims(self) -> int: - return self._get_cfg("spatial_dims") - def set_input_channel(self, input_channel: int) -> None: self._set_cfg("input_channel", input_channel) @@ -98,9 +87,6 @@ def remove_split_column(self) -> None: del existing_cols[-1] self._has_split_column = False - # is patch_shape required in order to run training/prediction? - # if so, it should be an argument to train/predict/__init__, or a default - # should be set in the config def set_patch_size(self, patch_size: PatchSize) -> None: self._set_cfg("data._aux.patch_shape", patch_size.value) diff --git a/tests/api/cyto_dl_model/test_segmentation_plugin_model.py b/tests/api/cyto_dl_model/test_segmentation_plugin_model.py index 8a76111e9..d2190d1ea 100644 --- a/tests/api/cyto_dl_model/test_segmentation_plugin_model.py +++ b/tests/api/cyto_dl_model/test_segmentation_plugin_model.py @@ -16,12 +16,12 @@ # are actually used how we expect by Cyto-DL, just that they exist in the default config. @pytest.fixture def model_with_default_config() -> SegmentationPluginModel: - return SegmentationPluginModel() + return SegmentationPluginModel.from_default_config(3) @pytest.fixture def model_with_bad_config() -> SegmentationPluginModel: - return SegmentationPluginModel(config_filepath=Path(__file__).parent / "bad_config.yaml") + return SegmentationPluginModel.from_existing_config(Path(__file__).parent / "bad_config.yaml") class TestDefaultConfig: @@ -46,11 +46,6 @@ def test_predict( model_with_default_config.predict("manifest", "output_dir", Path("ckpt")) evaluate_model_mock.assert_called_once() - def test_spatial_dims(self, model_with_default_config: SegmentationPluginModel): - assert model_with_default_config.get_spatial_dims() is not None - model_with_default_config.set_spatial_dims(24) - assert model_with_default_config.get_spatial_dims() == 24 - def test_input_channel(self, model_with_default_config: SegmentationPluginModel): assert model_with_default_config.get_input_channel() is not None model_with_default_config.set_input_channel(785) @@ -103,12 +98,6 @@ def test_predict( with pytest.raises(KeyError): model_with_bad_config.predict("manifest", "output_dir", Path("ckpt")) - def test_spatial_dims(self, model_with_bad_config: SegmentationPluginModel): - with pytest.raises(KeyError): - model_with_bad_config.get_spatial_dims() - with pytest.raises(KeyError): - model_with_bad_config.set_spatial_dims(24) - def test_input_channel(self, model_with_bad_config: SegmentationPluginModel): with pytest.raises(KeyError): model_with_bad_config.get_input_channel()