From baa67af262894b6256a2c24a01e17cddd98f1473 Mon Sep 17 00:00:00 2001
From: Daniel Saelid <daniel.saelid@alleninstitute.org>
Date: Fri, 22 Mar 2024 16:46:12 -0700
Subject: [PATCH 01/11] added base config class

---
 cyto_dl/api/config/__init__.py                |  1 +
 cyto_dl/api/config/cyto_dl_config.py          | 45 +++++++++++++++++++
 .../api/config/segmentation_plugin_config.py  | 11 +++++
 cyto_dl/api/model.py                          |  3 ++
 4 files changed, 60 insertions(+)
 create mode 100644 cyto_dl/api/config/__init__.py
 create mode 100644 cyto_dl/api/config/cyto_dl_config.py
 create mode 100644 cyto_dl/api/config/segmentation_plugin_config.py

diff --git a/cyto_dl/api/config/__init__.py b/cyto_dl/api/config/__init__.py
new file mode 100644
index 000000000..d2c4c03ef
--- /dev/null
+++ b/cyto_dl/api/config/__init__.py
@@ -0,0 +1 @@
+from .cyto_dl_config import CytoDLConfig
\ No newline at end of file
diff --git a/cyto_dl/api/config/cyto_dl_config.py b/cyto_dl/api/config/cyto_dl_config.py
new file mode 100644
index 000000000..a54969756
--- /dev/null
+++ b/cyto_dl/api/config/cyto_dl_config.py
@@ -0,0 +1,45 @@
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Optional, Dict, Any
+from cyto_dl.api.data import *
+
+
+class CytoDLConfig(ABC):
+    """
+    A CytoDLConfig represents the configuration for a CytoDLModel. Can be
+    passed to CytoDLModel on initialization to create a fully-functional model.
+    """
+    # Note: internally, Config classes will just be used to store overrides and
+    # possibly a config filepath, while CytoDLModel will handle actually generating
+    # the full config
+    @abstractmethod
+    def __init__(self, config_filepath: Optional[Path]=None, train: bool=True):
+        """
+        :param config_filepath: path to a .yaml config file that will be used as the basis
+        for this CytoDLConfig. If None, a default config will be used instead.
+        :param train: indicates whether this config will be used for training or prediction
+        """
+        self._config_filepath: str = config_filepath
+        self._overrides: Dict[str, Any] = {}
+        self._overrides["train"] = train
+        self._overrides["test"] = train
+    
+    @abstractmethod
+    def get_experiment_type(self) -> ExperimentType:
+        """
+        Return experiment type for this config (e.g. segmentation_plugin, gan, etc)
+        """
+        pass
+
+    def get_config_filepath(self) -> Optional[Path]:
+        return self._config_filepath
+    
+    def set_hardware_type(self, hardware_type: HardwareType) -> None:
+        self._overrides["trainer.accelerator"] = hardware_type.value
+    
+    def set_experiment_name(self, name: str) -> None:
+        self._overrides["experiment_name"] = name
+    
+    def set_ckpt_path(self, ckpt_path: Path) -> None:
+        self._overrides["ckpt_path"] = str(ckpt_path.resolve())
+
diff --git a/cyto_dl/api/config/segmentation_plugin_config.py b/cyto_dl/api/config/segmentation_plugin_config.py
new file mode 100644
index 000000000..8799e4838
--- /dev/null
+++ b/cyto_dl/api/config/segmentation_plugin_config.py
@@ -0,0 +1,11 @@
+from typing import Optional
+from pathlib import Path
+from cyto_dl.api.config import CytoDLConfig
+from cyto_dl.api.data import *
+
+class SegmentationPluginConfig(CytoDLConfig):
+    def __init__(self, config_filepath: Optional[Path]=None):
+        super().__init__(config_filepath)
+    
+    def get_experiment_type(self) -> ExperimentType:
+        return ExperimentType.SEGMENTATION_PLUGIN
\ No newline at end of file
diff --git a/cyto_dl/api/model.py b/cyto_dl/api/model.py
index d9010a7f8..4be1eb79c 100644
--- a/cyto_dl/api/model.py
+++ b/cyto_dl/api/model.py
@@ -14,6 +14,9 @@
 
 
 class CytoDLModel:
+    # TODO: add optional CytoDLConfig param to init--if client passes a
+    # CytoDLConfig subtype, config will be initialized in constructor and
+    # calls to train/predict can be run immediately
     def __init__(self):
         self.cfg = None
         self._training = False

From 45b1a5a877407868e1a388a86a683e08cf5c256d Mon Sep 17 00:00:00 2001
From: Daniel Saelid <daniel.saelid@alleninstitute.org>
Date: Mon, 25 Mar 2024 16:58:47 -0700
Subject: [PATCH 02/11] added lots of config methods, WIP

---
 cyto_dl/api/config/cyto_dl_config.py          | 102 +++++++++++++++---
 .../api/config/segmentation_plugin_config.py  |  63 +++++++++--
 2 files changed, 144 insertions(+), 21 deletions(-)

diff --git a/cyto_dl/api/config/cyto_dl_config.py b/cyto_dl/api/config/cyto_dl_config.py
index a54969756..17517b9d2 100644
--- a/cyto_dl/api/config/cyto_dl_config.py
+++ b/cyto_dl/api/config/cyto_dl_config.py
@@ -1,6 +1,11 @@
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Optional, Dict, Any
+from typing import Optional, List, Any
+from hydra import compose, initialize_config_dir
+from hydra.core.global_hydra import GlobalHydra
+from omegaconf import OmegaConf, open_dict, DictConfig
+import pyrootutils
+from copy import deepcopy
 from cyto_dl.api.data import *
 
 
@@ -9,9 +14,6 @@ class CytoDLConfig(ABC):
     A CytoDLConfig represents the configuration for a CytoDLModel. Can be
     passed to CytoDLModel on initialization to create a fully-functional model.
     """
-    # Note: internally, Config classes will just be used to store overrides and
-    # possibly a config filepath, while CytoDLModel will handle actually generating
-    # the full config
     @abstractmethod
     def __init__(self, config_filepath: Optional[Path]=None, train: bool=True):
         """
@@ -20,26 +22,96 @@ def __init__(self, config_filepath: Optional[Path]=None, train: bool=True):
         :param train: indicates whether this config will be used for training or prediction
         """
         self._config_filepath: str = config_filepath
-        self._overrides: Dict[str, Any] = {}
-        self._overrides["train"] = train
-        self._overrides["test"] = train
+        self._train: bool = train
+        self._cfg: DictConfig = OmegaConf.load(config_filepath) if config_filepath else self._generate_default_config()
+        OmegaConf.update(self._cfg, "train", train)
+        OmegaConf.update(self._cfg, "test", train)
+        # afaik, task_name isn't used outside of template_utils.py - do we need to support this?
+        OmegaConf.update(self._cfg, "task_name", "train" if train else "predict")
+
+        # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs, 
+        # do we need to support this?
     
+    def _generate_default_config(self) -> DictConfig:
+        cfg_dir: Path = pyrootutils.find_root(search_from=__file__, indicator=("pyproject.toml", "README.md")) / "configs"
+        GlobalHydra.instance().clear()
+        with initialize_config_dir(version_base="1.2", config_dir=str(cfg_dir)):
+            cfg: DictConfig = compose(
+                config_name="train.yaml" if self._train else "eval.yaml",
+                return_hydra_config=True,
+                overrides=[f"experiment=im2im/{self._get_experiment_type().name}"],
+            )
+        with open_dict(cfg):
+            del cfg["hydra"]
+            cfg.extras.enforce_tags = False
+            cfg.extras.print_config = False
+        return cfg
+
     @abstractmethod
-    def get_experiment_type(self) -> ExperimentType:
+    def _get_experiment_type(self) -> ExperimentType:
         """
         Return experiment type for this config (e.g. segmentation_plugin, gan, etc)
         """
         pass
-
-    def get_config_filepath(self) -> Optional[Path]:
-        return self._config_filepath
     
-    def set_hardware_type(self, hardware_type: HardwareType) -> None:
-        self._overrides["trainer.accelerator"] = hardware_type.value
+    def _key_exists(self, k: str) -> bool:
+        keys: List[str] = k.split(".")
+        curr_dict: DictConfig = self._cfg
+        while keys:
+            key: str = keys.pop(0)
+            if not key in curr_dict:
+                return False
+            curr_dict = curr_dict[key]
+        return True
+    
+    def _set_cfg(self, k: str, v: Any) -> None:
+        if not self._key_exists(k):
+            raise KeyError(f"{k} not found in config dict")
+        OmegaConf.update(self._cfg, k, v)
+
+    def _get_cfg(self, k: str) -> Any:
+        if not self._key_exists(k):
+            raise KeyError(f"{k} not found in config dict")
+        return OmegaConf.select(self._cfg, k)
     
     def set_experiment_name(self, name: str) -> None:
-        self._overrides["experiment_name"] = name
+        self._set_cfg("experiment_name", name)
     
+    def get_experiment_name(self) -> str:
+        return self._get_cfg("experiment_name")
+
     def set_ckpt_path(self, ckpt_path: Path) -> None:
-        self._overrides["ckpt_path"] = str(ckpt_path.resolve())
+        self._set_cfg("ckpt_path", str(ckpt_path.resolve()))
+    
+    def get_ckpt_path(self) -> Path:
+        return Path(self._get_cfg("ckpt_path"))
+
+    def set_hardware_type(self, hardware_type: HardwareType) -> None:
+        self._set_cfg("trainer.accelerator", hardware_type.value)
+    
+    def get_hardware_type(self) -> HardwareType:
+        return HardwareType(self._get_cfg("trainer.accelerator"))
+
+    def set_max_epochs(self, max_epochs: int) -> None:
+        self._set_cfg("trainer.max_epochs", max_epochs)
+    
+    def get_max_epochs(self) -> int:
+        return self._get_cfg("trainer.max_epochs")
+    
+    def set_output_dir(self, output_dir: Path) -> None:
+        self._set_cfg("paths.output_dir", str(output_dir))
+    
+    def get_output_dir(self) -> Path:
+        return Path(self._get_cfg("paths.output_dir"))
+    
+    # I can't find where this is actually used in cyto_dl, do we need to support this?
+    def set_work_dir(self, work_dir: Path) -> None:
+        self._set_cfg("paths.work_dir", str(work_dir))
+    
+    def get_work_dir(self) -> Path:
+        return Path(self._get_cfg("paths.work_dir"))
+    
+    def get_config(self) -> DictConfig:
+        return deepcopy(self._cfg)
+
 
diff --git a/cyto_dl/api/config/segmentation_plugin_config.py b/cyto_dl/api/config/segmentation_plugin_config.py
index 8799e4838..6186386ca 100644
--- a/cyto_dl/api/config/segmentation_plugin_config.py
+++ b/cyto_dl/api/config/segmentation_plugin_config.py
@@ -1,11 +1,62 @@
-from typing import Optional
+from typing import Optional, Tuple, List
 from pathlib import Path
 from cyto_dl.api.config import CytoDLConfig
 from cyto_dl.api.data import *
-
+# TODO: get rid of using Path or any non-primitive data type since there are format strings in config sometimes...
 class SegmentationPluginConfig(CytoDLConfig):
-    def __init__(self, config_filepath: Optional[Path]=None):
-        super().__init__(config_filepath)
+    def __init__(self, config_filepath: Optional[Path] = None, train: bool = True):
+        super().__init__(config_filepath, train)
     
-    def get_experiment_type(self) -> ExperimentType:
-        return ExperimentType.SEGMENTATION_PLUGIN
\ No newline at end of file
+    def _get_experiment_type(self) -> ExperimentType:
+        return ExperimentType.SEGMENTATION_PLUGIN
+    
+    # a lot of these keys are duplicated across the im2im experiment types (but not present in top-level
+    # train.yaml or eval.yaml) - should we move these into the top-level configs and move these setters and
+    # getters accordingly? 
+    def set_spatial_dims(self, spatial_dims: int) -> None:
+        self._set_cfg("spatial_dims", spatial_dims)
+    
+    def get_spatial_dims(self) -> int:
+        return self._get_cfg("spatial_dims")
+    
+    def set_input_channel(self, input_channel: int) -> None:
+        self._set_cfg("input_channel", input_channel)
+    
+    def get_input_channel(self) -> int:
+        return self._get_cfg("input_channel")
+    
+    def set_raw_image_channels(self, channels: int) -> None:
+        self._set_cfg("raw_im_channels", channels)
+    
+    def get_raw_image_channels(self) -> int:
+        return self._get_cfg("raw_im_channels")
+    
+    def set_data_path(self, data_path: Path) -> None:
+        self._set_cfg("data.path", str(data_path))
+    
+    def get_data_path(self) -> Path:
+        return Path(self._get_cfg("data.path"))
+    
+    # TODO: is there a better way to deal with column names + split columns?
+    def set_manifest_column_names(self, source: str, target1: str, target2: str, merge_mask: str, exclude_mask: str, base_image: str) -> None:
+        self._set_cfg("source_col", source)
+        self._set_cfg("target_col1", target1)
+        self._set_cfg("target_col2", target2)
+        self._set_cfg("merge_mask_col", merge_mask)
+        self._set_cfg("exclude_mask_col", exclude_mask)
+        self._set_cfg("base_image_col", base_image)
+    
+    def get_manifest_column_names(self) -> Tuple[str, str, str, str, str, str]:
+        return self._get_cfg("source_col"), self._get_cfg("target_col1"), self._get_cfg("target_col2"), self._get_cfg("merge_mask_col"), self._get_cfg("exclude_mask_col"), self._get_cfg("base_image_col")
+    
+    def set_split_column(self, split_column: str) -> None:
+        self._set_cfg("data.split_column", split_column)
+        # need to also add it to the list of columns
+        existing_cols: List[str] = self._get_cfg("data.columns")
+        if len(existing_cols) > len(self.get_manifest_column_names()):
+            existing_cols[-1] = split_column
+        else:
+            existing_cols.append(split_column)
+    
+    def get_split_column(self) -> str:
+        self._get_cfg("data.split_column")

From d645db92c8533899b1e5a1a2e656253588329610 Mon Sep 17 00:00:00 2001
From: Daniel Saelid <daniel.saelid@alleninstitute.org>
Date: Wed, 27 Mar 2024 14:54:42 -0700
Subject: [PATCH 03/11] move setters/getters into subclass

---
 cyto_dl/api/config/cyto_dl_config.py          | 34 ++---------------
 .../api/config/segmentation_plugin_config.py  | 37 +++++++++++++++----
 2 files changed, 34 insertions(+), 37 deletions(-)

diff --git a/cyto_dl/api/config/cyto_dl_config.py b/cyto_dl/api/config/cyto_dl_config.py
index 17517b9d2..0dfc7847f 100644
--- a/cyto_dl/api/config/cyto_dl_config.py
+++ b/cyto_dl/api/config/cyto_dl_config.py
@@ -14,7 +14,6 @@ class CytoDLConfig(ABC):
     A CytoDLConfig represents the configuration for a CytoDLModel. Can be
     passed to CytoDLModel on initialization to create a fully-functional model.
     """
-    @abstractmethod
     def __init__(self, config_filepath: Optional[Path]=None, train: bool=True):
         """
         :param config_filepath: path to a .yaml config file that will be used as the basis
@@ -24,10 +23,10 @@ def __init__(self, config_filepath: Optional[Path]=None, train: bool=True):
         self._config_filepath: str = config_filepath
         self._train: bool = train
         self._cfg: DictConfig = OmegaConf.load(config_filepath) if config_filepath else self._generate_default_config()
-        OmegaConf.update(self._cfg, "train", train)
-        OmegaConf.update(self._cfg, "test", train)
+        self._set_cfg("train", train)
+        self._set_cfg("test", train)
         # afaik, task_name isn't used outside of template_utils.py - do we need to support this?
-        OmegaConf.update(self._cfg, "task_name", "train" if train else "predict")
+        self._set_cfg("task_name", "train" if train else "predict")
 
         # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs, 
         # do we need to support this?
@@ -37,7 +36,7 @@ def _generate_default_config(self) -> DictConfig:
         GlobalHydra.instance().clear()
         with initialize_config_dir(version_base="1.2", config_dir=str(cfg_dir)):
             cfg: DictConfig = compose(
-                config_name="train.yaml" if self._train else "eval.yaml",
+                config_name="train.yaml", # only using train.yaml after conversation w/ Benji
                 return_hydra_config=True,
                 overrides=[f"experiment=im2im/{self._get_experiment_type().name}"],
             )
@@ -85,31 +84,6 @@ def set_ckpt_path(self, ckpt_path: Path) -> None:
     
     def get_ckpt_path(self) -> Path:
         return Path(self._get_cfg("ckpt_path"))
-
-    def set_hardware_type(self, hardware_type: HardwareType) -> None:
-        self._set_cfg("trainer.accelerator", hardware_type.value)
-    
-    def get_hardware_type(self) -> HardwareType:
-        return HardwareType(self._get_cfg("trainer.accelerator"))
-
-    def set_max_epochs(self, max_epochs: int) -> None:
-        self._set_cfg("trainer.max_epochs", max_epochs)
-    
-    def get_max_epochs(self) -> int:
-        return self._get_cfg("trainer.max_epochs")
-    
-    def set_output_dir(self, output_dir: Path) -> None:
-        self._set_cfg("paths.output_dir", str(output_dir))
-    
-    def get_output_dir(self) -> Path:
-        return Path(self._get_cfg("paths.output_dir"))
-    
-    # I can't find where this is actually used in cyto_dl, do we need to support this?
-    def set_work_dir(self, work_dir: Path) -> None:
-        self._set_cfg("paths.work_dir", str(work_dir))
-    
-    def get_work_dir(self) -> Path:
-        return Path(self._get_cfg("paths.work_dir"))
     
     def get_config(self) -> DictConfig:
         return deepcopy(self._cfg)
diff --git a/cyto_dl/api/config/segmentation_plugin_config.py b/cyto_dl/api/config/segmentation_plugin_config.py
index 6186386ca..d4137b9c6 100644
--- a/cyto_dl/api/config/segmentation_plugin_config.py
+++ b/cyto_dl/api/config/segmentation_plugin_config.py
@@ -1,11 +1,9 @@
-from typing import Optional, Tuple, List
+from typing import Tuple, List, Union
 from pathlib import Path
 from cyto_dl.api.config import CytoDLConfig
 from cyto_dl.api.data import *
-# TODO: get rid of using Path or any non-primitive data type since there are format strings in config sometimes...
+
 class SegmentationPluginConfig(CytoDLConfig):
-    def __init__(self, config_filepath: Optional[Path] = None, train: bool = True):
-        super().__init__(config_filepath, train)
     
     def _get_experiment_type(self) -> ExperimentType:
         return ExperimentType.SEGMENTATION_PLUGIN
@@ -31,11 +29,11 @@ def set_raw_image_channels(self, channels: int) -> None:
     def get_raw_image_channels(self) -> int:
         return self._get_cfg("raw_im_channels")
     
-    def set_data_path(self, data_path: Path) -> None:
+    def set_data_path(self, data_path: Union[str, Path]) -> None:
         self._set_cfg("data.path", str(data_path))
     
-    def get_data_path(self) -> Path:
-        return Path(self._get_cfg("data.path"))
+    def get_data_path(self) -> str:
+        return self._get_cfg("data.path")
     
     # TODO: is there a better way to deal with column names + split columns?
     def set_manifest_column_names(self, source: str, target1: str, target2: str, merge_mask: str, exclude_mask: str, base_image: str) -> None:
@@ -60,3 +58,28 @@ def set_split_column(self, split_column: str) -> None:
     
     def get_split_column(self) -> str:
         self._get_cfg("data.split_column")
+    
+    def set_hardware_type(self, hardware_type: HardwareType) -> None:
+        self._set_cfg("trainer.accelerator", hardware_type.value)
+    
+    def get_hardware_type(self) -> HardwareType:
+        return HardwareType(self._get_cfg("trainer.accelerator"))
+
+    def set_max_epochs(self, max_epochs: int) -> None:
+        self._set_cfg("trainer.max_epochs", max_epochs)
+    
+    def get_max_epochs(self) -> int:
+        return self._get_cfg("trainer.max_epochs")
+    
+    def set_output_dir(self, output_dir: Union[str, Path]) -> None:
+        self._set_cfg("paths.output_dir", str(output_dir))
+    
+    def get_output_dir(self) -> str:
+        return self._get_cfg("paths.output_dir")
+    
+    # I can't find where this is actually used in cyto_dl, do we need to support this?
+    def set_work_dir(self, work_dir: Union[str, Path]) -> None:
+        self._set_cfg("paths.work_dir", str(work_dir))
+    
+    def get_work_dir(self) -> str:
+        return self._get_cfg("paths.work_dir")

From 4a14fd4f3141645d79c75a936f12cd9e7a8fae64 Mon Sep 17 00:00:00 2001
From: Daniel Saelid <daniel.saelid@alleninstitute.org>
Date: Wed, 27 Mar 2024 16:09:39 -0700
Subject: [PATCH 04/11] refactored config classes to model classes

---
 cyto_dl/api/config/__init__.py                |  1 -
 cyto_dl/api/cyto_dl_model/__init__.py         |  2 +
 .../cyto_dl_base_model.py}                    | 70 ++++++++++++++-----
 .../segmentation_plugin_model.py}             | 43 ++++--------
 4 files changed, 68 insertions(+), 48 deletions(-)
 delete mode 100644 cyto_dl/api/config/__init__.py
 create mode 100644 cyto_dl/api/cyto_dl_model/__init__.py
 rename cyto_dl/api/{config/cyto_dl_config.py => cyto_dl_model/cyto_dl_base_model.py} (58%)
 rename cyto_dl/api/{config/segmentation_plugin_config.py => cyto_dl_model/segmentation_plugin_model.py} (75%)

diff --git a/cyto_dl/api/config/__init__.py b/cyto_dl/api/config/__init__.py
deleted file mode 100644
index d2c4c03ef..000000000
--- a/cyto_dl/api/config/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .cyto_dl_config import CytoDLConfig
\ No newline at end of file
diff --git a/cyto_dl/api/cyto_dl_model/__init__.py b/cyto_dl/api/cyto_dl_model/__init__.py
new file mode 100644
index 000000000..398554b63
--- /dev/null
+++ b/cyto_dl/api/cyto_dl_model/__init__.py
@@ -0,0 +1,2 @@
+from .cyto_dl_base_model import CytoDLBaseModel
+from .segmentation_plugin_model import SegmentationPluginModel
\ No newline at end of file
diff --git a/cyto_dl/api/config/cyto_dl_config.py b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
similarity index 58%
rename from cyto_dl/api/config/cyto_dl_config.py
rename to cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
index 0dfc7847f..d95c1ae7b 100644
--- a/cyto_dl/api/config/cyto_dl_config.py
+++ b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
@@ -1,35 +1,27 @@
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Optional, List, Any
+from typing import Optional, Union, List, Any
 from hydra import compose, initialize_config_dir
 from hydra.core.global_hydra import GlobalHydra
 from omegaconf import OmegaConf, open_dict, DictConfig
 import pyrootutils
 from copy import deepcopy
 from cyto_dl.api.data import *
+from cyto_dl.eval import evaluate as evaluate_model
+from cyto_dl.train import train as train_model
 
 
-class CytoDLConfig(ABC):
+class CytoDLBaseModel(ABC):
     """
-    A CytoDLConfig represents the configuration for a CytoDLModel. Can be
-    passed to CytoDLModel on initialization to create a fully-functional model.
+    A CytoDLBaseModel is used to configure, train, and run predictions on a cyto-dl model.
     """
-    def __init__(self, config_filepath: Optional[Path]=None, train: bool=True):
+    def __init__(self, config_filepath: Optional[Path]=None):
         """
         :param config_filepath: path to a .yaml config file that will be used as the basis
-        for this CytoDLConfig. If None, a default config will be used instead.
-        :param train: indicates whether this config will be used for training or prediction
+        for this CytoDLBaseModel (must be generated by the CytoDLBaseModel subclass that wants
+        to use it). If None, a default config will be used instead.
         """
-        self._config_filepath: str = config_filepath
-        self._train: bool = train
         self._cfg: DictConfig = OmegaConf.load(config_filepath) if config_filepath else self._generate_default_config()
-        self._set_cfg("train", train)
-        self._set_cfg("test", train)
-        # afaik, task_name isn't used outside of template_utils.py - do we need to support this?
-        self._set_cfg("task_name", "train" if train else "predict")
-
-        # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs, 
-        # do we need to support this?
     
     def _generate_default_config(self) -> DictConfig:
         cfg_dir: Path = pyrootutils.find_root(search_from=__file__, indicator=("pyproject.toml", "README.md")) / "configs"
@@ -73,19 +65,59 @@ def _get_cfg(self, k: str) -> Any:
             raise KeyError(f"{k} not found in config dict")
         return OmegaConf.select(self._cfg, k)
     
+    def _set_training_config(self, train: bool):
+        self._set_cfg("train", train)
+        self._set_cfg("test", train)
+        # afaik, task_name isn't used outside of template_utils.py - do we need to support this?
+        self._set_cfg("task_name", "train" if train else "predict")
+    
+    @abstractmethod
+    def _set_max_epochs(self, max_epochs: int) -> None:
+        pass
+
+    # is 'manifest' a good word for the path to the directory (or CSV?) with list of images
+    # to use for training, evaluation, etc?
+    @abstractmethod
+    def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None:
+        pass
+
+    @abstractmethod
+    def _set_output_dir(self, output_dir: Union[str, Path]) -> None:
+        pass
+
     def set_experiment_name(self, name: str) -> None:
         self._set_cfg("experiment_name", name)
     
     def get_experiment_name(self) -> str:
         return self._get_cfg("experiment_name")
 
-    def set_ckpt_path(self, ckpt_path: Path) -> None:
+    def set_ckpt_path(self, ckpt_path: Union[str, Path]) -> None:
         self._set_cfg("ckpt_path", str(ckpt_path.resolve()))
     
-    def get_ckpt_path(self) -> Path:
-        return Path(self._get_cfg("ckpt_path"))
+    def get_ckpt_path(self) -> str:
+        return self._get_cfg("ckpt_path")
     
     def get_config(self) -> DictConfig:
         return deepcopy(self._cfg)
+    
+    def save_config(self, path: Path) -> None:
+        OmegaConf.save(self._cfg, path)
+    
+    def train(self, max_epochs: int, manifest_path: Union[str, Path], output_dir: Union[str, Path]) -> None:
+        self._set_training_config(True)
+        self._set_max_epochs(max_epochs)
+        self._set_manifest_path(manifest_path)
+        self._set_output_dir(output_dir)
+        train_model(self._cfg)
+    
+    def predict(self, manifest_path: Union[str, Path], output_dir: Union[str, Path]) -> None:
+        if self.get_ckpt_path() is None:
+            raise RuntimeError("Cannot make predictions when checkpoint unspecified")
+
+        self._set_training_config(False)
+        self._set_manifest_path(manifest_path)
+        self._set_output_dir(output_dir)
+        evaluate_model(self._cfg)
+    
 
 
diff --git a/cyto_dl/api/config/segmentation_plugin_config.py b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
similarity index 75%
rename from cyto_dl/api/config/segmentation_plugin_config.py
rename to cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
index d4137b9c6..3ae724537 100644
--- a/cyto_dl/api/config/segmentation_plugin_config.py
+++ b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
@@ -1,13 +1,25 @@
 from typing import Tuple, List, Union
 from pathlib import Path
-from cyto_dl.api.config import CytoDLConfig
+from cyto_dl.api.cyto_dl_model import CytoDLBaseModel
 from cyto_dl.api.data import *
 
-class SegmentationPluginConfig(CytoDLConfig):
-    
+class SegmentationPluginModel(CytoDLBaseModel):
+    # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs, 
+    # do we need to support this?
     def _get_experiment_type(self) -> ExperimentType:
         return ExperimentType.SEGMENTATION_PLUGIN
     
+    def _set_max_epochs(self, max_epochs: int) -> None:
+        self._set_cfg("trainer.max_epochs", max_epochs)
+
+    def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None:
+        self._set_cfg("data.path", str(manifest_path))
+    
+    def _set_output_dir(self, output_dir: Union[str, Path]) -> None:
+        self._set_cfg("paths.output_dir", str(output_dir))
+        # I can't find where work_dir is actually used in cyto_dl, do we need to support this?
+        self._set_cfg("paths.work_dir", str(output_dir))
+
     # a lot of these keys are duplicated across the im2im experiment types (but not present in top-level
     # train.yaml or eval.yaml) - should we move these into the top-level configs and move these setters and
     # getters accordingly? 
@@ -29,12 +41,6 @@ def set_raw_image_channels(self, channels: int) -> None:
     def get_raw_image_channels(self) -> int:
         return self._get_cfg("raw_im_channels")
     
-    def set_data_path(self, data_path: Union[str, Path]) -> None:
-        self._set_cfg("data.path", str(data_path))
-    
-    def get_data_path(self) -> str:
-        return self._get_cfg("data.path")
-    
     # TODO: is there a better way to deal with column names + split columns?
     def set_manifest_column_names(self, source: str, target1: str, target2: str, merge_mask: str, exclude_mask: str, base_image: str) -> None:
         self._set_cfg("source_col", source)
@@ -64,22 +70,3 @@ def set_hardware_type(self, hardware_type: HardwareType) -> None:
     
     def get_hardware_type(self) -> HardwareType:
         return HardwareType(self._get_cfg("trainer.accelerator"))
-
-    def set_max_epochs(self, max_epochs: int) -> None:
-        self._set_cfg("trainer.max_epochs", max_epochs)
-    
-    def get_max_epochs(self) -> int:
-        return self._get_cfg("trainer.max_epochs")
-    
-    def set_output_dir(self, output_dir: Union[str, Path]) -> None:
-        self._set_cfg("paths.output_dir", str(output_dir))
-    
-    def get_output_dir(self) -> str:
-        return self._get_cfg("paths.output_dir")
-    
-    # I can't find where this is actually used in cyto_dl, do we need to support this?
-    def set_work_dir(self, work_dir: Union[str, Path]) -> None:
-        self._set_cfg("paths.work_dir", str(work_dir))
-    
-    def get_work_dir(self) -> str:
-        return self._get_cfg("paths.work_dir")

From afe7069669e321004b30b2742f458939228206ae Mon Sep 17 00:00:00 2001
From: Daniel Saelid <daniel.saelid@alleninstitute.org>
Date: Wed, 27 Mar 2024 16:27:10 -0700
Subject: [PATCH 05/11] I think all currently-used overrides are accounted for
 now

---
 .../api/cyto_dl_model/segmentation_plugin_model.py  | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
index 3ae724537..ece0dd67a 100644
--- a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
+++ b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
@@ -1,6 +1,7 @@
-from typing import Tuple, List, Union
+from typing import Tuple, List, Union, Optional
 from pathlib import Path
 from cyto_dl.api.cyto_dl_model import CytoDLBaseModel
+from omegaconf import ListConfig
 from cyto_dl.api.data import *
 
 class SegmentationPluginModel(CytoDLBaseModel):
@@ -65,6 +66,16 @@ def set_split_column(self, split_column: str) -> None:
     def get_split_column(self) -> str:
         self._get_cfg("data.split_column")
     
+    # is patch_shape required in order to run training/prediction?
+    # if so, it should be an argument to train/predict/__init__, or a default
+    # should be set in the config
+    def set_patch_size(self, patch_size: PatchSize) -> None:
+        self._set_cfg("data._aux.patch_shape", patch_size.value)
+    
+    def get_patch_size(self) -> Optional[PatchSize]:
+        p_shape: ListConfig = self._get_cfg("data._aux.patch_shape")
+        return PatchSize(list(p_shape)) if p_shape else None
+    
     def set_hardware_type(self, hardware_type: HardwareType) -> None:
         self._set_cfg("trainer.accelerator", hardware_type.value)
     

From 58d35986490b3445a9a0632a841f9d16b69b254f Mon Sep 17 00:00:00 2001
From: Daniel Saelid <daniel.saelid@alleninstitute.org>
Date: Wed, 27 Mar 2024 16:32:25 -0700
Subject: [PATCH 06/11] pre-commit

---
 cyto_dl/api/cyto_dl_model/__init__.py         |  2 +-
 .../api/cyto_dl_model/cyto_dl_base_model.py   | 61 ++++++++++---------
 .../segmentation_plugin_model.py              | 60 +++++++++++-------
 3 files changed, 72 insertions(+), 51 deletions(-)

diff --git a/cyto_dl/api/cyto_dl_model/__init__.py b/cyto_dl/api/cyto_dl_model/__init__.py
index 398554b63..c7a32124c 100644
--- a/cyto_dl/api/cyto_dl_model/__init__.py
+++ b/cyto_dl/api/cyto_dl_model/__init__.py
@@ -1,2 +1,2 @@
 from .cyto_dl_base_model import CytoDLBaseModel
-from .segmentation_plugin_model import SegmentationPluginModel
\ No newline at end of file
+from .segmentation_plugin_model import SegmentationPluginModel
diff --git a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
index d95c1ae7b..4363a9b84 100644
--- a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
+++ b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
@@ -1,34 +1,40 @@
 from abc import ABC, abstractmethod
+from copy import deepcopy
 from pathlib import Path
-from typing import Optional, Union, List, Any
+from typing import Any, List, Optional, Union
+
+import pyrootutils
 from hydra import compose, initialize_config_dir
 from hydra.core.global_hydra import GlobalHydra
-from omegaconf import OmegaConf, open_dict, DictConfig
-import pyrootutils
-from copy import deepcopy
+from omegaconf import DictConfig, OmegaConf, open_dict
+
 from cyto_dl.api.data import *
 from cyto_dl.eval import evaluate as evaluate_model
 from cyto_dl.train import train as train_model
 
 
 class CytoDLBaseModel(ABC):
-    """
-    A CytoDLBaseModel is used to configure, train, and run predictions on a cyto-dl model.
-    """
-    def __init__(self, config_filepath: Optional[Path]=None):
+    """A CytoDLBaseModel is used to configure, train, and run predictions on a cyto-dl model."""
+
+    def __init__(self, config_filepath: Optional[Path] = None):
         """
         :param config_filepath: path to a .yaml config file that will be used as the basis
         for this CytoDLBaseModel (must be generated by the CytoDLBaseModel subclass that wants
         to use it). If None, a default config will be used instead.
         """
-        self._cfg: DictConfig = OmegaConf.load(config_filepath) if config_filepath else self._generate_default_config()
-    
+        self._cfg: DictConfig = (
+            OmegaConf.load(config_filepath) if config_filepath else self._generate_default_config()
+        )
+
     def _generate_default_config(self) -> DictConfig:
-        cfg_dir: Path = pyrootutils.find_root(search_from=__file__, indicator=("pyproject.toml", "README.md")) / "configs"
+        cfg_dir: Path = (
+            pyrootutils.find_root(search_from=__file__, indicator=("pyproject.toml", "README.md"))
+            / "configs"
+        )
         GlobalHydra.instance().clear()
         with initialize_config_dir(version_base="1.2", config_dir=str(cfg_dir)):
             cfg: DictConfig = compose(
-                config_name="train.yaml", # only using train.yaml after conversation w/ Benji
+                config_name="train.yaml",  # only using train.yaml after conversation w/ Benji
                 return_hydra_config=True,
                 overrides=[f"experiment=im2im/{self._get_experiment_type().name}"],
             )
@@ -40,11 +46,9 @@ def _generate_default_config(self) -> DictConfig:
 
     @abstractmethod
     def _get_experiment_type(self) -> ExperimentType:
-        """
-        Return experiment type for this config (e.g. segmentation_plugin, gan, etc)
-        """
+        """Return experiment type for this config (e.g. segmentation_plugin, gan, etc)"""
         pass
-    
+
     def _key_exists(self, k: str) -> bool:
         keys: List[str] = k.split(".")
         curr_dict: DictConfig = self._cfg
@@ -54,7 +58,7 @@ def _key_exists(self, k: str) -> bool:
                 return False
             curr_dict = curr_dict[key]
         return True
-    
+
     def _set_cfg(self, k: str, v: Any) -> None:
         if not self._key_exists(k):
             raise KeyError(f"{k} not found in config dict")
@@ -64,13 +68,13 @@ def _get_cfg(self, k: str) -> Any:
         if not self._key_exists(k):
             raise KeyError(f"{k} not found in config dict")
         return OmegaConf.select(self._cfg, k)
-    
+
     def _set_training_config(self, train: bool):
         self._set_cfg("train", train)
         self._set_cfg("test", train)
         # afaik, task_name isn't used outside of template_utils.py - do we need to support this?
         self._set_cfg("task_name", "train" if train else "predict")
-    
+
     @abstractmethod
     def _set_max_epochs(self, max_epochs: int) -> None:
         pass
@@ -87,29 +91,31 @@ def _set_output_dir(self, output_dir: Union[str, Path]) -> None:
 
     def set_experiment_name(self, name: str) -> None:
         self._set_cfg("experiment_name", name)
-    
+
     def get_experiment_name(self) -> str:
         return self._get_cfg("experiment_name")
 
     def set_ckpt_path(self, ckpt_path: Union[str, Path]) -> None:
         self._set_cfg("ckpt_path", str(ckpt_path.resolve()))
-    
+
     def get_ckpt_path(self) -> str:
         return self._get_cfg("ckpt_path")
-    
+
     def get_config(self) -> DictConfig:
         return deepcopy(self._cfg)
-    
+
     def save_config(self, path: Path) -> None:
         OmegaConf.save(self._cfg, path)
-    
-    def train(self, max_epochs: int, manifest_path: Union[str, Path], output_dir: Union[str, Path]) -> None:
+
+    def train(
+        self, max_epochs: int, manifest_path: Union[str, Path], output_dir: Union[str, Path]
+    ) -> None:
         self._set_training_config(True)
         self._set_max_epochs(max_epochs)
         self._set_manifest_path(manifest_path)
         self._set_output_dir(output_dir)
         train_model(self._cfg)
-    
+
     def predict(self, manifest_path: Union[str, Path], output_dir: Union[str, Path]) -> None:
         if self.get_ckpt_path() is None:
             raise RuntimeError("Cannot make predictions when checkpoint unspecified")
@@ -118,6 +124,3 @@ def predict(self, manifest_path: Union[str, Path], output_dir: Union[str, Path])
         self._set_manifest_path(manifest_path)
         self._set_output_dir(output_dir)
         evaluate_model(self._cfg)
-    
-
-
diff --git a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
index ece0dd67a..59cb6da0c 100644
--- a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
+++ b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
@@ -1,21 +1,24 @@
-from typing import Tuple, List, Union, Optional
 from pathlib import Path
-from cyto_dl.api.cyto_dl_model import CytoDLBaseModel
+from typing import List, Optional, Tuple, Union
+
 from omegaconf import ListConfig
+
+from cyto_dl.api.cyto_dl_model import CytoDLBaseModel
 from cyto_dl.api.data import *
 
+
 class SegmentationPluginModel(CytoDLBaseModel):
-    # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs, 
+    # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs,
     # do we need to support this?
     def _get_experiment_type(self) -> ExperimentType:
         return ExperimentType.SEGMENTATION_PLUGIN
-    
+
     def _set_max_epochs(self, max_epochs: int) -> None:
         self._set_cfg("trainer.max_epochs", max_epochs)
 
     def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None:
         self._set_cfg("data.path", str(manifest_path))
-    
+
     def _set_output_dir(self, output_dir: Union[str, Path]) -> None:
         self._set_cfg("paths.output_dir", str(output_dir))
         # I can't find where work_dir is actually used in cyto_dl, do we need to support this?
@@ -23,37 +26,52 @@ def _set_output_dir(self, output_dir: Union[str, Path]) -> None:
 
     # a lot of these keys are duplicated across the im2im experiment types (but not present in top-level
     # train.yaml or eval.yaml) - should we move these into the top-level configs and move these setters and
-    # getters accordingly? 
+    # getters accordingly?
     def set_spatial_dims(self, spatial_dims: int) -> None:
         self._set_cfg("spatial_dims", spatial_dims)
-    
+
     def get_spatial_dims(self) -> int:
         return self._get_cfg("spatial_dims")
-    
+
     def set_input_channel(self, input_channel: int) -> None:
         self._set_cfg("input_channel", input_channel)
-    
+
     def get_input_channel(self) -> int:
         return self._get_cfg("input_channel")
-    
+
     def set_raw_image_channels(self, channels: int) -> None:
         self._set_cfg("raw_im_channels", channels)
-    
+
     def get_raw_image_channels(self) -> int:
         return self._get_cfg("raw_im_channels")
-    
+
     # TODO: is there a better way to deal with column names + split columns?
-    def set_manifest_column_names(self, source: str, target1: str, target2: str, merge_mask: str, exclude_mask: str, base_image: str) -> None:
+    def set_manifest_column_names(
+        self,
+        source: str,
+        target1: str,
+        target2: str,
+        merge_mask: str,
+        exclude_mask: str,
+        base_image: str,
+    ) -> None:
         self._set_cfg("source_col", source)
         self._set_cfg("target_col1", target1)
         self._set_cfg("target_col2", target2)
         self._set_cfg("merge_mask_col", merge_mask)
         self._set_cfg("exclude_mask_col", exclude_mask)
         self._set_cfg("base_image_col", base_image)
-    
+
     def get_manifest_column_names(self) -> Tuple[str, str, str, str, str, str]:
-        return self._get_cfg("source_col"), self._get_cfg("target_col1"), self._get_cfg("target_col2"), self._get_cfg("merge_mask_col"), self._get_cfg("exclude_mask_col"), self._get_cfg("base_image_col")
-    
+        return (
+            self._get_cfg("source_col"),
+            self._get_cfg("target_col1"),
+            self._get_cfg("target_col2"),
+            self._get_cfg("merge_mask_col"),
+            self._get_cfg("exclude_mask_col"),
+            self._get_cfg("base_image_col"),
+        )
+
     def set_split_column(self, split_column: str) -> None:
         self._set_cfg("data.split_column", split_column)
         # need to also add it to the list of columns
@@ -62,22 +80,22 @@ def set_split_column(self, split_column: str) -> None:
             existing_cols[-1] = split_column
         else:
             existing_cols.append(split_column)
-    
+
     def get_split_column(self) -> str:
         self._get_cfg("data.split_column")
-    
+
     # is patch_shape required in order to run training/prediction?
     # if so, it should be an argument to train/predict/__init__, or a default
     # should be set in the config
     def set_patch_size(self, patch_size: PatchSize) -> None:
         self._set_cfg("data._aux.patch_shape", patch_size.value)
-    
+
     def get_patch_size(self) -> Optional[PatchSize]:
         p_shape: ListConfig = self._get_cfg("data._aux.patch_shape")
         return PatchSize(list(p_shape)) if p_shape else None
-    
+
     def set_hardware_type(self, hardware_type: HardwareType) -> None:
         self._set_cfg("trainer.accelerator", hardware_type.value)
-    
+
     def get_hardware_type(self) -> HardwareType:
         return HardwareType(self._get_cfg("trainer.accelerator"))

From 12d613669d4b812c88478266274855a8d4215cce Mon Sep 17 00:00:00 2001
From: Daniel Saelid <daniel.saelid@alleninstitute.org>
Date: Fri, 29 Mar 2024 16:05:42 -0700
Subject: [PATCH 07/11] added tests for seg plugin

---
 .../api/cyto_dl_model/cyto_dl_base_model.py   |  33 +++--
 .../segmentation_plugin_model.py              |  24 +++-
 tests/api/cyto_dl_model/bad_config.yaml       |   5 +
 .../test_segmentation_plugin_model.py         | 134 ++++++++++++++++++
 4 files changed, 179 insertions(+), 17 deletions(-)
 create mode 100644 tests/api/cyto_dl_model/bad_config.yaml
 create mode 100644 tests/api/cyto_dl_model/test_segmentation_plugin_model.py

diff --git a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
index 4363a9b84..542d87201 100644
--- a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
+++ b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
@@ -33,10 +33,16 @@ def _generate_default_config(self) -> DictConfig:
         )
         GlobalHydra.instance().clear()
         with initialize_config_dir(version_base="1.2", config_dir=str(cfg_dir)):
+            # the overrides for 'paths.' are necessary to make later overrides work (for some reason I don't fully understand)
+            # I'd prefer to do this in experiment configs instead of the overrides here
             cfg: DictConfig = compose(
                 config_name="train.yaml",  # only using train.yaml after conversation w/ Benji
                 return_hydra_config=True,
-                overrides=[f"experiment=im2im/{self._get_experiment_type().name}"],
+                overrides=[
+                    f"experiment=im2im/{self._get_experiment_type().name}",
+                    "paths.output_dir=PLACEHOLDER",
+                    "paths.work_dir=PLACEHOLDER",
+                ],
             )
         with open_dict(cfg):
             del cfg["hydra"]
@@ -89,18 +95,16 @@ def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None:
     def _set_output_dir(self, output_dir: Union[str, Path]) -> None:
         pass
 
+    def _set_ckpt(self, ckpt: Optional[Path]) -> None:
+        self._set_cfg("ckpt_path", str(ckpt.resolve()) if ckpt else ckpt)
+
+    # does experiment name have any effect?
     def set_experiment_name(self, name: str) -> None:
         self._set_cfg("experiment_name", name)
 
     def get_experiment_name(self) -> str:
         return self._get_cfg("experiment_name")
 
-    def set_ckpt_path(self, ckpt_path: Union[str, Path]) -> None:
-        self._set_cfg("ckpt_path", str(ckpt_path.resolve()))
-
-    def get_ckpt_path(self) -> str:
-        return self._get_cfg("ckpt_path")
-
     def get_config(self) -> DictConfig:
         return deepcopy(self._cfg)
 
@@ -108,19 +112,24 @@ def save_config(self, path: Path) -> None:
         OmegaConf.save(self._cfg, path)
 
     def train(
-        self, max_epochs: int, manifest_path: Union[str, Path], output_dir: Union[str, Path]
+        self,
+        max_epochs: int,
+        manifest_path: Union[str, Path],
+        output_dir: Union[str, Path],
+        checkpoint: Optional[Path] = None,
     ) -> None:
         self._set_training_config(True)
         self._set_max_epochs(max_epochs)
         self._set_manifest_path(manifest_path)
         self._set_output_dir(output_dir)
+        self._set_ckpt(checkpoint)
         train_model(self._cfg)
 
-    def predict(self, manifest_path: Union[str, Path], output_dir: Union[str, Path]) -> None:
-        if self.get_ckpt_path() is None:
-            raise RuntimeError("Cannot make predictions when checkpoint unspecified")
-
+    def predict(
+        self, manifest_path: Union[str, Path], output_dir: Union[str, Path], checkpoint: Path
+    ) -> None:
         self._set_training_config(False)
         self._set_manifest_path(manifest_path)
         self._set_output_dir(output_dir)
+        self._set_ckpt(checkpoint)
         evaluate_model(self._cfg)
diff --git a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
index 59cb6da0c..783876d49 100644
--- a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
+++ b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
@@ -8,6 +8,13 @@
 
 
 class SegmentationPluginModel(CytoDLBaseModel):
+    """A SegmentationPluginModel handles configuration, training, and prediction using the default
+    segmentation_plugin experiment from CytoDL."""
+
+    def __init__(self, config_filepath: Optional[Path] = None):
+        super().__init__(config_filepath)
+        self._has_split_column = False
+
     # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs,
     # do we need to support this?
     def _get_experiment_type(self) -> ExperimentType:
@@ -74,15 +81,22 @@ def get_manifest_column_names(self) -> Tuple[str, str, str, str, str, str]:
 
     def set_split_column(self, split_column: str) -> None:
         self._set_cfg("data.split_column", split_column)
-        # need to also add it to the list of columns
-        existing_cols: List[str] = self._get_cfg("data.columns")
-        if len(existing_cols) > len(self.get_manifest_column_names()):
+        existing_cols: ListConfig = self._get_cfg("data.columns")
+        if self._has_split_column:
             existing_cols[-1] = split_column
         else:
             existing_cols.append(split_column)
+        self._has_split_column = True
+
+    def get_split_column(self) -> Optional[str]:
+        return self._get_cfg("data.split_column")
 
-    def get_split_column(self) -> str:
-        self._get_cfg("data.split_column")
+    def remove_split_column(self) -> None:
+        if self._has_split_column:
+            self._set_cfg("data.split_column", None)
+            existing_cols: ListConfig = self._get_cfg("data.columns")
+            del existing_cols[-1]
+        self._has_split_column = False
 
     # is patch_shape required in order to run training/prediction?
     # if so, it should be an argument to train/predict/__init__, or a default
diff --git a/tests/api/cyto_dl_model/bad_config.yaml b/tests/api/cyto_dl_model/bad_config.yaml
new file mode 100644
index 000000000..24122169c
--- /dev/null
+++ b/tests/api/cyto_dl_model/bad_config.yaml
@@ -0,0 +1,5 @@
+this: 1
+is:
+  a: 2
+bad: 3
+config: 4
diff --git a/tests/api/cyto_dl_model/test_segmentation_plugin_model.py b/tests/api/cyto_dl_model/test_segmentation_plugin_model.py
new file mode 100644
index 000000000..8a76111e9
--- /dev/null
+++ b/tests/api/cyto_dl_model/test_segmentation_plugin_model.py
@@ -0,0 +1,134 @@
+from pathlib import Path
+from unittest.mock import Mock, patch
+
+import pytest
+
+from cyto_dl.api.cyto_dl_model import SegmentationPluginModel
+
+
+# Taken together, these tests verify that the SegmentationPluginModel aligns with the
+# default segmentation_plugin config file. It does this by a) verifying that methods which
+# attempt to change config items on a bad (fake) config throw exceptions because those config
+# items do not exist and b) verifying that those methods do not throw exceptions when called on
+# a model that uses the default config file
+#
+# Note that these tests do not test whether or not the config variables set by SegmentationPluginModel
+# are actually used how we expect by Cyto-DL, just that they exist in the default config.
+@pytest.fixture
+def model_with_default_config() -> SegmentationPluginModel:
+    return SegmentationPluginModel()
+
+
+@pytest.fixture
+def model_with_bad_config() -> SegmentationPluginModel:
+    return SegmentationPluginModel(config_filepath=Path(__file__).parent / "bad_config.yaml")
+
+
+class TestDefaultConfig:
+    @patch("cyto_dl.api.cyto_dl_model.cyto_dl_base_model.train_model")
+    def test_train_no_ckpt(
+        self, train_model_mock: Mock, model_with_default_config: SegmentationPluginModel
+    ):
+        model_with_default_config.train(1, "manifest", "output_dir")
+        train_model_mock.assert_called_once()
+
+    @patch("cyto_dl.api.cyto_dl_model.cyto_dl_base_model.train_model")
+    def test_train_with_ckpt(
+        self, train_model_mock: Mock, model_with_default_config: SegmentationPluginModel
+    ):
+        model_with_default_config.train(1, "manifest", "output_dir", checkpoint=Path("ckpt"))
+        train_model_mock.assert_called_once()
+
+    @patch("cyto_dl.api.cyto_dl_model.cyto_dl_base_model.evaluate_model")
+    def test_predict(
+        self, evaluate_model_mock: Mock, model_with_default_config: SegmentationPluginModel
+    ):
+        model_with_default_config.predict("manifest", "output_dir", Path("ckpt"))
+        evaluate_model_mock.assert_called_once()
+
+    def test_spatial_dims(self, model_with_default_config: SegmentationPluginModel):
+        assert model_with_default_config.get_spatial_dims() is not None
+        model_with_default_config.set_spatial_dims(24)
+        assert model_with_default_config.get_spatial_dims() == 24
+
+    def test_input_channel(self, model_with_default_config: SegmentationPluginModel):
+        assert model_with_default_config.get_input_channel() is not None
+        model_with_default_config.set_input_channel(785)
+        assert model_with_default_config.get_input_channel() == 785
+
+    def test_raw_image_channels(self, model_with_default_config: SegmentationPluginModel):
+        assert model_with_default_config.get_raw_image_channels() is not None
+        model_with_default_config.set_raw_image_channels(35)
+        assert model_with_default_config.get_raw_image_channels() == 35
+
+    def test_manifest_column_names(self, model_with_default_config: SegmentationPluginModel):
+        assert len(model_with_default_config.get_manifest_column_names()) == 6
+        model_with_default_config.set_manifest_column_names("a", "b", "c", "d", "e", "f")
+        assert model_with_default_config.get_manifest_column_names() == (
+            "a",
+            "b",
+            "c",
+            "d",
+            "e",
+            "f",
+        )
+
+    def test_split_column(self, model_with_default_config: SegmentationPluginModel):
+        assert model_with_default_config.get_split_column() is None
+        model_with_default_config.set_split_column("foo")
+        assert model_with_default_config.get_split_column() == "foo"
+        model_with_default_config.remove_split_column()
+        assert model_with_default_config.get_split_column() is None
+
+
+class TestBadConfig:
+    @patch("cyto_dl.api.cyto_dl_model.cyto_dl_base_model.train_model")
+    def test_train_no_ckpt(
+        self, train_model_mock: Mock, model_with_bad_config: SegmentationPluginModel
+    ):
+        with pytest.raises(KeyError):
+            model_with_bad_config.train(1, "manifest", "output_dir")
+
+    @patch("cyto_dl.api.cyto_dl_model.cyto_dl_base_model.train_model")
+    def test_train_with_ckpt(
+        self, train_model_mock: Mock, model_with_bad_config: SegmentationPluginModel
+    ):
+        with pytest.raises(KeyError):
+            model_with_bad_config.train(1, "manifest", "output_dir", checkpoint=Path("ckpt"))
+
+    @patch("cyto_dl.api.cyto_dl_model.cyto_dl_base_model.evaluate_model")
+    def test_predict(
+        self, evaluate_model_mock: Mock, model_with_bad_config: SegmentationPluginModel
+    ):
+        with pytest.raises(KeyError):
+            model_with_bad_config.predict("manifest", "output_dir", Path("ckpt"))
+
+    def test_spatial_dims(self, model_with_bad_config: SegmentationPluginModel):
+        with pytest.raises(KeyError):
+            model_with_bad_config.get_spatial_dims()
+        with pytest.raises(KeyError):
+            model_with_bad_config.set_spatial_dims(24)
+
+    def test_input_channel(self, model_with_bad_config: SegmentationPluginModel):
+        with pytest.raises(KeyError):
+            model_with_bad_config.get_input_channel()
+        with pytest.raises(KeyError):
+            model_with_bad_config.set_input_channel(785)
+
+    def test_raw_image_channels(self, model_with_bad_config: SegmentationPluginModel):
+        with pytest.raises(KeyError):
+            model_with_bad_config.get_raw_image_channels()
+        with pytest.raises(KeyError):
+            model_with_bad_config.set_raw_image_channels(35)
+
+    def test_manifest_column_names(self, model_with_bad_config: SegmentationPluginModel):
+        with pytest.raises(KeyError):
+            model_with_bad_config.get_manifest_column_names()
+        with pytest.raises(KeyError):
+            model_with_bad_config.set_manifest_column_names("a", "b", "c", "d", "e", "f")
+
+    def test_split_column(self, model_with_bad_config: SegmentationPluginModel):
+        with pytest.raises(KeyError):
+            model_with_bad_config.get_split_column()
+        with pytest.raises(KeyError):
+            model_with_bad_config.set_split_column("foo")

From 33bee7ac8833c56b8aed77f5a21c39bc15bbaeeb Mon Sep 17 00:00:00 2001
From: Daniel Saelid <daniel.saelid@alleninstitute.org>
Date: Fri, 12 Apr 2024 11:50:39 -0700
Subject: [PATCH 08/11] group abstract methods together

---
 .../api/cyto_dl_model/cyto_dl_base_model.py   | 36 +++++++++----------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
index 542d87201..dbc2bb5d1 100644
--- a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
+++ b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
@@ -26,6 +26,23 @@ def __init__(self, config_filepath: Optional[Path] = None):
             OmegaConf.load(config_filepath) if config_filepath else self._generate_default_config()
         )
 
+    @abstractmethod
+    def _get_experiment_type(self) -> ExperimentType:
+        """Return experiment type for this config (e.g. segmentation_plugin, gan, etc)"""
+        pass
+    
+    @abstractmethod
+    def _set_max_epochs(self, max_epochs: int) -> None:
+        pass
+
+    @abstractmethod
+    def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None:
+        pass
+
+    @abstractmethod
+    def _set_output_dir(self, output_dir: Union[str, Path]) -> None:
+        pass
+
     def _generate_default_config(self) -> DictConfig:
         cfg_dir: Path = (
             pyrootutils.find_root(search_from=__file__, indicator=("pyproject.toml", "README.md"))
@@ -50,11 +67,6 @@ def _generate_default_config(self) -> DictConfig:
             cfg.extras.print_config = False
         return cfg
 
-    @abstractmethod
-    def _get_experiment_type(self) -> ExperimentType:
-        """Return experiment type for this config (e.g. segmentation_plugin, gan, etc)"""
-        pass
-
     def _key_exists(self, k: str) -> bool:
         keys: List[str] = k.split(".")
         curr_dict: DictConfig = self._cfg
@@ -81,20 +93,6 @@ def _set_training_config(self, train: bool):
         # afaik, task_name isn't used outside of template_utils.py - do we need to support this?
         self._set_cfg("task_name", "train" if train else "predict")
 
-    @abstractmethod
-    def _set_max_epochs(self, max_epochs: int) -> None:
-        pass
-
-    # is 'manifest' a good word for the path to the directory (or CSV?) with list of images
-    # to use for training, evaluation, etc?
-    @abstractmethod
-    def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None:
-        pass
-
-    @abstractmethod
-    def _set_output_dir(self, output_dir: Union[str, Path]) -> None:
-        pass
-
     def _set_ckpt(self, ckpt: Optional[Path]) -> None:
         self._set_cfg("ckpt_path", str(ckpt.resolve()) if ckpt else ckpt)
 

From 8fc070da84f40bae93effd9e53e07236f777ad00 Mon Sep 17 00:00:00 2001
From: Daniel Saelid <daniel.saelid@alleninstitute.org>
Date: Fri, 12 Apr 2024 13:18:01 -0700
Subject: [PATCH 09/11] lint + lowercase experiment type

---
 cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
index dbc2bb5d1..be8c1bea2 100644
--- a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
+++ b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
@@ -30,7 +30,7 @@ def __init__(self, config_filepath: Optional[Path] = None):
     def _get_experiment_type(self) -> ExperimentType:
         """Return experiment type for this config (e.g. segmentation_plugin, gan, etc)"""
         pass
-    
+
     @abstractmethod
     def _set_max_epochs(self, max_epochs: int) -> None:
         pass
@@ -56,7 +56,7 @@ def _generate_default_config(self) -> DictConfig:
                 config_name="train.yaml",  # only using train.yaml after conversation w/ Benji
                 return_hydra_config=True,
                 overrides=[
-                    f"experiment=im2im/{self._get_experiment_type().name}",
+                    f"experiment=im2im/{self._get_experiment_type().name.lower()}",
                     "paths.output_dir=PLACEHOLDER",
                     "paths.work_dir=PLACEHOLDER",
                 ],

From 889775c5b15fcbf38de32cce0dee57cade5afe53 Mon Sep 17 00:00:00 2001
From: Daniel Saelid <daniel.saelid@alleninstitute.org>
Date: Fri, 12 Apr 2024 13:23:23 -0700
Subject: [PATCH 10/11] flake

---
 cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py        | 4 ++--
 cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
index be8c1bea2..8986da834 100644
--- a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
+++ b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
@@ -8,7 +8,7 @@
 from hydra.core.global_hydra import GlobalHydra
 from omegaconf import DictConfig, OmegaConf, open_dict
 
-from cyto_dl.api.data import *
+from cyto_dl.api.data import ExperimentType
 from cyto_dl.eval import evaluate as evaluate_model
 from cyto_dl.train import train as train_model
 
@@ -72,7 +72,7 @@ def _key_exists(self, k: str) -> bool:
         curr_dict: DictConfig = self._cfg
         while keys:
             key: str = keys.pop(0)
-            if not key in curr_dict:
+            if key not in curr_dict:
                 return False
             curr_dict = curr_dict[key]
         return True
diff --git a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
index 783876d49..68a0b9deb 100644
--- a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
+++ b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
@@ -4,7 +4,7 @@
 from omegaconf import ListConfig
 
 from cyto_dl.api.cyto_dl_model import CytoDLBaseModel
-from cyto_dl.api.data import *
+from cyto_dl.api.data import ExperimentType, HardwareType, PatchSize
 
 
 class SegmentationPluginModel(CytoDLBaseModel):

From 73ef86cf49f6a4ecfb36d5920f33a4fd0eee8bb9 Mon Sep 17 00:00:00 2001
From: Daniel Saelid <daniel.saelid@alleninstitute.org>
Date: Mon, 15 Apr 2024 11:49:01 -0700
Subject: [PATCH 11/11] move overrides to top-level

---
 .../experiment/im2im/segmentation_plugin.yaml | 34 +++++-----
 .../api/cyto_dl_model/cyto_dl_base_model.py   | 66 +++++++++++--------
 .../segmentation_plugin_model.py              | 24 ++-----
 .../test_segmentation_plugin_model.py         | 15 +----
 4 files changed, 65 insertions(+), 74 deletions(-)

diff --git a/configs/experiment/im2im/segmentation_plugin.yaml b/configs/experiment/im2im/segmentation_plugin.yaml
index c69274c39..bff9895c9 100644
--- a/configs/experiment/im2im/segmentation_plugin.yaml
+++ b/configs/experiment/im2im/segmentation_plugin.yaml
@@ -6,16 +6,21 @@ defaults:
   - override /model: im2im/segmentation_plugin.yaml
   - override /callbacks: default.yaml
   - override /trainer: gpu.yaml
-  - override /logger: mlflow.yaml
+  - override /logger: csv.yaml
 
 # all parameters below will be merged with parameters from default configurations set above
 # this allows you to overwrite only specified parameters
 
+# parameters with value MUST_OVERRIDE must be overridden before using this config, all other
+# parameters have a reasonable default value
+
 tags: ["dev"]
 seed: 12345
 
-experiment_name: YOUR_EXP_NAME
-run_name: YOUR_RUN_NAME
+ckpt_path: null # must override for prediction
+
+experiment_name: experiment_name
+run_name: run_name
 
 # manifest columns
 source_col: raw
@@ -26,24 +31,21 @@ exclude_mask_col: exclude_mask
 base_image_col: base_image
 
 # data params
-spatial_dims: 3
+spatial_dims: MUST_OVERRIDE # int value, req for first training, should not change after
 input_channel: 0
 raw_im_channels: 1
 
 trainer:
-  max_epochs: 100
+  max_epochs: 1 # must override for training
+  accelerator: gpu
 
 data:
-  path: ${paths.data_dir}/example_experiment_data/s3_data
-  cache_dir: ${paths.data_dir}/example_experiment_data/cache
+  path: MUST_OVERRIDE # string path to manifest
+  split_column: null
   batch_size: 1
   _aux:
-    patch_shape:
-# small, medium, large
-# 32 pix, 64 pix, 128 pix
-
-# OVERRIDE:
-# data._aux.patch_shape
-# model._aux.strides
-# model._aux.kernel_size
-# model._aux.upsample_kernel_size
+    patch_shape: [16, 32, 32]
+
+paths:
+  output_dir: MUST_OVERRIDE
+  work_dir: ${paths.output_dir} # it's unclear to me if this is necessary or used
diff --git a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
index 8986da834..2261b951d 100644
--- a/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
+++ b/cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
@@ -12,60 +12,74 @@
 from cyto_dl.eval import evaluate as evaluate_model
 from cyto_dl.train import train as train_model
 
+# TODO: encapsulate experiment management (file system) details here, will require passing output_dir
+# into the factory methods, maybe
+
 
 class CytoDLBaseModel(ABC):
     """A CytoDLBaseModel is used to configure, train, and run predictions on a cyto-dl model."""
 
-    def __init__(self, config_filepath: Optional[Path] = None):
-        """
-        :param config_filepath: path to a .yaml config file that will be used as the basis
-        for this CytoDLBaseModel (must be generated by the CytoDLBaseModel subclass that wants
-        to use it). If None, a default config will be used instead.
+    def __init__(self, cfg: DictConfig):
+        """Not intended for direct use by clients.
+
+        Please see the classmethod factory methods instead.
         """
-        self._cfg: DictConfig = (
-            OmegaConf.load(config_filepath) if config_filepath else self._generate_default_config()
-        )
+        self._cfg: DictConfig = cfg
 
+    @classmethod
     @abstractmethod
-    def _get_experiment_type(self) -> ExperimentType:
+    def _get_experiment_type(cls) -> ExperimentType:
         """Return experiment type for this config (e.g. segmentation_plugin, gan, etc)"""
         pass
 
-    @abstractmethod
-    def _set_max_epochs(self, max_epochs: int) -> None:
-        pass
+    @classmethod
+    def from_existing_config(cls, config_filepath: Path):
+        """Returns a model from an existing config.
 
-    @abstractmethod
-    def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None:
-        pass
+        :param config_filepath: path to a .yaml config file that will be used as the basis
+        for this CytoDLBaseModel (must be generated by the CytoDLBaseModel subclass that wants
+        to use it).
+        """
+        return cls(OmegaConf.load(config_filepath))
 
-    @abstractmethod
-    def _set_output_dir(self, output_dir: Union[str, Path]) -> None:
-        pass
+    # TODO: if spatial_dims is only ever 2 or 3, create an enum for it
+    @classmethod
+    def from_default_config(cls, spatial_dims: int):
+        """Returns a model from the default config.
 
-    def _generate_default_config(self) -> DictConfig:
+        :param spatial_dims: dimensions for the model (e.g. 2)
+        """
         cfg_dir: Path = (
             pyrootutils.find_root(search_from=__file__, indicator=("pyproject.toml", "README.md"))
             / "configs"
         )
         GlobalHydra.instance().clear()
         with initialize_config_dir(version_base="1.2", config_dir=str(cfg_dir)):
-            # the overrides for 'paths.' are necessary to make later overrides work (for some reason I don't fully understand)
-            # I'd prefer to do this in experiment configs instead of the overrides here
             cfg: DictConfig = compose(
-                config_name="train.yaml",  # only using train.yaml after conversation w/ Benji
+                config_name="train.yaml",  # train.yaml can work for prediction too
                 return_hydra_config=True,
                 overrides=[
-                    f"experiment=im2im/{self._get_experiment_type().name.lower()}",
-                    "paths.output_dir=PLACEHOLDER",
-                    "paths.work_dir=PLACEHOLDER",
+                    f"experiment=im2im/{cls._get_experiment_type().name.lower()}",
+                    f"spatial_dims={spatial_dims}",
                 ],
             )
         with open_dict(cfg):
             del cfg["hydra"]
             cfg.extras.enforce_tags = False
             cfg.extras.print_config = False
-        return cfg
+        return cls(cfg)
+
+    @abstractmethod
+    def _set_max_epochs(self, max_epochs: int) -> None:
+        pass
+
+    @abstractmethod
+    def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None:
+        pass
+
+    @abstractmethod
+    def _set_output_dir(self, output_dir: Union[str, Path]) -> None:
+        pass
 
     def _key_exists(self, k: str) -> bool:
         keys: List[str] = k.split(".")
diff --git a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
index 68a0b9deb..0e014dbae 100644
--- a/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
+++ b/cyto_dl/api/cyto_dl_model/segmentation_plugin_model.py
@@ -1,7 +1,7 @@
 from pathlib import Path
 from typing import List, Optional, Tuple, Union
 
-from omegaconf import ListConfig
+from omegaconf import DictConfig, ListConfig
 
 from cyto_dl.api.cyto_dl_model import CytoDLBaseModel
 from cyto_dl.api.data import ExperimentType, HardwareType, PatchSize
@@ -11,13 +11,12 @@ class SegmentationPluginModel(CytoDLBaseModel):
     """A SegmentationPluginModel handles configuration, training, and prediction using the default
     segmentation_plugin experiment from CytoDL."""
 
-    def __init__(self, config_filepath: Optional[Path] = None):
-        super().__init__(config_filepath)
+    def __init__(self, cfg: DictConfig):
+        super().__init__(cfg)
         self._has_split_column = False
 
-    # we currently have an override for ['mode'] in ml-seg, but I can't find top-level 'mode' in the configs,
-    # do we need to support this?
-    def _get_experiment_type(self) -> ExperimentType:
+    @classmethod
+    def _get_experiment_type(cls) -> ExperimentType:
         return ExperimentType.SEGMENTATION_PLUGIN
 
     def _set_max_epochs(self, max_epochs: int) -> None:
@@ -28,18 +27,8 @@ def _set_manifest_path(self, manifest_path: Union[str, Path]) -> None:
 
     def _set_output_dir(self, output_dir: Union[str, Path]) -> None:
         self._set_cfg("paths.output_dir", str(output_dir))
-        # I can't find where work_dir is actually used in cyto_dl, do we need to support this?
         self._set_cfg("paths.work_dir", str(output_dir))
 
-    # a lot of these keys are duplicated across the im2im experiment types (but not present in top-level
-    # train.yaml or eval.yaml) - should we move these into the top-level configs and move these setters and
-    # getters accordingly?
-    def set_spatial_dims(self, spatial_dims: int) -> None:
-        self._set_cfg("spatial_dims", spatial_dims)
-
-    def get_spatial_dims(self) -> int:
-        return self._get_cfg("spatial_dims")
-
     def set_input_channel(self, input_channel: int) -> None:
         self._set_cfg("input_channel", input_channel)
 
@@ -98,9 +87,6 @@ def remove_split_column(self) -> None:
             del existing_cols[-1]
         self._has_split_column = False
 
-    # is patch_shape required in order to run training/prediction?
-    # if so, it should be an argument to train/predict/__init__, or a default
-    # should be set in the config
     def set_patch_size(self, patch_size: PatchSize) -> None:
         self._set_cfg("data._aux.patch_shape", patch_size.value)
 
diff --git a/tests/api/cyto_dl_model/test_segmentation_plugin_model.py b/tests/api/cyto_dl_model/test_segmentation_plugin_model.py
index 8a76111e9..d2190d1ea 100644
--- a/tests/api/cyto_dl_model/test_segmentation_plugin_model.py
+++ b/tests/api/cyto_dl_model/test_segmentation_plugin_model.py
@@ -16,12 +16,12 @@
 # are actually used how we expect by Cyto-DL, just that they exist in the default config.
 @pytest.fixture
 def model_with_default_config() -> SegmentationPluginModel:
-    return SegmentationPluginModel()
+    return SegmentationPluginModel.from_default_config(3)
 
 
 @pytest.fixture
 def model_with_bad_config() -> SegmentationPluginModel:
-    return SegmentationPluginModel(config_filepath=Path(__file__).parent / "bad_config.yaml")
+    return SegmentationPluginModel.from_existing_config(Path(__file__).parent / "bad_config.yaml")
 
 
 class TestDefaultConfig:
@@ -46,11 +46,6 @@ def test_predict(
         model_with_default_config.predict("manifest", "output_dir", Path("ckpt"))
         evaluate_model_mock.assert_called_once()
 
-    def test_spatial_dims(self, model_with_default_config: SegmentationPluginModel):
-        assert model_with_default_config.get_spatial_dims() is not None
-        model_with_default_config.set_spatial_dims(24)
-        assert model_with_default_config.get_spatial_dims() == 24
-
     def test_input_channel(self, model_with_default_config: SegmentationPluginModel):
         assert model_with_default_config.get_input_channel() is not None
         model_with_default_config.set_input_channel(785)
@@ -103,12 +98,6 @@ def test_predict(
         with pytest.raises(KeyError):
             model_with_bad_config.predict("manifest", "output_dir", Path("ckpt"))
 
-    def test_spatial_dims(self, model_with_bad_config: SegmentationPluginModel):
-        with pytest.raises(KeyError):
-            model_with_bad_config.get_spatial_dims()
-        with pytest.raises(KeyError):
-            model_with_bad_config.set_spatial_dims(24)
-
     def test_input_channel(self, model_with_bad_config: SegmentationPluginModel):
         with pytest.raises(KeyError):
             model_with_bad_config.get_input_channel()