Skip to content

Commit

Permalink
Api updates (#79)
Browse files Browse the repository at this point in the history
* pluralize sessions in batch

* fix type annotation

* updates

* improve coverage

* prepare 24.1a5.dev

* fix docs

* update docs

* remove extra whitespace
  • Loading branch information
shapiromatron authored Nov 12, 2024
1 parent 5754ab4 commit 4f97dea
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 43 deletions.
28 changes: 28 additions & 0 deletions docs/source/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,34 @@ make docs-clean # Clean documentation

Using the `make serve` command is recommended for editing documentation; it updates the preview in realtime as files are saved.

## Versioning

We use [calendar versioning](https://calver.org/) for `pybmds`, where:

* `major` is the year of the release (ex: `28` for a 2028 release)
* `minor` is incremented for each release of the calendar year, starting at `1`
* `aN` is the alpha release for testing, where N starts at `1`
* `dev` is any upcoming pre-release currently under development.

As an example, consider the scenario where we're beginning development of our first release in 2028:

* In `pybmds.__init__`, set `__version__ = "28.1a1.dev"`
* Iterate until we're ready for an alpha release
* Update the version to `28.1a1`, and git tag the release `28.1a1`
* Immediately change the `main` branch to `28.1a2.dev`
* Begin testing of `28.1a1`
* If changes are needed, iterate on `28.1a2.dev`
* If changes are not needed, release a `28.1` by changing the version and minting a tag

The [packaging](https://packaging.pypa.io/en/stable/index.html) package implements [PEP440](https://peps.python.org/pep-0440/), and can be used to check candidate versions:

```python
from packaging.version import Version

Version('28.1a1.dev')
# _Version(release=(28, 1), pre=('a', 1), dev=('dev', 0))
```

### Priors Report

The `pybmds` package includes Bayesian priors and frequentist parameter initialization settings that have been tuned to help improve model fit performance. To generate a report of the settings in all permutations, run the following command:
Expand Down
8 changes: 4 additions & 4 deletions docs/source/recipes/custom-excel-exports.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@
" sess.add_model(Model, settings)\n",
"\n",
" option_sets = [\n",
" (pybmds.ContinuousRiskType.RelativeDeviation, 0.1, \n",
" (pybmds.ContinuousRiskType.RelativeDeviation, 0.1,\n",
" pybmds.ContinuousDistType.normal),\n",
" (pybmds.ContinuousRiskType.RelativeDeviation, 0.1, \n",
" (pybmds.ContinuousRiskType.RelativeDeviation, 0.1,\n",
" pybmds.ContinuousDistType.normal_ncv),\n",
" ]\n",
" sessions = []\n",
Expand Down Expand Up @@ -225,7 +225,7 @@
"outputs": [],
"source": [
"rows = []\n",
"for i, session in enumerate(sess_batch.session):\n",
"for i, session in enumerate(sess_batch.sessions):\n",
" for j, model in enumerate(session.models):\n",
" data = {\n",
" \"session_index\": i,\n",
Expand Down Expand Up @@ -356,7 +356,7 @@
},
"outputs": [],
"source": [
"model = sess_batch.session[0].models[0]\n",
"model = sess_batch.sessions[0].models[0]\n",
"res = model.results"
]
},
Expand Down
2 changes: 1 addition & 1 deletion src/pybmds/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "24.1a4"
__version__ = "24.1a5" # see docs/development for versioning

from .batch import BatchResponse, BatchSession # noqa: F401
from .constants import DistType as ContinuousDistType # noqa: F401
Expand Down
114 changes: 86 additions & 28 deletions src/pybmds/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
import pandas as pd
from tqdm import tqdm

from .datasets.base import DatasetBase
from .constants import Dtype
from .datasets.base import DatasetType
from .models.multi_tumor import Multitumor
from .reporting.styling import Report, write_citation
from .session import Session
Expand All @@ -25,11 +26,24 @@ class BatchBase:
pass


def _make_zip(data: str, archive: Path):
with zipfile.ZipFile(
archive, mode="w", compression=zipfile.ZIP_DEFLATED, compresslevel=9
) as zf:
zf.writestr("data.json", data=data)


def _load_zip(archive: Path) -> str:
with zipfile.ZipFile(archive) as zf:
with zf.open("data.json") as f:
return f.read()


class BatchSession(BatchBase):
def __init__(self, sessions: list[Session] | None = None):
if sessions is None:
sessions = []
self.session: list[Session] = sessions
self.sessions: list[Session] = sessions
self.errors = []

def df_summary(self) -> pd.DataFrame:
Expand All @@ -43,13 +57,13 @@ def df_summary(self) -> pd.DataFrame:
),
clean=False,
)
for idx, session in enumerate(self.session)
for idx, session in enumerate(self.sessions)
]
return pd.concat(dfs).dropna(axis=1, how="all").fillna("")

def df_dataset(self) -> pd.DataFrame:
data: list[dict] = []
for idx, session in enumerate(self.session):
for idx, session in enumerate(self.sessions):
data.extend(
session.dataset.rows(
extras=dict(
Expand All @@ -64,11 +78,16 @@ def df_dataset(self) -> pd.DataFrame:

def df_params(self) -> pd.DataFrame:
data: list[dict] = []
for idx, session in enumerate(self.session):
for idx, session in enumerate(self.sessions):
for model_index, model in enumerate(session.models):
if model.has_results:
func = (
model.results.parameter_rows
if session.dataset.dtype is Dtype.NESTED_DICHOTOMOUS
else model.results.parameters.rows
)
data.extend(
model.results.parameters.rows(
func(
extras=dict(
session_index=idx,
session_id=session.id,
Expand Down Expand Up @@ -122,7 +141,7 @@ def to_docx(
if report is None:
report = Report.build_default()

for session in self.session:
for session in self.sessions:
session.to_docx(
report,
header_level=header_level,
Expand All @@ -133,7 +152,7 @@ def to_docx(
session_inputs_table=session_inputs_table,
)

if citation and len(self.session) > 0:
if citation and len(self.sessions) > 0:
write_citation(report, header_level=header_level)

return report.document
Expand All @@ -144,16 +163,19 @@ def serialize(self) -> str:
Returns:
str: A JSON string
"""
return json.dumps([session.to_dict() for session in self.session])
return json.dumps([session.to_dict() for session in self.sessions])

@classmethod
def execute(
cls, datasets: list[DatasetBase], runner: Callable, nprocs: int | None = None
cls,
datasets: list[DatasetType],
runner: Callable[[DatasetType], BatchResponse],
nprocs: int | None = None,
) -> Self:
"""Execute sessions using multiple processors.
Args:
datasets (list[DatasetBase]): The datasets to execute
datasets (list[DatasetType]): The datasets to execute
runner (Callable[dataset] -> BatchResponse): The method which executes a session
nprocs (Optional[int]): the number of processors to use; defaults to N-1. If 1 is
specified; the batch session is called linearly without a process pool
Expand Down Expand Up @@ -187,9 +209,9 @@ def execute(
if result.success:
if isinstance(result.content, list):
for item in result.content:
batch.session.append(Session.from_serialized(item))
batch.sessions.append(Session.from_serialized(item))
else:
batch.session.append(Session.from_serialized(result.content))
batch.sessions.append(Session.from_serialized(result.content))
else:
batch.errors.append(result.content)

Expand All @@ -216,28 +238,22 @@ def load(cls, archive: Path) -> Self:
Returns:
BatchSession: An instance of this session
"""
with zipfile.ZipFile(archive) as zf:
with zf.open("data.json") as f:
data = f.read()
return BatchSession.deserialize(data)
return BatchSession.deserialize(_load_zip(archive))

def save(self, archive: Path):
"""Save Session to a compressed zipfile
Args:
fn (Path): The zipfile path
"""
with zipfile.ZipFile(
archive, mode="w", compression=zipfile.ZIP_DEFLATED, compresslevel=9
) as zf:
zf.writestr("data.json", data=self.serialize())
return _make_zip(self.serialize(), archive)


class MultitumorBatch(BatchBase):
def __init__(self, sessions: list[Multitumor] | None = None):
if sessions is None:
sessions = []
self.session: list[Multitumor] = sessions
self.sessions: list[Multitumor] = sessions
self.errors = []

def to_docx(
Expand All @@ -260,20 +276,42 @@ def to_docx(
if report is None:
report = Report.build_default()

for session in self.session:
for session in self.sessions:
session.to_docx(
report,
header_level=header_level,
citation=False,
)

if citation and len(self.session) > 0:
if citation and len(self.sessions) > 0:
write_citation(report, header_level=header_level)

return report.document

def serialize(self) -> str:
return json.dumps([session.to_dict() for session in self.session])
return json.dumps([session.to_dict() for session in self.sessions])

@classmethod
def execute(cls, datasets: list[dict], runner: Callable, nprocs: int | None = None) -> Self:
"""Execute sessions using multiple processors.
Args:
datasets (list[dict]): The datasets to execute
runner (Callable[dict] -> Multitumor): The method which executes a session.
nprocs (Optional[int]): the number of processors to use; defaults to N-1. If 1 is
specified; the batch session is called sequentially
Returns:
A MultitumorBatch with sessions executed.
"""
if nprocs is None:
nprocs = max(os.cpu_count() - 1, 1)

if nprocs > 1:
raise NotImplementedError("Not implemented (yet)")

sessions = [runner(dataset) for dataset in tqdm(datasets, desc="Executing...")]
return cls(sessions=sessions)

@classmethod
def deserialize(cls, data: str) -> Self:
Expand All @@ -287,7 +325,7 @@ def df_summary(self) -> pd.DataFrame:
extras=dict(session_index=idx),
clean=False,
)
for idx, session in enumerate(self.session)
for idx, session in enumerate(self.sessions)
]
return pd.concat(dfs).dropna(axis=1, how="all").fillna("")

Expand All @@ -296,7 +334,7 @@ def df_dataset(self) -> pd.DataFrame:
session.datasets_df(
extras=dict(session_index=idx),
)
for idx, session in enumerate(self.session)
for idx, session in enumerate(self.sessions)
]
return pd.concat(dfs).dropna(axis=1, how="all").fillna("")

Expand All @@ -305,7 +343,7 @@ def df_params(self) -> pd.DataFrame:
session.params_df(
extras=dict(session_index=idx),
)
for idx, session in enumerate(self.session)
for idx, session in enumerate(self.sessions)
]
return pd.concat(dfs).dropna(axis=1, how="all").fillna("")

Expand All @@ -320,3 +358,23 @@ def to_excel(self, path: Path | None = None) -> Path | BytesIO:
for name, df in data.items():
df.to_excel(writer, sheet_name=name, index=False)
return f

@classmethod
def load(cls, archive: Path) -> Self:
"""Load a Session from a compressed zipfile
Args:
fn (Path): The zipfile path
Returns:
MultitumorBatch: An instance of this session
"""
return cls.deserialize(_load_zip(archive))

def save(self, archive: Path):
"""Save Session to a compressed zipfile
Args:
fn (Path): The zipfile path
"""
return _make_zip(self.serialize(), archive)
2 changes: 1 addition & 1 deletion tests/test_pybmds/models/test_multi_tumor.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_execute(self, mt_datasets, rewrite_data_files, data_path):
df = session.datasets_df()

# docx
docx = session.to_docx()
docx = session.to_docx(all_models=True, bmd_cdf_table=True)

if rewrite_data_files:
(data_path / "reports/multitumor.txt").write_text(text)
Expand Down
Loading

0 comments on commit 4f97dea

Please sign in to comment.