Skip to content

Commit

Permalink
Api updates (#79)
Browse files Browse the repository at this point in the history
* pluralize sessions in batch

* fix type annotation

* updates

* improve coverage

* prepare 24.1a5.dev

* fix docs

* update docs

* remove extra whitespace
  • Loading branch information
shapiromatron authored Nov 12, 2024
1 parent 5754ab4 commit 4f97dea
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 43 deletions.
28 changes: 28 additions & 0 deletions docs/source/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,34 @@ make docs-clean # Clean documentation

Using the `make serve` command is recommended for editing documentation; it updates the preview in realtime as files are saved.

## Versioning

We use [calendar versioning](https://calver.org/) for `pybmds`, where:

* `major` is the year of the release (ex: `28` for a 2028 release)
* `minor` is incremented for each release of the calendar year, starting at `1`
* `aN` is the alpha release for testing, where N starts at `1`
* `dev` is any upcoming pre-release currently under development.

As an example, consider the scenario where we're beginning development of our first release in 2028:

* In `pybmds.__init__`, set `__version__ = "28.1a1.dev"`
* Iterate until we're ready for an alpha release
* Update the version to `28.1a1`, and git tag the release `28.1a1`
* Immediately change the `main` branch to `28.1a2.dev`
* Begin testing of `28.1a1`
* If changes are needed, iterate on `28.1a2.dev`
* If changes are not needed, release a `28.1` by changing the version and minting a tag

The [packaging](https://packaging.pypa.io/en/stable/index.html) package implements [PEP440](https://peps.python.org/pep-0440/), and can be used to check candidate versions:

```python
from packaging.version import Version

Version('28.1a1.dev')
# _Version(release=(28, 1), pre=('a', 1), dev=('dev', 0))
```

### Priors Report

The `pybmds` package includes Bayesian priors and frequentist parameter initialization settings that have been tuned to help improve model fit performance. To generate a report of the settings in all permutations, run the following command:
Expand Down
8 changes: 4 additions & 4 deletions docs/source/recipes/custom-excel-exports.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@
" sess.add_model(Model, settings)\n",
"\n",
" option_sets = [\n",
" (pybmds.ContinuousRiskType.RelativeDeviation, 0.1, \n",
" (pybmds.ContinuousRiskType.RelativeDeviation, 0.1,\n",
" pybmds.ContinuousDistType.normal),\n",
" (pybmds.ContinuousRiskType.RelativeDeviation, 0.1, \n",
" (pybmds.ContinuousRiskType.RelativeDeviation, 0.1,\n",
" pybmds.ContinuousDistType.normal_ncv),\n",
" ]\n",
" sessions = []\n",
Expand Down Expand Up @@ -225,7 +225,7 @@
"outputs": [],
"source": [
"rows = []\n",
"for i, session in enumerate(sess_batch.session):\n",
"for i, session in enumerate(sess_batch.sessions):\n",
" for j, model in enumerate(session.models):\n",
" data = {\n",
" \"session_index\": i,\n",
Expand Down Expand Up @@ -356,7 +356,7 @@
},
"outputs": [],
"source": [
"model = sess_batch.session[0].models[0]\n",
"model = sess_batch.sessions[0].models[0]\n",
"res = model.results"
]
},
Expand Down
2 changes: 1 addition & 1 deletion src/pybmds/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "24.1a4"
__version__ = "24.1a5" # see docs/development for versioning

from .batch import BatchResponse, BatchSession # noqa: F401
from .constants import DistType as ContinuousDistType # noqa: F401
Expand Down
114 changes: 86 additions & 28 deletions src/pybmds/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
import pandas as pd
from tqdm import tqdm

from .datasets.base import DatasetBase
from .constants import Dtype
from .datasets.base import DatasetType
from .models.multi_tumor import Multitumor
from .reporting.styling import Report, write_citation
from .session import Session
Expand All @@ -25,11 +26,24 @@ class BatchBase:
pass


def _make_zip(data: str, archive: Path):
with zipfile.ZipFile(
archive, mode="w", compression=zipfile.ZIP_DEFLATED, compresslevel=9
) as zf:
zf.writestr("data.json", data=data)


def _load_zip(archive: Path) -> str:
with zipfile.ZipFile(archive) as zf:
with zf.open("data.json") as f:
return f.read()


class BatchSession(BatchBase):
def __init__(self, sessions: list[Session] | None = None):
if sessions is None:
sessions = []
self.session: list[Session] = sessions
self.sessions: list[Session] = sessions
self.errors = []

def df_summary(self) -> pd.DataFrame:
Expand All @@ -43,13 +57,13 @@ def df_summary(self) -> pd.DataFrame:
),
clean=False,
)
for idx, session in enumerate(self.session)
for idx, session in enumerate(self.sessions)
]
return pd.concat(dfs).dropna(axis=1, how="all").fillna("")

def df_dataset(self) -> pd.DataFrame:
data: list[dict] = []
for idx, session in enumerate(self.session):
for idx, session in enumerate(self.sessions):
data.extend(
session.dataset.rows(
extras=dict(
Expand All @@ -64,11 +78,16 @@ def df_dataset(self) -> pd.DataFrame:

def df_params(self) -> pd.DataFrame:
data: list[dict] = []
for idx, session in enumerate(self.session):
for idx, session in enumerate(self.sessions):
for model_index, model in enumerate(session.models):
if model.has_results:
func = (
model.results.parameter_rows
if session.dataset.dtype is Dtype.NESTED_DICHOTOMOUS
else model.results.parameters.rows
)
data.extend(
model.results.parameters.rows(
func(
extras=dict(
session_index=idx,
session_id=session.id,
Expand Down Expand Up @@ -122,7 +141,7 @@ def to_docx(
if report is None:
report = Report.build_default()

for session in self.session:
for session in self.sessions:
session.to_docx(
report,
header_level=header_level,
Expand All @@ -133,7 +152,7 @@ def to_docx(
session_inputs_table=session_inputs_table,
)

if citation and len(self.session) > 0:
if citation and len(self.sessions) > 0:
write_citation(report, header_level=header_level)

return report.document
Expand All @@ -144,16 +163,19 @@ def serialize(self) -> str:
Returns:
str: A JSON string
"""
return json.dumps([session.to_dict() for session in self.session])
return json.dumps([session.to_dict() for session in self.sessions])

@classmethod
def execute(
cls, datasets: list[DatasetBase], runner: Callable, nprocs: int | None = None
cls,
datasets: list[DatasetType],
runner: Callable[[DatasetType], BatchResponse],
nprocs: int | None = None,
) -> Self:
"""Execute sessions using multiple processors.
Args:
datasets (list[DatasetBase]): The datasets to execute
datasets (list[DatasetType]): The datasets to execute
runner (Callable[dataset] -> BatchResponse): The method which executes a session
nprocs (Optional[int]): the number of processors to use; defaults to N-1. If 1 is
specified; the batch session is called linearly without a process pool
Expand Down Expand Up @@ -187,9 +209,9 @@ def execute(
if result.success:
if isinstance(result.content, list):
for item in result.content:
batch.session.append(Session.from_serialized(item))
batch.sessions.append(Session.from_serialized(item))
else:
batch.session.append(Session.from_serialized(result.content))
batch.sessions.append(Session.from_serialized(result.content))
else:
batch.errors.append(result.content)

Expand All @@ -216,28 +238,22 @@ def load(cls, archive: Path) -> Self:
Returns:
BatchSession: An instance of this session
"""
with zipfile.ZipFile(archive) as zf:
with zf.open("data.json") as f:
data = f.read()
return BatchSession.deserialize(data)
return BatchSession.deserialize(_load_zip(archive))

def save(self, archive: Path):
"""Save Session to a compressed zipfile
Args:
fn (Path): The zipfile path
"""
with zipfile.ZipFile(
archive, mode="w", compression=zipfile.ZIP_DEFLATED, compresslevel=9
) as zf:
zf.writestr("data.json", data=self.serialize())
return _make_zip(self.serialize(), archive)


class MultitumorBatch(BatchBase):
def __init__(self, sessions: list[Multitumor] | None = None):
if sessions is None:
sessions = []
self.session: list[Multitumor] = sessions
self.sessions: list[Multitumor] = sessions
self.errors = []

def to_docx(
Expand All @@ -260,20 +276,42 @@ def to_docx(
if report is None:
report = Report.build_default()

for session in self.session:
for session in self.sessions:
session.to_docx(
report,
header_level=header_level,
citation=False,
)

if citation and len(self.session) > 0:
if citation and len(self.sessions) > 0:
write_citation(report, header_level=header_level)

return report.document

def serialize(self) -> str:
return json.dumps([session.to_dict() for session in self.session])
return json.dumps([session.to_dict() for session in self.sessions])

@classmethod
def execute(cls, datasets: list[dict], runner: Callable, nprocs: int | None = None) -> Self:
"""Execute sessions using multiple processors.
Args:
datasets (list[dict]): The datasets to execute
runner (Callable[dict] -> Multitumor): The method which executes a session.
nprocs (Optional[int]): the number of processors to use; defaults to N-1. If 1 is
specified; the batch session is called sequentially
Returns:
A MultitumorBatch with sessions executed.
"""
if nprocs is None:
nprocs = max(os.cpu_count() - 1, 1)

if nprocs > 1:
raise NotImplementedError("Not implemented (yet)")

sessions = [runner(dataset) for dataset in tqdm(datasets, desc="Executing...")]
return cls(sessions=sessions)

@classmethod
def deserialize(cls, data: str) -> Self:
Expand All @@ -287,7 +325,7 @@ def df_summary(self) -> pd.DataFrame:
extras=dict(session_index=idx),
clean=False,
)
for idx, session in enumerate(self.session)
for idx, session in enumerate(self.sessions)
]
return pd.concat(dfs).dropna(axis=1, how="all").fillna("")

Expand All @@ -296,7 +334,7 @@ def df_dataset(self) -> pd.DataFrame:
session.datasets_df(
extras=dict(session_index=idx),
)
for idx, session in enumerate(self.session)
for idx, session in enumerate(self.sessions)
]
return pd.concat(dfs).dropna(axis=1, how="all").fillna("")

Expand All @@ -305,7 +343,7 @@ def df_params(self) -> pd.DataFrame:
session.params_df(
extras=dict(session_index=idx),
)
for idx, session in enumerate(self.session)
for idx, session in enumerate(self.sessions)
]
return pd.concat(dfs).dropna(axis=1, how="all").fillna("")

Expand All @@ -320,3 +358,23 @@ def to_excel(self, path: Path | None = None) -> Path | BytesIO:
for name, df in data.items():
df.to_excel(writer, sheet_name=name, index=False)
return f

@classmethod
def load(cls, archive: Path) -> Self:
"""Load a Session from a compressed zipfile
Args:
fn (Path): The zipfile path
Returns:
MultitumorBatch: An instance of this session
"""
return cls.deserialize(_load_zip(archive))

def save(self, archive: Path):
"""Save Session to a compressed zipfile
Args:
fn (Path): The zipfile path
"""
return _make_zip(self.serialize(), archive)
2 changes: 1 addition & 1 deletion tests/test_pybmds/models/test_multi_tumor.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_execute(self, mt_datasets, rewrite_data_files, data_path):
df = session.datasets_df()

# docx
docx = session.to_docx()
docx = session.to_docx(all_models=True, bmd_cdf_table=True)

if rewrite_data_files:
(data_path / "reports/multitumor.txt").write_text(text)
Expand Down
Loading

0 comments on commit 4f97dea

Please sign in to comment.