upgrade tifffile #436

Merged
merged 7 commits on Oct 11, 2024
Changes from all commits
33 changes: 16 additions & 17 deletions .pre-commit-config.yaml
@@ -9,7 +9,7 @@ ci:

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
rev: v5.0.0
hooks:
# list of supported hooks: https://pre-commit.com/hooks.html
- id: trailing-whitespace
@@ -26,35 +26,35 @@ repos:

# python code formatting
- repo: https://github.com/psf/black
rev: 22.6.0
rev: 24.10.0
hooks:
- id: black
args: [--line-length, "99"]

# python import sorting
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
args: ["--profile", "black", "--filter-files"]

# python upgrading syntax to newer version
- repo: https://github.com/asottile/pyupgrade
rev: v2.32.1
rev: v3.17.0
hooks:
- id: pyupgrade
args: [--py38-plus]

# python docstring formatting
- repo: https://github.com/myint/docformatter
rev: v1.4
rev: 06907d0267368b49b9180eed423fae5697c1e909
hooks:
- id: docformatter
args: [--in-place, --wrap-summaries=99, --wrap-descriptions=99]

# python check (PEP8), programming errors and code complexity
- repo: https://github.com/PyCQA/flake8
rev: 4.0.1
rev: 7.1.1
hooks:
- id: flake8
args:
@@ -67,7 +67,7 @@ repos:

# python security linter
- repo: https://github.com/PyCQA/bandit
rev: "1.7.1"
rev: "1.7.10"
hooks:
- id: bandit
args: ["-s", "B101"]
@@ -82,26 +82,24 @@ repos:

# shell scripts linter
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: v0.8.0.4
rev: v0.10.0.1
hooks:
- id: shellcheck

# md formatting
- repo: https://github.com/executablebooks/mdformat
rev: 0.7.14
rev: 0.7.17
hooks:
- id: mdformat
args: ["--number"]
additional_dependencies:
- mdformat-gfm
- mdformat-tables
- mdformat_frontmatter
# - mdformat-toc
# - mdformat-black

# word spelling linter
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
rev: v2.3.0
hooks:
- id: codespell
args:
@@ -110,13 +108,13 @@ repos:

# jupyter notebook cell output clearing
- repo: https://github.com/kynan/nbstripout
rev: 0.5.0
rev: 0.7.1
hooks:
- id: nbstripout

# jupyter notebook linting
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.4.0
rev: 1.8.7
hooks:
- id: nbqa-black
args: ["--line-length=99"]
@@ -130,17 +128,18 @@
]

- repo: https://github.com/dosisod/refurb
rev: v1.3.0
rev: v2.0.0
hooks:
- id: refurb
language_version: python3.10
args:
- --ignore
- FURB120
- --ignore
- FURB123

- repo: https://github.com/asottile/blacken-docs
rev: v1.12.1
rev: 1.19.0
hooks:
- id: blacken-docs
args: [--line-length=120]
additional_dependencies: [black==21.12b0]
2 changes: 1 addition & 1 deletion configs/model/im2im/ijepa.yaml
@@ -6,7 +6,7 @@ save_dir: ${paths.output_dir}

encoder:
_target_: cyto_dl.nn.vits.encoder.JEPAEncoder
patch_size: 2 # patch_size * num_patches should equl data._aux.patch_shape
patch_size: 2 # patch_size * num_patches should equal data._aux.patch_shape
num_patches: ${model._aux.num_patches}
emb_dim: 16
num_layer: 2
6 changes: 3 additions & 3 deletions cyto_dl/api/cyto_dl_model/cyto_dl_base_model.py
@@ -36,9 +36,9 @@ def _get_experiment_type(cls) -> ExperimentType:
def from_existing_config(cls, config_filepath: Path):
"""Returns a model from an existing config.

:param config_filepath: path to a .yaml config file that will be used as the basis
for this CytoDLBaseModel (must be generated by the CytoDLBaseModel subclass that wants
to use it).
:param config_filepath: path to a .yaml config file that will be used as the basis for this
CytoDLBaseModel (must be generated by the CytoDLBaseModel subclass that wants to use
it).
"""
return cls(OmegaConf.load(config_filepath))

6 changes: 3 additions & 3 deletions cyto_dl/callbacks/outlier_detection.py
@@ -64,7 +64,7 @@ def fn(_, __, output):
return fn

def _update_covariance(self, output, layer_name):
"""record spatial mean and cov of channel activations per image in batch."""
"""Record spatial mean and cov of channel activations per image in batch."""
output = self.flatten_activations(output)
if self.mu[layer_name] is None:
self.mu[layer_name] = np.zeros(output.shape[1])
@@ -76,7 +76,7 @@ def _update_covariance(self, output, layer_name):
self.n += 1

def on_train_epoch_start(self, trainer, pl_module):
"""set forward hook."""
"""Set forward hook."""
if trainer.current_epoch == trainer.max_epochs - self.n_epochs:
named_modules = dict([*pl_module.backbone.named_modules()])
for layer_name in self.layer_names:
@@ -101,7 +101,7 @@ def _calculate_mahalanobis(self, output, layer_name):
self.activations[layer_name].append(out)

def _inference_start(self, pl_module):
"""add mahalanobis calculation hook and calculate inverse covariance matrix."""
"""Add mahalanobis calculation hook and calculate inverse covariance matrix."""
if self._run:
named_modules = dict([*pl_module.backbone.named_modules()])
for layer_name in self.layer_names:
2 changes: 1 addition & 1 deletion cyto_dl/dataframe/readers.py
@@ -149,7 +149,7 @@ def read_dataframe(
include_columns = sorted(list(include_columns))
required_columns = sorted(list(required_columns))

if len(include_columns) == 0:
if not include_columns:
include_columns = None

if isinstance(dataframe, str):
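Note on the readers.py change above: swapping the explicit length comparison for a truthiness test is the PEP 8-recommended idiom for sequences. A minimal sketch of the equivalence, with an illustrative value:

```python
include_columns = []  # illustrative empty sequence

# The two conditions are equivalent for sequences; PEP 8 prefers the second.
assert (len(include_columns) == 0) == (not include_columns)
```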
2 changes: 1 addition & 1 deletion cyto_dl/datamodules/dataframe/dataframe_datamodule.py
@@ -163,7 +163,7 @@ def get_dataset(self, split):
return self.datasets[split][sample]

def make_dataloader(self, split):
kwargs = dict(**self.dataloader_kwargs)
kwargs = {**self.dataloader_kwargs}
kwargs["shuffle"] = kwargs.get("shuffle", True) and split == "train"
kwargs["batch_size"] = self.batch_size

@@ -116,7 +116,7 @@ def __init__(
self.target_columns = target_columns

def make_dataloader(self, split):
kwargs = dict(**self.dataloader_kwargs)
kwargs = {**self.dataloader_kwargs}
kwargs["shuffle"] = kwargs.get("shuffle", True) and split == "train"
subset = self.get_dataset(split)

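The same substitution appears in both datamodules: `dict(**self.dataloader_kwargs)` becomes the unpacking literal `{**self.dataloader_kwargs}`. Both build a shallow copy, but the literal also accepts non-string keys; a sketch with illustrative values:

```python
opts = {"batch_size": 8, "shuffle": True}
assert dict(**opts) == {**opts}  # equivalent shallow copies here

mixed = {1: "positional"}
copy_ = {**mixed}  # fine with a non-string key
# dict(**mixed) would raise TypeError: keywords must be strings
```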
19 changes: 12 additions & 7 deletions cyto_dl/datamodules/smartcache.py
@@ -1,3 +1,4 @@
from itertools import chain
from pathlib import Path
from typing import Optional, Union

@@ -118,14 +119,18 @@ def _get_file_args(self, row):
for timepoint in timepoints:
img_data.append(
{
"dimension_order_out": "ZYX"[-self.spatial_dims :]
if not use_neighbors
else "T" + "ZYX"[-self.spatial_dims :],
"dimension_order_out": (
"ZYX"[-self.spatial_dims :]
if not use_neighbors
else "T" + "ZYX"[-self.spatial_dims :]
),
"C": row[self.channel_column],
"scene": scene,
"T": timepoint
if not use_neighbors
else [timepoint + i for i in range(self.num_neighbors + 1)],
"T": (
timepoint
if not use_neighbors
else [timepoint + i for i in range(self.num_neighbors + 1)]
),
"original_path": row[self.img_path_column],
}
)
@@ -136,7 +141,7 @@ def get_per_file_args(self, df):
timepoints/channels/scenes for each file in the dataframe."""
with ProgressBar():
img_data = dask.compute(*[self._get_file_args(row) for row in df.itertuples()])
img_data = [item for sublist in img_data for item in sublist]
img_data = list(chain.from_iterable(img_data))
return img_data

def prepare_data(self):
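Two separate updates meet in the smartcache.py hunks: black 24.x wraps the multi-line conditional expressions in parentheses, and the hand-rolled nested-comprehension flatten is replaced with `itertools.chain.from_iterable`. A sketch of the flattening equivalence on illustrative data:

```python
from itertools import chain

img_data = [[{"scene": 0}], [{"scene": 1}, {"scene": 2}]]  # illustrative

flat_old = [item for sublist in img_data for item in sublist]
flat_new = list(chain.from_iterable(img_data))

assert flat_old == flat_new
```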
6 changes: 3 additions & 3 deletions cyto_dl/image/io/aicsimage_loader.py
@@ -9,9 +9,9 @@
class AICSImageLoaderd(Transform):
"""Enumerates scenes and timepoints for dictionary with format.

{path_key: path, channel_key: channel, scene_key: scene, timepoint_key: timepoint}.
Differs from monai_bio_reader in that reading kwargs are passed in the dictionary, instead of
fixed at initialization.
{path_key: path, channel_key: channel, scene_key: scene, timepoint_key: timepoint}. Differs
from monai_bio_reader in that reading kwargs are passed in the dictionary, instead of fixed at
initialization.
"""

def __init__(
5 changes: 1 addition & 4 deletions cyto_dl/image/io/monai_bio_reader.py
@@ -30,10 +30,7 @@ def __init__(self, dask_load: bool = True, **reader_kwargs):

def read(self, data: Union[Sequence[PathLike], PathLike]):
filenames: Sequence[PathLike] = ensure_tuple(data)
img_ = []
for name in filenames:
img_.append(BioImage(f"{name}"))

img_ = [BioImage(name) for name in filenames]
return img_ if len(filenames) > 1 else img_[0]

def get_data(self, img) -> Tuple[np.ndarray, Dict]:
2 changes: 1 addition & 1 deletion cyto_dl/image/io/skimage_reader.py
@@ -25,7 +25,7 @@ def read(self, data: Union[Sequence[PathLike], PathLike]):
filenames: Sequence[PathLike] = ensure_tuple(data)
img_ = []
for name in filenames:
this_im = imread(f"{name}")
this_im = imread(name)
if self.channels:
this_im = this_im[self.channels]

2 changes: 1 addition & 1 deletion cyto_dl/image/transforms/multiscale_cropper.py
@@ -92,7 +92,7 @@ def _apply_slice(data, slicee):
@staticmethod
def _generate_slice(start_coords: Sequence[int], roi_size: Sequence[int]) -> slice:
"""Creates slice starting at `start_coords` of size `roi_size`"""
return [slice(None, None)] + [
return [slice(None, None)] + [ # noqa: FURB140
slice(start, end) for start, end in zip(start_coords, start_coords + roi_size)
]

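The `# noqa: FURB140` marker appears to suppress refurb's `itertools.starmap` suggestion for comprehensions that unpack pairs; the comprehension is kept, presumably for readability. A sketch of the two equivalent forms, assuming that reading of the rule:

```python
from itertools import starmap

coords = [(0, 4), (10, 14)]  # illustrative (start, end) pairs

slices_kept = [slice(start, end) for start, end in coords]  # form kept via noqa
slices_starmap = list(starmap(slice, coords))               # what FURB140 would propose

assert slices_kept == slices_starmap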
19 changes: 11 additions & 8 deletions cyto_dl/loggers/mlflow.py
@@ -55,7 +55,7 @@ def log_hyperparams(self, params: Union[Dict[str, Any], Namespace], mode="train"

with tempfile.TemporaryDirectory() as tmp_dir:
conf_path = Path(tmp_dir) / f"{mode}.yaml"
with open(conf_path, "w") as f:
with conf_path.open("w") as f:
config = OmegaConf.create(params)
OmegaConf.save(config=config, f=f)

@@ -133,7 +133,7 @@ def _after_save_checkpoint(self, ckpt_callback: ModelCheckpoint) -> None:
self.run_id, local_path=best_path, artifact_path=artifact_path
)

os.unlink(best_path)
best_path.unlink()

else:
filepath = ckpt_callback.best_model_path
@@ -149,17 +149,20 @@ def _after_save_checkpoint(self, ckpt_callback: ModelCheckpoint) -> None:
self.run_id, local_path=last_path, artifact_path=artifact_path
)

os.unlink(last_path)
last_path.unlink()
else:
self.experiment.log_artifact(
self.run_id, local_path=filepath, artifact_path=artifact_path
)


def _delete_local_artifact(repo, artifact_path):
artifact_path = local_file_uri_to_path(
os.path.join(repo._artifact_dir, artifact_path) if artifact_path else repo._artifact_dir
artifact_path = Path(
local_file_uri_to_path(
os.path.join(repo._artifact_dir, artifact_path)
if artifact_path
else repo._artifact_dir
)
)

if os.path.isfile(artifact_path):
os.remove(artifact_path)
if artifact_path.is_file():
artifact_path.unlink()
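The mlflow.py changes migrate from `os`-level calls to `pathlib.Path` methods (`open(p, "w")` to `p.open("w")`, `os.unlink`/`os.remove` to `Path.unlink`, `os.path.isfile` to `Path.is_file`). A minimal sketch of the pathlib equivalents, using an illustrative path:

```python
from pathlib import Path

p = Path("artifacts") / "train.yaml"  # illustrative
p.parent.mkdir(parents=True, exist_ok=True)

with p.open("w") as f:   # replaces open(p, "w")
    f.write("mode: train\n")

if p.is_file():          # replaces os.path.isfile(p)
    p.unlink()           # replaces os.unlink(p) / os.remove(p)
```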
2 changes: 1 addition & 1 deletion cyto_dl/models/base_model.py
@@ -23,7 +23,7 @@


def _is_primitive(value):
if isinstance(value, (type(None), bool, str, int, float)):
if value is None or isinstance(value, (bool, str, int, float)):
return True

if isinstance(value, (tuple, list)):
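`isinstance(value, type(None))` works but is roundabout; since `None` is a singleton, an identity check is the idiomatic test and short-circuits before the `isinstance` call. A condensed sketch of the updated predicate (the recursive tuple/list branch of the real function is omitted):

```python
def _is_primitive(value):
    # Identity check for the None singleton, then the concrete primitive types.
    return value is None or isinstance(value, (bool, str, int, float))

assert _is_primitive(None)
assert _is_primitive(3.5)
assert not _is_primitive([1, 2])
```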
4 changes: 2 additions & 2 deletions cyto_dl/models/basic_model.py
@@ -59,11 +59,11 @@ def __init__(

super().__init__(metrics=metrics)

if network is None and pretrained_weights is None:
if network is pretrained_weights is None:
raise ValueError("`network` and `pretrained_weights` can't both be None.")

if pretrained_weights is not None:
pretrained_weights = torch.load(pretrained_weights)
pretrained_weights = torch.load(pretrained_weights) # nosec B614

if network is not None:
self.network = network
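Two things happen in this hunk. The chained comparison `network is pretrained_weights is None` is shorthand for both operands being `None`, as the sketch below checks. The `# nosec B614` marker acknowledges what appears to be bandit's PyTorch load/save check (plausibly enabled by the 1.7.10 bump earlier in this diff), since `torch.load` unpickles arbitrary objects from the checkpoint file.

```python
network = pretrained_weights = None  # illustrative

# a is b is c  evaluates as  (a is b) and (b is c)
assert (network is pretrained_weights is None) == (
    network is None and pretrained_weights is None
)
```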
2 changes: 1 addition & 1 deletion cyto_dl/models/classification/timepoint_classification.py
@@ -50,7 +50,7 @@ def predict_step(self, batch, batch_idx):
batch,
"predict",
logits,
name=f"{batch['track_id'].cpu().item()}",
name=str(batch["track_id"].cpu().item()),
)

timepoints = np.array(batch["timepoints"][0][1:-1].split(",")).astype(int)
6 changes: 3 additions & 3 deletions cyto_dl/models/handlers/base_handler.py
@@ -76,12 +76,12 @@ def postprocess(self, data):
mode = self.config["return"].get("mode", "network")

if mode == "path":
path = self.config["return"].get("path", "/tmp") # nosec: B108
path = self.config["return"].get("path", "/tmp") # nosec B108
response_path = Path(path) / f"{uuid.uuid4()}.pt"
torch.save(data, response_path)
torch.save(data, response_path) # nosec B614
return [str(response_path)]

buf = io.BytesIO()
torch.save(data, buf)
torch.save(data, buf) # nosec B614
buf.seek(0)
return [buf.read()]
4 changes: 2 additions & 2 deletions cyto_dl/models/im2im/gan.py
@@ -95,7 +95,7 @@ def configure_optimizers(self):
return (opts, scheds)

def _train_forward(self, batch, stage, save_image, run_heads):
"""during training we are only dealing with patches,so we can calculate per-patch loss,
"""During training we are only dealing with patches,so we can calculate per-patch loss,
metrics, postprocessing etc."""
z = self.backbone(batch[self.hparams.x_key])
return {
Expand All @@ -106,7 +106,7 @@ def _train_forward(self, batch, stage, save_image, run_heads):
}

def _inference_forward(self, batch, stage, save_image, run_heads):
"""during inference, we need to calculate per-fov loss/metrics/postprocessing.
"""During inference, we need to calculate per-fov loss/metrics/postprocessing.

To avoid storing and passing to each head the intermediate results of the backbone, we need
to run backbone + taskheads patch by patch, then do saving/postprocessing/etc on the entire