use the new types

fgcz · Jul 8, 2024 · 7e1135a · 7e1135a
1 parent 78568fc
commit 7e1135a
Show file tree

Hide file tree

Showing 11 changed files with 69 additions and 56 deletions.
diff --git a/src/depiction/calibration/chemical_noise_bg_2019_boskamp_v2.py b/src/depiction/calibration/chemical_noise_bg_2019_boskamp_v2.py
@@ -11,6 +11,7 @@
 
 from depiction.parallel_ops import ParallelConfig, WriteSpectraParallel
 from depiction.persistence import ImzmlWriteFile, ImzmlReadFile, ImzmlReader, ImzmlWriter
+from depiction.persistence.types import GenericReadFile, GenericWriteFile
 
 
 # TODO experimental/non-prod
@@ -140,8 +141,8 @@ def align_masses(
 
     def align_masses_all(
         self,
-        read_file: ImzmlReadFile,
-        write_file: ImzmlWriteFile,
+        read_file: GenericReadFile,
+        write_file: GenericWriteFile,
         parallel_config: ParallelConfig,
     ) -> None:
         """Applies `align_masses` to all spectra in the given file and writes the results to the output file."""

diff --git a/src/depiction/calibration/isotope_pattern_matcher.py b/src/depiction/calibration/isotope_pattern_matcher.py
@@ -8,6 +8,7 @@
 import numpy as np
 from numpy.typing import NDArray
 
+from depiction.persistence.types import GenericReader, GenericReadFile
 from depiction.spectrum.peak_picking.basic_peak_picker import BasicPeakPicker
 from depiction.misc.numpy_util import NumpyUtil
 from depiction.parallel_ops import ParallelConfig, ReadSpectraParallel
@@ -101,7 +102,7 @@ def compute_averagine_agreement_at_positions(
 
     def compute_averagine_agreement_at_mz_positions_for_file(
         self,
-        read_file: ImzmlReadFile,
+        read_file: GenericReadFile,
         parallel_config: ParallelConfig,
         peak_picker: BasicPeakPicker,
         n_limit: int,
@@ -111,7 +112,7 @@ def compute_averagine_agreement_at_mz_positions_for_file(
     ) -> list[tuple[NDArray[float], NDArray[int]]]:
         # TODO possibly move this method in the future (since it mixes peak_picker into this class)
 
-        def operation_file(reader: ImzmlReader, spectra_ids: list[int]) -> list[tuple[NDArray[float], NDArray[int]]]:
+        def operation_file(reader: GenericReader, spectra_ids: list[int]) -> list[tuple[NDArray[float], NDArray[int]]]:
             results = []
             for spectrum_id in spectra_ids:
                 mz_arr, int_arr = reader.get_spectrum(spectrum_id)

diff --git a/src/depiction/calibration/perform_calibration.py b/src/depiction/calibration/perform_calibration.py
@@ -13,7 +13,7 @@
 from depiction.calibration.calibration_method import CalibrationMethod
 from depiction.parallel_ops import ParallelConfig, ReadSpectraParallel, WriteSpectraParallel
 from depiction.parallel_ops.parallel_map import ParallelMap
-from depiction.persistence import ImzmlReadFile, ImzmlWriteFile, ImzmlReader, ImzmlWriter
+from depiction.persistence.types import GenericReadFile, GenericWriteFile, GenericReader, GenericWriter
 
 
 class PerformCalibration:
@@ -76,7 +76,7 @@ def _validate_per_spectra_array(self, array: DataArray, coordinates_2d: NDArray[
             raise ValueError(errors)
 
     def calibrate_image(
-        self, read_peaks: ImzmlReadFile, write_file: ImzmlWriteFile, read_full: Optional[ImzmlReadFile] = None
+        self, read_peaks: GenericReadFile, write_file: GenericWriteFile, read_full: Optional[GenericReadFile] = None
     ) -> None:
         if read_full is None:
             read_full = read_peaks
@@ -99,7 +99,7 @@ def calibrate_image(
         logger.info("Applying models...")
         self._apply_all_models(read_file=read_full, write_file=write_file, all_model_coefs=model_coefs)
 
-    def _extract_all_features(self, read_peaks: ImzmlReadFile) -> DataArray:
+    def _extract_all_features(self, read_peaks: GenericReadFile) -> DataArray:
         read_parallel = ReadSpectraParallel.from_config(self._parallel_config)
         all_features = read_parallel.map_chunked(
             read_file=read_peaks,
@@ -114,7 +114,7 @@ def _extract_all_features(self, read_peaks: ImzmlReadFile) -> DataArray:
         )
 
     def _apply_all_models(
-        self, read_file: ImzmlReadFile, write_file: ImzmlWriteFile, all_model_coefs: DataArray
+        self, read_file: GenericReadFile, write_file: GenericWriteFile, all_model_coefs: DataArray
     ) -> None:
         write_parallel = WriteSpectraParallel.from_config(self._parallel_config)
         write_parallel.map_chunked_to_file(
@@ -155,7 +155,7 @@ def _write_data_array(self, array: DataArray, group: str) -> None:
 
     @staticmethod
     def _extract_chunk_features(
-        reader: ImzmlReader,
+        reader: GenericReader,
         spectra_indices: list[int],
         calibration: CalibrationMethod,
     ) -> DataArray:
@@ -170,9 +170,9 @@ def _extract_chunk_features(
 
     @staticmethod
     def _calibrate_spectra(
-        reader: ImzmlReader,
+        reader: GenericReader,
         spectra_indices: list[int],
-        writer: ImzmlWriter,
+        writer: GenericWriter,
         calibration: CalibrationMethod,
         all_model_coefs: DataArray,
     ) -> None:

diff --git a/src/depiction/estimate_ppm_error.py b/src/depiction/estimate_ppm_error.py
@@ -1,11 +1,15 @@
-from typing import Optional
+from __future__ import annotations
+
 from collections.abc import Sequence
+from typing import Optional, TYPE_CHECKING
 
 import numpy as np
 
 from depiction.parallel_ops import ParallelConfig
 from depiction.parallel_ops.read_spectra_parallel import ReadSpectraParallel
-from depiction.persistence import ImzmlReader, ImzmlReadFile
+
+if TYPE_CHECKING:
+    from depiction.persistence.types import GenericReadFile, GenericReader
 
 
 class EstimatePPMError:
@@ -16,7 +20,7 @@ def __init__(self, parallel_config: Optional[ParallelConfig] = None) -> None:
             parallel_config = ParallelConfig.no_parallelism()
         self._parallel_config = parallel_config
 
-    def estimate(self, read_file: ImzmlReadFile) -> dict[str, float]:
+    def estimate(self, read_file: GenericReadFile) -> dict[str, float]:
         """
         Estimates the PPM error for the given imzML file.
         Returns a dictionary containing the median and std of the PPM error medians (for each spectrum).
@@ -34,7 +38,7 @@ def estimate(self, read_file: ImzmlReadFile) -> dict[str, float]:
 
     @staticmethod
     def _get_ppm_values(
-        reader: ImzmlReader,
+        reader: GenericReader,
         spectra_ids: Sequence[int],
     ) -> tuple[list[float], float, float]:
         result_ppm = []

diff --git a/src/depiction/misc/experimental/resample_mass_axis.py b/src/depiction/misc/experimental/resample_mass_axis.py
@@ -1,11 +1,14 @@
+from __future__ import annotations
+
 from dataclasses import dataclass
 
 import numpy as np
 from numpy.typing import NDArray
 from scipy.interpolate import CubicSpline
 
 from depiction.parallel_ops import ParallelConfig, WriteSpectraParallel
-from depiction.persistence import ImzmlReadFile, ImzmlWriteFile, ImzmlReader, ImzmlWriter, ImzmlModeEnum
+from depiction.persistence import ImzmlModeEnum
+from depiction.persistence.types import GenericWriteFile, GenericReadFile, GenericWriter, GenericReader
 
 
 @dataclass
@@ -20,8 +23,8 @@ def evaluate_spectrum(self, mz_arr: NDArray[float], int_arr: NDArray[float]) ->
 
     def evaluate_file(
         self,
-        read_file: ImzmlReadFile,
-        write_file: ImzmlWriteFile,
+        read_file: GenericReadFile,
+        write_file: GenericWriteFile,
         parallel_config: ParallelConfig,
         allow_processed: bool = False,
     ) -> None:
@@ -40,7 +43,7 @@ def evaluate_file(
 
     @classmethod
     def _evaluate_file_chunk(
-        cls, reader: ImzmlReader, spectra_ids: list[int], writer: ImzmlWriter, target_mz_arr: NDArray[float]
+        cls, reader: GenericReader, spectra_ids: list[int], writer: GenericWriter, target_mz_arr: NDArray[float]
     ) -> None:
         resampler = ResampleMassAxis(target_mz_arr=target_mz_arr)
 

diff --git a/src/depiction/parallel_ops/read_spectra_parallel.py b/src/depiction/parallel_ops/read_spectra_parallel.py
@@ -15,7 +15,7 @@
 
 if TYPE_CHECKING:
     from numpy.typing import NDArray
-    from depiction.persistence import ImzmlReadFile, ImzmlReader
+    from depiction.persistence.types import GenericReadFile, GenericReader
 
     T = TypeVar("T")
     V = TypeVar("V")
@@ -47,8 +47,8 @@ def config(self) -> ParallelConfig:
 
     def map_chunked(
         self,
-        read_file: ImzmlReadFile,
-        operation: Callable[[ImzmlReader, list[int], ...], T] | Callable[[ImzmlReader, list[int], int, ...], T],
+        read_file: GenericReadFile,
+        operation: Callable[[GenericReader, list[int], ...], T] | Callable[[GenericReader, list[int], int, ...], T],
         spectra_indices: NDArray[int] | None = None,
         bind_args: dict[str, Any] | None = None,
         reduce_fn: Callable[[list[T]], V] = list,
@@ -58,8 +58,8 @@ def map_chunked(
         :param read_file: the file to read the spectra from
         :param operation: the operation to apply to each chunk of spectra
             there are two possible signatures for the operation:
-            - operation(reader: ImzmlReader, spectra_ids: list[int], **kwargs) -> T
-            - operation(reader: ImzmlReader, spectra_ids: list[int], task_index: int, **kwargs) -> T
+            - operation(reader: GenericReader, spectra_ids: list[int], **kwargs) -> T
+            - operation(reader: GenericReader, spectra_ids: list[int], task_index: int, **kwargs) -> T
             where:
             - reader: the reader object to read the spectra from
             - spectra_ids: the indices of the spectra to process

diff --git a/src/depiction/parallel_ops/write_spectra_parallel.py b/src/depiction/parallel_ops/write_spectra_parallel.py
@@ -10,15 +10,14 @@
 from depiction.persistence import (
     ImzmlReadFile,
     ImzmlWriteFile,
-    ImzmlReader,
-    ImzmlWriter,
     ImzmlModeEnum,
 )
 from depiction.tools.merge_imzml import MergeImzml
 
 if TYPE_CHECKING:
     from numpy.typing import NDArray
     from depiction.parallel_ops.parallel_config import ParallelConfig
+    from depiction.persistence.types import GenericReadFile, GenericWriteFile, GenericWriter, GenericReader
 
 
 class WriteSpectraParallel:
@@ -31,11 +30,11 @@ def from_config(cls, config: ParallelConfig) -> WriteSpectraParallel:
 
     def map_chunked_to_files(
         self,
-        read_file: ImzmlReadFile,
-        write_files: list[ImzmlWriteFile],
+        read_file: GenericReadFile,
+        write_files: list[GenericWriteFile],
         operation: (
-            Callable[[ImzmlReader, list[int], list[ImzmlWriter], ...], None]
-            | Callable[[ImzmlReader, list[int], list[ImzmlWriteFile], ...], None]
+            Callable[[GenericReader, list[int], list[GenericWriter], ...], None]
+            | Callable[[GenericReader, list[int], list[GenericWriteFile], ...], None]
         ),
         spectra_indices: NDArray[int] | None = None,
         bind_args: dict[str, Any] | None = None,
@@ -82,16 +81,16 @@ def map_chunked_to_files(
 
     def map_chunked_external_to_files(
         self,
-        read_file: ImzmlReadFile,
-        write_files: list[ImzmlWriteFile],
+        read_file: GenericReadFile,
+        write_files: list[GenericWriteFile],
         operation: Callable[[str, list[str]], None],
         spectra_indices: NDArray[int] | None = None,
         bind_args: dict[str, Any] | None = None,
     ) -> None:
         def op(
-            reader: ImzmlReader,
+            reader: GenericReader,
             spectra_ids: list[int],
-            write_files: list[ImzmlWriteFile],
+            write_files: list[GenericWriteFile],
             **kwargs: dict[str, Any],
         ) -> None:
             # TODO maybe kwarg handling could be done a bit more cleanly here in the future
@@ -122,8 +121,8 @@ def op(
     def _get_split_modes_and_paths(
         self,
         work_directory: Path,
-        read_file: ImzmlReadFile,
-        write_files: list[ImzmlWriteFile],
+        read_file: GenericReadFile,
+        write_files: list[GenericWriteFile],
         spectra_indices: NDArray[int] | None,
     ) -> list[tuple[ImzmlModeEnum, list[Path]]]:
         # determine the number of tasks
@@ -143,12 +142,12 @@ def _get_split_modes_and_paths(
 
     @staticmethod
     def _write_transformed_chunked_operation(
-        reader: ImzmlReader,
+        reader: GenericReader,
         spectra_indices: list[int],
         task_index: int,
         operation: (
-            Callable[[ImzmlReader, list[int], list[ImzmlWriter], ...], None]
-            | Callable[[ImzmlReader, list[int], list[ImzmlWriteFile], ...], None]
+            Callable[[GenericReader, list[int], list[GenericWriter], ...], None]
+            | Callable[[GenericReader, list[int], list[GenericWriteFile], ...], None]
         ),
         open_write_files: bool,
         split_modes_and_paths: list[tuple[ImzmlModeEnum, list[Path]]],
@@ -179,7 +178,7 @@ def _write_transformed_chunked_operation(
     def _merge_results(
         self,
         split_modes_and_paths: list[tuple[ImzmlModeEnum, list[str]]],
-        write_files: list[ImzmlWriteFile],
+        write_files: list[GenericWriteFile],
     ) -> None:
         """Merges the results of the parallel operations
         :param split_modes_and_paths: the split modes and paths
@@ -193,14 +192,14 @@ def _merge_results(
 
     def map_chunked_to_file(
         self,
-        read_file: ImzmlReadFile,
-        write_file: ImzmlWriteFile,
-        operation: Callable[[ImzmlReader, list[int], ImzmlWriter], None],
+        read_file: GenericReadFile,
+        write_file: GenericWriteFile,
+        operation: Callable[[GenericReader, list[int], GenericWriter], None],
         spectra_indices: NDArray[int] | None = None,
         bind_args: dict[str, Any] | None = None,
     ) -> None:
         def wrap_operation(
-            reader: ImzmlReader, spectra_ids: list[int], writers: list[ImzmlWriter], **kwargs: dict[str, Any]
+            reader: GenericReader, spectra_ids: list[int], writers: list[GenericWriter], **kwargs: dict[str, Any]
         ) -> None:
             operation(reader, spectra_ids, writers[0], **kwargs)
 

diff --git a/src/depiction/spectrum/baseline/tophat_baseline.py b/src/depiction/spectrum/baseline/tophat_baseline.py
@@ -1,12 +1,13 @@
+from __future__ import annotations
 from dataclasses import dataclass
 from typing import Literal
 
 import numpy as np
 from numba import njit
 from numpy.typing import NDArray
 
+from depiction.persistence.types import GenericReadFile
 from depiction.spectrum.baseline.baseline import Baseline
-from depiction.persistence import ImzmlReadFile
 
 
 @dataclass(frozen=True)
@@ -44,7 +45,7 @@ def get_element_size(self, mz_arr: NDArray[float]) -> int:
         else:
             raise ValueError(f"Invalid {self.window_unit=}")
 
-    def optimize_window_size(self, read_file: ImzmlReadFile, n_spectra: int, rng_seed: int = 0) -> int:
+    def optimize_window_size(self, read_file: GenericReadFile, n_spectra: int, rng_seed: int = 0) -> int:
         """Optimizes the window size for the provided file, by considering some random spectra. It's possible to set the
         value even to 1, if only one spectrum should be considered.
         :param read_file: The file to optimize the window size for.

diff --git a/src/depiction/spectrum/evaluate_bins.py b/src/depiction/spectrum/evaluate_bins.py
@@ -1,11 +1,12 @@
+from __future__ import annotations
 import enum
 
 import numba
 import numpy as np
 from numpy.typing import NDArray
 
 from depiction.parallel_ops import ParallelConfig, WriteSpectraParallel
-from depiction.persistence import ImzmlReadFile, ImzmlWriteFile, ImzmlReader, ImzmlWriter
+from depiction.persistence.types import GenericReadFile, GenericWriteFile, GenericReader, GenericWriter
 
 
 class BinStatistic(enum.Enum):
@@ -47,7 +48,7 @@ def evaluate(self, mz_arr: NDArray[float], int_arr: NDArray[float]) -> NDArray[f
         )
 
     def evaluate_file(
-        self, read_file: ImzmlReadFile, write_file: ImzmlWriteFile, parallel_config: ParallelConfig
+        self, read_file: GenericReadFile, write_file: GenericWriteFile, parallel_config: ParallelConfig
     ) -> None:
         write_parallel = WriteSpectraParallel.from_config(parallel_config)
         write_parallel.map_chunked_to_file(
@@ -62,9 +63,9 @@ def evaluate_file(
 
     @staticmethod
     def _compute_chunk(
-        reader: ImzmlReader,
+        reader: GenericReader,
         spectra_ids: list[int],
-        writer: ImzmlWriter,
+        writer: GenericWriter,
         bin_edges: NDArray[float],
         statistic: int,
     ) -> None: