From 7e1135a32d07c304cdbb7512e197273dcc586aba Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 8 Jul 2024 08:50:55 +0200 Subject: [PATCH] use the new types --- .../chemical_noise_bg_2019_boskamp_v2.py | 5 ++- .../calibration/isotope_pattern_matcher.py | 5 ++- .../calibration/perform_calibration.py | 14 +++---- src/depiction/estimate_ppm_error.py | 12 ++++-- .../misc/experimental/resample_mass_axis.py | 11 ++++-- .../parallel_ops/read_spectra_parallel.py | 10 ++--- .../parallel_ops/write_spectra_parallel.py | 39 +++++++++---------- .../spectrum/baseline/tophat_baseline.py | 5 ++- src/depiction/spectrum/evaluate_bins.py | 9 +++-- .../spectrum/evaluate_mean_spectrum.py | 14 ++++--- src/depiction/tools/align_imzml.py | 1 + 11 files changed, 69 insertions(+), 56 deletions(-) diff --git a/src/depiction/calibration/chemical_noise_bg_2019_boskamp_v2.py b/src/depiction/calibration/chemical_noise_bg_2019_boskamp_v2.py index aee3ca1..208bbbe 100644 --- a/src/depiction/calibration/chemical_noise_bg_2019_boskamp_v2.py +++ b/src/depiction/calibration/chemical_noise_bg_2019_boskamp_v2.py @@ -11,6 +11,7 @@ from depiction.parallel_ops import ParallelConfig, WriteSpectraParallel from depiction.persistence import ImzmlWriteFile, ImzmlReadFile, ImzmlReader, ImzmlWriter +from depiction.persistence.types import GenericReadFile, GenericWriteFile # TODO experimental/non-prod @@ -140,8 +141,8 @@ def align_masses( def align_masses_all( self, - read_file: ImzmlReadFile, - write_file: ImzmlWriteFile, + read_file: GenericReadFile, + write_file: GenericWriteFile, parallel_config: ParallelConfig, ) -> None: """Applies `align_masses` to all spectra in the given file and writes the results to the output file.""" diff --git a/src/depiction/calibration/isotope_pattern_matcher.py b/src/depiction/calibration/isotope_pattern_matcher.py index 0d7cb98..13ca1fa 100644 --- a/src/depiction/calibration/isotope_pattern_matcher.py +++ b/src/depiction/calibration/isotope_pattern_matcher.py @@ -8,6 +8,7 @@ import numpy as np from numpy.typing import NDArray +from depiction.persistence.types import GenericReader, GenericReadFile from depiction.spectrum.peak_picking.basic_peak_picker import BasicPeakPicker from depiction.misc.numpy_util import NumpyUtil from depiction.parallel_ops import ParallelConfig, ReadSpectraParallel @@ -101,7 +102,7 @@ def compute_averagine_agreement_at_positions( def compute_averagine_agreement_at_mz_positions_for_file( self, - read_file: ImzmlReadFile, + read_file: GenericReadFile, parallel_config: ParallelConfig, peak_picker: BasicPeakPicker, n_limit: int, @@ -111,7 +112,7 @@ def compute_averagine_agreement_at_mz_positions_for_file( ) -> list[tuple[NDArray[float], NDArray[int]]]: # TODO possibly move this method in the future (since it mixes peak_picker into this class) - def operation_file(reader: ImzmlReader, spectra_ids: list[int]) -> list[tuple[NDArray[float], NDArray[int]]]: + def operation_file(reader: GenericReader, spectra_ids: list[int]) -> list[tuple[NDArray[float], NDArray[int]]]: results = [] for spectrum_id in spectra_ids: mz_arr, int_arr = reader.get_spectrum(spectrum_id) diff --git a/src/depiction/calibration/perform_calibration.py b/src/depiction/calibration/perform_calibration.py index d60b2ab..a4328eb 100644 --- a/src/depiction/calibration/perform_calibration.py +++ b/src/depiction/calibration/perform_calibration.py @@ -13,7 +13,7 @@ from depiction.calibration.calibration_method import CalibrationMethod from depiction.parallel_ops import ParallelConfig, ReadSpectraParallel, WriteSpectraParallel from depiction.parallel_ops.parallel_map import ParallelMap -from depiction.persistence import ImzmlReadFile, ImzmlWriteFile, ImzmlReader, ImzmlWriter +from depiction.persistence.types import GenericReadFile, GenericWriteFile, GenericReader, GenericWriter class PerformCalibration: @@ -76,7 +76,7 @@ def _validate_per_spectra_array(self, array: DataArray, coordinates_2d: NDArray[ raise ValueError(errors) def calibrate_image( - self, read_peaks: ImzmlReadFile, write_file: ImzmlWriteFile, read_full: Optional[ImzmlReadFile] = None + self, read_peaks: GenericReadFile, write_file: GenericWriteFile, read_full: Optional[GenericReadFile] = None ) -> None: if read_full is None: read_full = read_peaks @@ -99,7 +99,7 @@ def calibrate_image( logger.info("Applying models...") self._apply_all_models(read_file=read_full, write_file=write_file, all_model_coefs=model_coefs) - def _extract_all_features(self, read_peaks: ImzmlReadFile) -> DataArray: + def _extract_all_features(self, read_peaks: GenericReadFile) -> DataArray: read_parallel = ReadSpectraParallel.from_config(self._parallel_config) all_features = read_parallel.map_chunked( read_file=read_peaks, @@ -114,7 +114,7 @@ def _extract_all_features(self, read_peaks: ImzmlReadFile) -> DataArray: ) def _apply_all_models( - self, read_file: ImzmlReadFile, write_file: ImzmlWriteFile, all_model_coefs: DataArray + self, read_file: GenericReadFile, write_file: GenericWriteFile, all_model_coefs: DataArray ) -> None: write_parallel = WriteSpectraParallel.from_config(self._parallel_config) write_parallel.map_chunked_to_file( @@ -155,7 +155,7 @@ def _write_data_array(self, array: DataArray, group: str) -> None: @staticmethod def _extract_chunk_features( - reader: ImzmlReader, + reader: GenericReader, spectra_indices: list[int], calibration: CalibrationMethod, ) -> DataArray: @@ -170,9 +170,9 @@ def _extract_chunk_features( @staticmethod def _calibrate_spectra( - reader: ImzmlReader, + reader: GenericReader, spectra_indices: list[int], - writer: ImzmlWriter, + writer: GenericWriter, calibration: CalibrationMethod, all_model_coefs: DataArray, ) -> None: diff --git a/src/depiction/estimate_ppm_error.py b/src/depiction/estimate_ppm_error.py index 5654275..673dc89 100644 --- a/src/depiction/estimate_ppm_error.py +++ b/src/depiction/estimate_ppm_error.py @@ -1,11 +1,15 @@ -from typing import Optional +from __future__ import annotations + from collections.abc import Sequence +from typing import Optional, TYPE_CHECKING import numpy as np from depiction.parallel_ops import ParallelConfig from depiction.parallel_ops.read_spectra_parallel import ReadSpectraParallel -from depiction.persistence import ImzmlReader, ImzmlReadFile + +if TYPE_CHECKING: + from depiction.persistence.types import GenericReadFile, GenericReader class EstimatePPMError: @@ -16,7 +20,7 @@ def __init__(self, parallel_config: Optional[ParallelConfig] = None) -> None: parallel_config = ParallelConfig.no_parallelism() self._parallel_config = parallel_config - def estimate(self, read_file: ImzmlReadFile) -> dict[str, float]: + def estimate(self, read_file: GenericReadFile) -> dict[str, float]: """ Estimates the PPM error for the given imzML file. Returns a dictionary containing the median and std of the PPM error medians (for each spectrum). @@ -34,7 +38,7 @@ def estimate(self, read_file: ImzmlReadFile) -> dict[str, float]: @staticmethod def _get_ppm_values( - reader: ImzmlReader, + reader: GenericReader, spectra_ids: Sequence[int], ) -> tuple[list[float], float, float]: result_ppm = [] diff --git a/src/depiction/misc/experimental/resample_mass_axis.py b/src/depiction/misc/experimental/resample_mass_axis.py index b89629b..c6952c3 100644 --- a/src/depiction/misc/experimental/resample_mass_axis.py +++ b/src/depiction/misc/experimental/resample_mass_axis.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from dataclasses import dataclass import numpy as np @@ -5,7 +7,8 @@ from scipy.interpolate import CubicSpline from depiction.parallel_ops import ParallelConfig, WriteSpectraParallel -from depiction.persistence import ImzmlReadFile, ImzmlWriteFile, ImzmlReader, ImzmlWriter, ImzmlModeEnum +from depiction.persistence import ImzmlModeEnum +from depiction.persistence.types import GenericWriteFile, GenericReadFile, GenericWriter, GenericReader @dataclass @@ -20,8 +23,8 @@ def evaluate_spectrum(self, mz_arr: NDArray[float], int_arr: NDArray[float]) -> def evaluate_file( self, - read_file: ImzmlReadFile, - write_file: ImzmlWriteFile, + read_file: GenericReadFile, + write_file: GenericWriteFile, parallel_config: ParallelConfig, allow_processed: bool = False, ) -> None: @@ -40,7 +43,7 @@ def evaluate_file( @classmethod def _evaluate_file_chunk( - cls, reader: ImzmlReader, spectra_ids: list[int], writer: ImzmlWriter, target_mz_arr: NDArray[float] + cls, reader: GenericReader, spectra_ids: list[int], writer: GenericWriter, target_mz_arr: NDArray[float] ) -> None: resampler = ResampleMassAxis(target_mz_arr=target_mz_arr) diff --git a/src/depiction/parallel_ops/read_spectra_parallel.py b/src/depiction/parallel_ops/read_spectra_parallel.py index ab49e7f..453ff6c 100644 --- a/src/depiction/parallel_ops/read_spectra_parallel.py +++ b/src/depiction/parallel_ops/read_spectra_parallel.py @@ -15,7 +15,7 @@ if TYPE_CHECKING: from numpy.typing import NDArray - from depiction.persistence import ImzmlReadFile, ImzmlReader + from depiction.persistence.types import GenericReadFile, GenericReader T = TypeVar("T") V = TypeVar("V") @@ -47,8 +47,8 @@ def config(self) -> ParallelConfig: def map_chunked( self, - read_file: ImzmlReadFile, - operation: Callable[[ImzmlReader, list[int], ...], T] | Callable[[ImzmlReader, list[int], int, ...], T], + read_file: GenericReadFile, + operation: Callable[[GenericReader, list[int], ...], T] | Callable[[GenericReader, list[int], int, ...], T], spectra_indices: NDArray[int] | None = None, bind_args: dict[str, Any] | None = None, reduce_fn: Callable[[list[T]], V] = list, @@ -58,8 +58,8 @@ def map_chunked( :param read_file: the file to read the spectra from :param operation: the operation to apply to each chunk of spectra there are two possible signatures for the operation: - - operation(reader: ImzmlReader, spectra_ids: list[int], **kwargs) -> T - - operation(reader: ImzmlReader, spectra_ids: list[int], task_index: int, **kwargs) -> T + - operation(reader: GenericReader, spectra_ids: list[int], **kwargs) -> T + - operation(reader: GenericReader, spectra_ids: list[int], task_index: int, **kwargs) -> T where: - reader: the reader object to read the spectra from - spectra_ids: the indices of the spectra to process diff --git a/src/depiction/parallel_ops/write_spectra_parallel.py b/src/depiction/parallel_ops/write_spectra_parallel.py index 8608d07..0d45c40 100644 --- a/src/depiction/parallel_ops/write_spectra_parallel.py +++ b/src/depiction/parallel_ops/write_spectra_parallel.py @@ -10,8 +10,6 @@ from depiction.persistence import ( ImzmlReadFile, ImzmlWriteFile, - ImzmlReader, - ImzmlWriter, ImzmlModeEnum, ) from depiction.tools.merge_imzml import MergeImzml @@ -19,6 +17,7 @@ if TYPE_CHECKING: from numpy.typing import NDArray from depiction.parallel_ops.parallel_config import ParallelConfig + from depiction.persistence.types import GenericReadFile, GenericWriteFile, GenericWriter, GenericReader class WriteSpectraParallel: @@ -31,11 +30,11 @@ def from_config(cls, config: ParallelConfig) -> WriteSpectraParallel: def map_chunked_to_files( self, - read_file: ImzmlReadFile, - write_files: list[ImzmlWriteFile], + read_file: GenericReadFile, + write_files: list[GenericWriteFile], operation: ( - Callable[[ImzmlReader, list[int], list[ImzmlWriter], ...], None] - | Callable[[ImzmlReader, list[int], list[ImzmlWriteFile], ...], None] + Callable[[GenericReader, list[int], list[GenericWriter], ...], None] + | Callable[[GenericReader, list[int], list[GenericWriteFile], ...], None] ), spectra_indices: NDArray[int] | None = None, bind_args: dict[str, Any] | None = None, @@ -82,16 +81,16 @@ def map_chunked_to_files( def map_chunked_external_to_files( self, - read_file: ImzmlReadFile, - write_files: list[ImzmlWriteFile], + read_file: GenericReadFile, + write_files: list[GenericWriteFile], operation: Callable[[str, list[str]], None], spectra_indices: NDArray[int] | None = None, bind_args: dict[str, Any] | None = None, ) -> None: def op( - reader: ImzmlReader, + reader: GenericReader, spectra_ids: list[int], - write_files: list[ImzmlWriteFile], + write_files: list[GenericWriteFile], **kwargs: dict[str, Any], ) -> None: # TODO maybe kwarg handling could be done a bit more clean here in the future @@ -122,8 +121,8 @@ def op( def _get_split_modes_and_paths( self, work_directory: Path, - read_file: ImzmlReadFile, - write_files: list[ImzmlWriteFile], + read_file: GenericReadFile, + write_files: list[GenericWriteFile], spectra_indices: NDArray[int] | None, ) -> list[tuple[ImzmlModeEnum, list[Path]]]: # determine the number of tasks @@ -143,12 +142,12 @@ def _get_split_modes_and_paths( @staticmethod def _write_transformed_chunked_operation( - reader: ImzmlReader, + reader: GenericReader, spectra_indices: list[int], task_index: int, operation: ( - Callable[[ImzmlReader, list[int], list[ImzmlWriter], ...], None] - | Callable[[ImzmlReader, list[int], list[ImzmlWriteFile], ...], None] + Callable[[GenericReader, list[int], list[GenericWriter], ...], None] + | Callable[[GenericReader, list[int], list[GenericWriteFile], ...], None] ), open_write_files: bool, split_modes_and_paths: list[tuple[ImzmlModeEnum, list[Path]]], @@ -179,7 +178,7 @@ def _write_transformed_chunked_operation( def _merge_results( self, split_modes_and_paths: list[tuple[ImzmlModeEnum, list[str]]], - write_files: list[ImzmlWriteFile], + write_files: list[GenericWriteFile], ) -> None: """Merges the results of the parallel operations :param split_modes_and_paths: the split modes and paths @@ -193,14 +192,14 @@ def _merge_results( def map_chunked_to_file( self, - read_file: ImzmlReadFile, - write_file: ImzmlWriteFile, - operation: Callable[[ImzmlReader, list[int], ImzmlWriter], None], + read_file: GenericReadFile, + write_file: GenericWriteFile, + operation: Callable[[GenericReader, list[int], GenericWriter], None], spectra_indices: NDArray[int] | None = None, bind_args: dict[str, Any] | None = None, ) -> None: def wrap_operation( - reader: ImzmlReader, spectra_ids: list[int], writers: list[ImzmlWriter], **kwargs: dict[str, Any] + reader: GenericReader, spectra_ids: list[int], writers: list[GenericWriter], **kwargs: dict[str, Any] ) -> None: operation(reader, spectra_ids, writers[0], **kwargs) diff --git a/src/depiction/spectrum/baseline/tophat_baseline.py b/src/depiction/spectrum/baseline/tophat_baseline.py index 34110dd..f9af0cc 100644 --- a/src/depiction/spectrum/baseline/tophat_baseline.py +++ b/src/depiction/spectrum/baseline/tophat_baseline.py @@ -1,3 +1,4 @@ +from __future__ import annotations from dataclasses import dataclass from typing import Literal @@ -5,8 +6,8 @@ from numba import njit from numpy.typing import NDArray +from depiction.persistence.types import GenericReadFile from depiction.spectrum.baseline.baseline import Baseline -from depiction.persistence import ImzmlReadFile @dataclass(frozen=True) @@ -44,7 +45,7 @@ def get_element_size(self, mz_arr: NDArray[float]) -> int: else: raise ValueError(f"Invalid {self.window_unit=}") - def optimize_window_size(self, read_file: ImzmlReadFile, n_spectra: int, rng_seed: int = 0) -> int: + def optimize_window_size(self, read_file: GenericReadFile, n_spectra: int, rng_seed: int = 0) -> int: """Optimizes the window size for the provided file, by considering some random spectra. It's possible to set the value even to 1, if only one spectrum should be considered. :param read_file: The file to optimize the window size for. diff --git a/src/depiction/spectrum/evaluate_bins.py b/src/depiction/spectrum/evaluate_bins.py index d5fa65f..a23d871 100644 --- a/src/depiction/spectrum/evaluate_bins.py +++ b/src/depiction/spectrum/evaluate_bins.py @@ -1,3 +1,4 @@ +from __future__ import annotations import enum import numba @@ -5,7 +6,7 @@ from numpy.typing import NDArray from depiction.parallel_ops import ParallelConfig, WriteSpectraParallel -from depiction.persistence import ImzmlReadFile, ImzmlWriteFile, ImzmlReader, ImzmlWriter +from depiction.persistence.types import GenericReadFile, GenericWriteFile, GenericReader, GenericWriter class BinStatistic(enum.Enum): @@ -47,7 +48,7 @@ def evaluate(self, mz_arr: NDArray[float], int_arr: NDArray[float]) -> NDArray[f ) def evaluate_file( - self, read_file: ImzmlReadFile, write_file: ImzmlWriteFile, parallel_config: ParallelConfig + self, read_file: GenericReadFile, write_file: GenericWriteFile, parallel_config: ParallelConfig ) -> None: write_parallel = WriteSpectraParallel.from_config(parallel_config) write_parallel.map_chunked_to_file( @@ -62,9 +63,9 @@ def evaluate_file( @staticmethod def _compute_chunk( - reader: ImzmlReader, + reader: GenericReader, spectra_ids: list[int], - writer: ImzmlWriter, + writer: GenericWriter, bin_edges: NDArray[float], statistic: int, ) -> None: diff --git a/src/depiction/spectrum/evaluate_mean_spectrum.py b/src/depiction/spectrum/evaluate_mean_spectrum.py index 861c1a8..e75fdb0 100644 --- a/src/depiction/spectrum/evaluate_mean_spectrum.py +++ b/src/depiction/spectrum/evaluate_mean_spectrum.py @@ -1,14 +1,16 @@ # TODO this might need to be refactored in the future, especially how binning is mixed into this +from __future__ import annotations import functools from typing import Optional import numpy as np from numpy.typing import NDArray -from depiction.spectrum.evaluate_bins import EvaluateBins from depiction.parallel_ops import ParallelConfig from depiction.parallel_ops.read_spectra_parallel import ReadSpectraParallel -from depiction.persistence import ImzmlReader, ImzmlReadFile, ImzmlModeEnum +from depiction.persistence import ImzmlModeEnum +from depiction.persistence.types import GenericReadFile, GenericReader +from depiction.spectrum.evaluate_bins import EvaluateBins class EvaluateMeanSpectrum: @@ -24,7 +26,7 @@ def __init__( self._parallel_config = parallel_config self._eval_bins = eval_bins - def evaluate_file(self, input_file: ImzmlReadFile) -> tuple[NDArray[float], NDArray[float]]: + def evaluate_file(self, input_file: GenericReadFile) -> tuple[NDArray[float], NDArray[float]]: if input_file.imzml_mode != ImzmlModeEnum.CONTINUOUS and self._eval_bins is None: raise ValueError("Input file must be in 'continuous' mode.") @@ -42,7 +44,7 @@ def evaluate_file(self, input_file: ImzmlReadFile) -> tuple[NDArray[float], NDAr int_arr = total_sum / input_file.n_spectra return mz_arr, int_arr - def _get_result_mz_arr(self, input_file: ImzmlReadFile) -> NDArray[float]: + def _get_result_mz_arr(self, input_file: GenericReadFile) -> NDArray[float]: """Returns the m/z array for the result.""" if self._eval_bins is None: with input_file.reader() as reader: @@ -54,7 +56,7 @@ def _get_result_mz_arr(self, input_file: ImzmlReadFile) -> NDArray[float]: @classmethod def _get_spectra_sum( cls, - input_file: ImzmlReadFile, + input_file: GenericReadFile, parallel_config: ParallelConfig, eval_bins: Optional[EvaluateBins], ) -> NDArray[float]: @@ -68,7 +70,7 @@ def _get_spectra_sum( @staticmethod def _compute_chunk_sum( - reader: ImzmlReader, spectra_ids: list[int], eval_bins: Optional[EvaluateBins] + reader: GenericReader, spectra_ids: list[int], eval_bins: Optional[EvaluateBins] ) -> NDArray[float]: if eval_bins is None: chunk_sum = np.array(reader.get_spectrum_int(spectra_ids[0]), copy=True) diff --git a/src/depiction/tools/align_imzml.py b/src/depiction/tools/align_imzml.py index 0d8ad15..e96cb06 100644 --- a/src/depiction/tools/align_imzml.py +++ b/src/depiction/tools/align_imzml.py @@ -8,6 +8,7 @@ from tqdm import tqdm from depiction.estimate_ppm_error import EstimatePPMError +from depiction.persistence.types import GenericReadFile from depiction.spectrum.evaluate_bins import EvaluateBins from depiction.parallel_ops.parallel_config import ParallelConfig from depiction.parallel_ops.write_spectra_parallel import WriteSpectraParallel