Skip to content

Commit

Permalink
use the new types
Browse files (browse the repository at this point in the history)
  • Loading branch information
leoschwarz committed Jul 8, 2024
1 parent 78568fc commit 7e1135a
Show file tree
Hide file tree
Showing 11 changed files with 69 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from depiction.parallel_ops import ParallelConfig, WriteSpectraParallel
from depiction.persistence import ImzmlWriteFile, ImzmlReadFile, ImzmlReader, ImzmlWriter
from depiction.persistence.types import GenericReadFile, GenericWriteFile


# TODO experimental/non-prod
Expand Down Expand Up @@ -140,8 +141,8 @@ def align_masses(

def align_masses_all(
self,
read_file: ImzmlReadFile,
write_file: ImzmlWriteFile,
read_file: GenericReadFile,
write_file: GenericWriteFile,
parallel_config: ParallelConfig,
) -> None:
"""Applies `align_masses` to all spectra in the given file and writes the results to the output file."""
Expand Down
5 changes: 3 additions & 2 deletions src/depiction/calibration/isotope_pattern_matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
from numpy.typing import NDArray

from depiction.persistence.types import GenericReader, GenericReadFile
from depiction.spectrum.peak_picking.basic_peak_picker import BasicPeakPicker
from depiction.misc.numpy_util import NumpyUtil
from depiction.parallel_ops import ParallelConfig, ReadSpectraParallel
Expand Down Expand Up @@ -101,7 +102,7 @@ def compute_averagine_agreement_at_positions(

def compute_averagine_agreement_at_mz_positions_for_file(
self,
read_file: ImzmlReadFile,
read_file: GenericReadFile,
parallel_config: ParallelConfig,
peak_picker: BasicPeakPicker,
n_limit: int,
Expand All @@ -111,7 +112,7 @@ def compute_averagine_agreement_at_mz_positions_for_file(
) -> list[tuple[NDArray[float], NDArray[int]]]:
# TODO possibly move this method in the future (since it mixes peak_picker into this class)

def operation_file(reader: ImzmlReader, spectra_ids: list[int]) -> list[tuple[NDArray[float], NDArray[int]]]:
def operation_file(reader: GenericReader, spectra_ids: list[int]) -> list[tuple[NDArray[float], NDArray[int]]]:
results = []
for spectrum_id in spectra_ids:
mz_arr, int_arr = reader.get_spectrum(spectrum_id)
Expand Down
14 changes: 7 additions & 7 deletions src/depiction/calibration/perform_calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from depiction.calibration.calibration_method import CalibrationMethod
from depiction.parallel_ops import ParallelConfig, ReadSpectraParallel, WriteSpectraParallel
from depiction.parallel_ops.parallel_map import ParallelMap
from depiction.persistence import ImzmlReadFile, ImzmlWriteFile, ImzmlReader, ImzmlWriter
from depiction.persistence.types import GenericReadFile, GenericWriteFile, GenericReader, GenericWriter


class PerformCalibration:
Expand Down Expand Up @@ -76,7 +76,7 @@ def _validate_per_spectra_array(self, array: DataArray, coordinates_2d: NDArray[
raise ValueError(errors)

def calibrate_image(
self, read_peaks: ImzmlReadFile, write_file: ImzmlWriteFile, read_full: Optional[ImzmlReadFile] = None
self, read_peaks: GenericReadFile, write_file: GenericWriteFile, read_full: Optional[GenericReadFile] = None
) -> None:
if read_full is None:
read_full = read_peaks
Expand All @@ -99,7 +99,7 @@ def calibrate_image(
logger.info("Applying models...")
self._apply_all_models(read_file=read_full, write_file=write_file, all_model_coefs=model_coefs)

def _extract_all_features(self, read_peaks: ImzmlReadFile) -> DataArray:
def _extract_all_features(self, read_peaks: GenericReadFile) -> DataArray:
read_parallel = ReadSpectraParallel.from_config(self._parallel_config)
all_features = read_parallel.map_chunked(
read_file=read_peaks,
Expand All @@ -114,7 +114,7 @@ def _extract_all_features(self, read_peaks: ImzmlReadFile) -> DataArray:
)

def _apply_all_models(
self, read_file: ImzmlReadFile, write_file: ImzmlWriteFile, all_model_coefs: DataArray
self, read_file: GenericReadFile, write_file: GenericWriteFile, all_model_coefs: DataArray
) -> None:
write_parallel = WriteSpectraParallel.from_config(self._parallel_config)
write_parallel.map_chunked_to_file(
Expand Down Expand Up @@ -155,7 +155,7 @@ def _write_data_array(self, array: DataArray, group: str) -> None:

@staticmethod
def _extract_chunk_features(
reader: ImzmlReader,
reader: GenericReader,
spectra_indices: list[int],
calibration: CalibrationMethod,
) -> DataArray:
Expand All @@ -170,9 +170,9 @@ def _extract_chunk_features(

@staticmethod
def _calibrate_spectra(
reader: ImzmlReader,
reader: GenericReader,
spectra_indices: list[int],
writer: ImzmlWriter,
writer: GenericWriter,
calibration: CalibrationMethod,
all_model_coefs: DataArray,
) -> None:
Expand Down
12 changes: 8 additions & 4 deletions src/depiction/estimate_ppm_error.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
from typing import Optional
from __future__ import annotations

from collections.abc import Sequence
from typing import Optional, TYPE_CHECKING

import numpy as np

from depiction.parallel_ops import ParallelConfig
from depiction.parallel_ops.read_spectra_parallel import ReadSpectraParallel
from depiction.persistence import ImzmlReader, ImzmlReadFile

if TYPE_CHECKING:
from depiction.persistence.types import GenericReadFile, GenericReader


class EstimatePPMError:
Expand All @@ -16,7 +20,7 @@ def __init__(self, parallel_config: Optional[ParallelConfig] = None) -> None:
parallel_config = ParallelConfig.no_parallelism()
self._parallel_config = parallel_config

def estimate(self, read_file: ImzmlReadFile) -> dict[str, float]:
def estimate(self, read_file: GenericReadFile) -> dict[str, float]:
"""
Estimates the PPM error for the given imzML file.
Returns a dictionary containing the median and std of the PPM error medians (for each spectrum).
Expand All @@ -34,7 +38,7 @@ def estimate(self, read_file: ImzmlReadFile) -> dict[str, float]:

@staticmethod
def _get_ppm_values(
reader: ImzmlReader,
reader: GenericReader,
spectra_ids: Sequence[int],
) -> tuple[list[float], float, float]:
result_ppm = []
Expand Down
11 changes: 7 additions & 4 deletions src/depiction/misc/experimental/resample_mass_axis.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from __future__ import annotations

from dataclasses import dataclass

import numpy as np
from numpy.typing import NDArray
from scipy.interpolate import CubicSpline

from depiction.parallel_ops import ParallelConfig, WriteSpectraParallel
from depiction.persistence import ImzmlReadFile, ImzmlWriteFile, ImzmlReader, ImzmlWriter, ImzmlModeEnum
from depiction.persistence import ImzmlModeEnum
from depiction.persistence.types import GenericWriteFile, GenericReadFile, GenericWriter, GenericReader


@dataclass
Expand All @@ -20,8 +23,8 @@ def evaluate_spectrum(self, mz_arr: NDArray[float], int_arr: NDArray[float]) ->

def evaluate_file(
self,
read_file: ImzmlReadFile,
write_file: ImzmlWriteFile,
read_file: GenericReadFile,
write_file: GenericWriteFile,
parallel_config: ParallelConfig,
allow_processed: bool = False,
) -> None:
Expand All @@ -40,7 +43,7 @@ def evaluate_file(

@classmethod
def _evaluate_file_chunk(
cls, reader: ImzmlReader, spectra_ids: list[int], writer: ImzmlWriter, target_mz_arr: NDArray[float]
cls, reader: GenericReader, spectra_ids: list[int], writer: GenericWriter, target_mz_arr: NDArray[float]
) -> None:
resampler = ResampleMassAxis(target_mz_arr=target_mz_arr)

Expand Down
10 changes: 5 additions & 5 deletions src/depiction/parallel_ops/read_spectra_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

if TYPE_CHECKING:
from numpy.typing import NDArray
from depiction.persistence import ImzmlReadFile, ImzmlReader
from depiction.persistence.types import GenericReadFile, GenericReader

T = TypeVar("T")
V = TypeVar("V")
Expand Down Expand Up @@ -47,8 +47,8 @@ def config(self) -> ParallelConfig:

def map_chunked(
self,
read_file: ImzmlReadFile,
operation: Callable[[ImzmlReader, list[int], ...], T] | Callable[[ImzmlReader, list[int], int, ...], T],
read_file: GenericReadFile,
operation: Callable[[GenericReader, list[int], ...], T] | Callable[[GenericReader, list[int], int, ...], T],
spectra_indices: NDArray[int] | None = None,
bind_args: dict[str, Any] | None = None,
reduce_fn: Callable[[list[T]], V] = list,
Expand All @@ -58,8 +58,8 @@ def map_chunked(
:param read_file: the file to read the spectra from
:param operation: the operation to apply to each chunk of spectra
there are two possible signatures for the operation:
- operation(reader: ImzmlReader, spectra_ids: list[int], **kwargs) -> T
- operation(reader: ImzmlReader, spectra_ids: list[int], task_index: int, **kwargs) -> T
- operation(reader: GenericReader, spectra_ids: list[int], **kwargs) -> T
- operation(reader: GenericReader, spectra_ids: list[int], task_index: int, **kwargs) -> T
where:
- reader: the reader object to read the spectra from
- spectra_ids: the indices of the spectra to process
Expand Down
39 changes: 19 additions & 20 deletions src/depiction/parallel_ops/write_spectra_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,14 @@
from depiction.persistence import (
ImzmlReadFile,
ImzmlWriteFile,
ImzmlReader,
ImzmlWriter,
ImzmlModeEnum,
)
from depiction.tools.merge_imzml import MergeImzml

if TYPE_CHECKING:
from numpy.typing import NDArray
from depiction.parallel_ops.parallel_config import ParallelConfig
from depiction.persistence.types import GenericReadFile, GenericWriteFile, GenericWriter, GenericReader


class WriteSpectraParallel:
Expand All @@ -31,11 +30,11 @@ def from_config(cls, config: ParallelConfig) -> WriteSpectraParallel:

def map_chunked_to_files(
self,
read_file: ImzmlReadFile,
write_files: list[ImzmlWriteFile],
read_file: GenericReadFile,
write_files: list[GenericWriteFile],
operation: (
Callable[[ImzmlReader, list[int], list[ImzmlWriter], ...], None]
| Callable[[ImzmlReader, list[int], list[ImzmlWriteFile], ...], None]
Callable[[GenericReader, list[int], list[GenericWriter], ...], None]
| Callable[[GenericReader, list[int], list[GenericWriteFile], ...], None]
),
spectra_indices: NDArray[int] | None = None,
bind_args: dict[str, Any] | None = None,
Expand Down Expand Up @@ -82,16 +81,16 @@ def map_chunked_to_files(

def map_chunked_external_to_files(
self,
read_file: ImzmlReadFile,
write_files: list[ImzmlWriteFile],
read_file: GenericReadFile,
write_files: list[GenericWriteFile],
operation: Callable[[str, list[str]], None],
spectra_indices: NDArray[int] | None = None,
bind_args: dict[str, Any] | None = None,
) -> None:
def op(
reader: ImzmlReader,
reader: GenericReader,
spectra_ids: list[int],
write_files: list[ImzmlWriteFile],
write_files: list[GenericWriteFile],
**kwargs: dict[str, Any],
) -> None:
# TODO maybe kwarg handling could be done a bit more cleanly here in the future
Expand Down Expand Up @@ -122,8 +121,8 @@ def op(
def _get_split_modes_and_paths(
self,
work_directory: Path,
read_file: ImzmlReadFile,
write_files: list[ImzmlWriteFile],
read_file: GenericReadFile,
write_files: list[GenericWriteFile],
spectra_indices: NDArray[int] | None,
) -> list[tuple[ImzmlModeEnum, list[Path]]]:
# determine the number of tasks
Expand All @@ -143,12 +142,12 @@ def _get_split_modes_and_paths(

@staticmethod
def _write_transformed_chunked_operation(
reader: ImzmlReader,
reader: GenericReader,
spectra_indices: list[int],
task_index: int,
operation: (
Callable[[ImzmlReader, list[int], list[ImzmlWriter], ...], None]
| Callable[[ImzmlReader, list[int], list[ImzmlWriteFile], ...], None]
Callable[[GenericReader, list[int], list[GenericWriter], ...], None]
| Callable[[GenericReader, list[int], list[GenericWriteFile], ...], None]
),
open_write_files: bool,
split_modes_and_paths: list[tuple[ImzmlModeEnum, list[Path]]],
Expand Down Expand Up @@ -179,7 +178,7 @@ def _write_transformed_chunked_operation(
def _merge_results(
self,
split_modes_and_paths: list[tuple[ImzmlModeEnum, list[str]]],
write_files: list[ImzmlWriteFile],
write_files: list[GenericWriteFile],
) -> None:
"""Merges the results of the parallel operations
:param split_modes_and_paths: the split modes and paths
Expand All @@ -193,14 +192,14 @@ def _merge_results(

def map_chunked_to_file(
self,
read_file: ImzmlReadFile,
write_file: ImzmlWriteFile,
operation: Callable[[ImzmlReader, list[int], ImzmlWriter], None],
read_file: GenericReadFile,
write_file: GenericWriteFile,
operation: Callable[[GenericReader, list[int], GenericWriter], None],
spectra_indices: NDArray[int] | None = None,
bind_args: dict[str, Any] | None = None,
) -> None:
def wrap_operation(
reader: ImzmlReader, spectra_ids: list[int], writers: list[ImzmlWriter], **kwargs: dict[str, Any]
reader: GenericReader, spectra_ids: list[int], writers: list[GenericWriter], **kwargs: dict[str, Any]
) -> None:
operation(reader, spectra_ids, writers[0], **kwargs)

Expand Down
5 changes: 3 additions & 2 deletions src/depiction/spectrum/baseline/tophat_baseline.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Literal

import numpy as np
from numba import njit
from numpy.typing import NDArray

from depiction.persistence.types import GenericReadFile
from depiction.spectrum.baseline.baseline import Baseline
from depiction.persistence import ImzmlReadFile


@dataclass(frozen=True)
Expand Down Expand Up @@ -44,7 +45,7 @@ def get_element_size(self, mz_arr: NDArray[float]) -> int:
else:
raise ValueError(f"Invalid {self.window_unit=}")

def optimize_window_size(self, read_file: ImzmlReadFile, n_spectra: int, rng_seed: int = 0) -> int:
def optimize_window_size(self, read_file: GenericReadFile, n_spectra: int, rng_seed: int = 0) -> int:
"""Optimizes the window size for the provided file, by considering some random spectra. It's possible to set the
value even to 1, if only one spectrum should be considered.
:param read_file: The file to optimize the window size for.
Expand Down
9 changes: 5 additions & 4 deletions src/depiction/spectrum/evaluate_bins.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from __future__ import annotations
import enum

import numba
import numpy as np
from numpy.typing import NDArray

from depiction.parallel_ops import ParallelConfig, WriteSpectraParallel
from depiction.persistence import ImzmlReadFile, ImzmlWriteFile, ImzmlReader, ImzmlWriter
from depiction.persistence.types import GenericReadFile, GenericWriteFile, GenericReader, GenericWriter


class BinStatistic(enum.Enum):
Expand Down Expand Up @@ -47,7 +48,7 @@ def evaluate(self, mz_arr: NDArray[float], int_arr: NDArray[float]) -> NDArray[f
)

def evaluate_file(
self, read_file: ImzmlReadFile, write_file: ImzmlWriteFile, parallel_config: ParallelConfig
self, read_file: GenericReadFile, write_file: GenericWriteFile, parallel_config: ParallelConfig
) -> None:
write_parallel = WriteSpectraParallel.from_config(parallel_config)
write_parallel.map_chunked_to_file(
Expand All @@ -62,9 +63,9 @@ def evaluate_file(

@staticmethod
def _compute_chunk(
reader: ImzmlReader,
reader: GenericReader,
spectra_ids: list[int],
writer: ImzmlWriter,
writer: GenericWriter,
bin_edges: NDArray[float],
statistic: int,
) -> None:
Expand Down
Loading

0 comments on commit 7e1135a

Please sign in to comment.