diff --git a/pyproject.toml b/pyproject.toml index 980d787..28ae232 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,6 +61,7 @@ dev = [ "tifffile>=2024.2.12", "xmltodict", "licensecheck", + "ms-peak-picker" ] # potentially relevant in the future again: diff --git a/src/depiction/spectrum/peak_picking/ms_peak_picker_wrapper.py b/src/depiction/spectrum/peak_picking/ms_peak_picker_wrapper.py new file mode 100644 index 0000000..293186c --- /dev/null +++ b/src/depiction/spectrum/peak_picking/ms_peak_picker_wrapper.py @@ -0,0 +1,42 @@ +# TODO better name + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +import loguru +import ms_peak_picker +import numpy as np +import scipy + +from depiction.spectrum.peak_picking.basic_peak_picker import BasicPeakPicker + +if TYPE_CHECKING: + from numpy.typing import NDArray + from depiction.spectrum.peak_filtering import PeakFilteringType + + +@dataclass +class MSPeakPicker: + """Interfaces to the ms-peak-picker Python package. + TODO proper description and better name + """ + + fit_type: str = "quadratic" + peak_filtering: PeakFilteringType | None = None + + def pick_peaks(self, mz_arr: NDArray[float], int_arr: NDArray[float]) -> tuple[NDArray[float], NDArray[float]]: + peak_list = ms_peak_picker.pick_peaks(mz_arr, int_arr, fit_type=self.fit_type) + peak_mz = np.array([peak.mz for peak in peak_list]) + peak_int = np.array([peak.intensity for peak in peak_list]) + + if self.peak_filtering is not None: + peak_mz, peak_int = self.peak_filtering.filter_peaks( + spectrum_mz_arr=mz_arr, + spectrum_int_arr=int_arr, + peak_mz_arr=peak_mz, + peak_int_arr=peak_int, + ) + + return peak_mz, peak_int diff --git a/src/depiction_targeted_preproc/pipeline_config/model.py b/src/depiction_targeted_preproc/pipeline_config/model.py index c7a8739..0cbfb30 100644 --- a/src/depiction_targeted_preproc/pipeline_config/model.py +++ b/src/depiction_targeted_preproc/pipeline_config/model.py @@ -28,7 +28,15 @@ class PeakPickerBasicInterpolated(BaseModel): # but postpone for later) -PeakPicker = Annotated[None | PeakPickerBasicInterpolated, Field(discriminator="peak_picker_type")] +class PeakPickerMSPeakPicker(BaseModel): + peak_picker_type: Literal["MSPeakPicker"] + fit_type: Literal["quadratic"] = "quadratic" + + +PeakPicker = Annotated[ + None | PeakPickerBasicInterpolated | PeakPickerMSPeakPicker, + Field(discriminator="peak_picker_type") +] class CalibrationRegressShift(BaseModel): diff --git a/src/depiction_targeted_preproc/workflow/proc/pick_peaks.py b/src/depiction_targeted_preproc/workflow/proc/pick_peaks.py index 3f0d80a..cfdfb95 100644 --- a/src/depiction_targeted_preproc/workflow/proc/pick_peaks.py +++ b/src/depiction_targeted_preproc/workflow/proc/pick_peaks.py @@ -6,11 +6,11 @@ from loguru import logger from depiction.parallel_ops import ParallelConfig +from depiction.persistence import ImzmlModeEnum, ImzmlWriteFile, ImzmlReadFile from depiction.spectrum.peak_filtering import FilterNHighestIntensityPartitioned from depiction.spectrum.peak_picking import BasicInterpolatedPeakPicker -from depiction.persistence import ImzmlModeEnum, ImzmlWriteFile, ImzmlReadFile +from depiction.spectrum.peak_picking.ms_peak_picker_wrapper import MSPeakPicker from depiction.tools.pick_peaks import PickPeaks - from depiction_targeted_preproc.pipeline_config import model from depiction_targeted_preproc.pipeline_config.model import PipelineParameters @@ -21,30 +21,34 @@ def proc_pick_peaks( output_imzml_path: Annotated[Path, typer.Option()], ) -> None: config = PipelineParameters.parse_yaml(config_path) + + # TODO configurable + peak_filtering = FilterNHighestIntensityPartitioned(max_count=200, n_partitions=8), + parallel_config = ParallelConfig(n_jobs=config.n_jobs, task_size=None) + match config.peak_picker: case None: logger.info("Peak picking is deactivated") shutil.copy(input_imzml_path, output_imzml_path) shutil.copy(input_imzml_path.with_suffix(".ibd"), output_imzml_path.with_suffix(".ibd")) + return case model.PeakPickerBasicInterpolated() as peak_picker_config: peak_picker = BasicInterpolatedPeakPicker( min_prominence=peak_picker_config.min_prominence, min_distance=peak_picker_config.min_distance, min_distance_unit=peak_picker_config.min_distance_unit, - # TODO configurable - peak_filtering=FilterNHighestIntensityPartitioned(max_count=200, n_partitions=8), - ) - parallel_config = ParallelConfig(n_jobs=config.n_jobs, task_size=None) - pick_peaks = PickPeaks( - peak_picker=peak_picker, - parallel_config=parallel_config, + peak_filtering=peak_filtering, ) - read_file = ImzmlReadFile(input_imzml_path) - write_file = ImzmlWriteFile(output_imzml_path, imzml_mode=ImzmlModeEnum.PROCESSED) - pick_peaks.evaluate_file(read_file, write_file) + case model.PeakPickerMSPeakPicker() as peak_picker_config: + peak_picker = MSPeakPicker(fit_type=peak_picker_config.fit_type, peak_filtering=peak_filtering) case _: raise ValueError(f"Unsupported peak picker type: {config.peak_picker.peak_picker_type}") + pick_peaks = PickPeaks(peak_picker=peak_picker, parallel_config=parallel_config) + read_file = ImzmlReadFile(input_imzml_path) + write_file = ImzmlWriteFile(output_imzml_path, imzml_mode=ImzmlModeEnum.PROCESSED) + pick_peaks.evaluate_file(read_file, write_file) + def main() -> None: typer.run(proc_pick_peaks)