Skip to content

Commit

Permalink
add one more peak picker that might be more robust
Browse files Browse the repository at this point in the history
  • Loading branch information
leoschwarz committed Jun 6, 2024
1 parent 4389b1a commit 8725b0b
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 13 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ dev = [
"tifffile>=2024.2.12",
"xmltodict",
"licensecheck",
"ms-peak-picker"
]

# potentially relevant in the future again:
Expand Down
42 changes: 42 additions & 0 deletions src/depiction/spectrum/peak_picking/ms_peak_picker_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# TODO better name

from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING

import loguru
import ms_peak_picker
import numpy as np
import scipy

from depiction.spectrum.peak_picking.basic_peak_picker import BasicPeakPicker

if TYPE_CHECKING:
from numpy.typing import NDArray
from depiction.spectrum.peak_filtering import PeakFilteringType


@dataclass
class MSPeakPicker:
"""Interfaces to the ms-peak-picker Python package.
TODO proper description and better name
"""

fit_type: str = "quadratic"
peak_filtering: PeakFilteringType | None = None

def pick_peaks(self, mz_arr: NDArray[float], int_arr: NDArray[float]) -> tuple[NDArray[float], NDArray[float]]:
peak_list = ms_peak_picker.pick_peaks(mz_arr, int_arr, fit_type=self.fit_type)
peak_mz = np.array([peak.mz for peak in peak_list])
peak_int = np.array([peak.intensity for peak in peak_list])

if self.peak_filtering is not None:
peak_mz, peak_int = self.peak_filtering.filter_peaks(
spectrum_mz_arr=mz_arr,
spectrum_int_arr=int_arr,
peak_mz_arr=peak_mz,
peak_int_arr=peak_int,
)

return peak_mz, peak_int
10 changes: 9 additions & 1 deletion src/depiction_targeted_preproc/pipeline_config/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,15 @@ class PeakPickerBasicInterpolated(BaseModel):
# but postpone for later)


PeakPicker = Annotated[None | PeakPickerBasicInterpolated, Field(discriminator="peak_picker_type")]
class PeakPickerMSPeakPicker(BaseModel):
peak_picker_type: Literal["MSPeakPicker"]
fit_type: Literal["quadratic"] = "quadratic"


PeakPicker = Annotated[
None | PeakPickerBasicInterpolated | PeakPickerMSPeakPicker,
Field(discriminator="peak_picker_type")
]


class CalibrationRegressShift(BaseModel):
Expand Down
28 changes: 16 additions & 12 deletions src/depiction_targeted_preproc/workflow/proc/pick_peaks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
from loguru import logger

from depiction.parallel_ops import ParallelConfig
from depiction.persistence import ImzmlModeEnum, ImzmlWriteFile, ImzmlReadFile
from depiction.spectrum.peak_filtering import FilterNHighestIntensityPartitioned
from depiction.spectrum.peak_picking import BasicInterpolatedPeakPicker
from depiction.persistence import ImzmlModeEnum, ImzmlWriteFile, ImzmlReadFile
from depiction.spectrum.peak_picking.ms_peak_picker_wrapper import MSPeakPicker
from depiction.tools.pick_peaks import PickPeaks

from depiction_targeted_preproc.pipeline_config import model
from depiction_targeted_preproc.pipeline_config.model import PipelineParameters

Expand All @@ -21,30 +21,34 @@ def proc_pick_peaks(
output_imzml_path: Annotated[Path, typer.Option()],
) -> None:
config = PipelineParameters.parse_yaml(config_path)

# TODO configurable
peak_filtering = FilterNHighestIntensityPartitioned(max_count=200, n_partitions=8),
parallel_config = ParallelConfig(n_jobs=config.n_jobs, task_size=None)

match config.peak_picker:
case None:
logger.info("Peak picking is deactivated")
shutil.copy(input_imzml_path, output_imzml_path)
shutil.copy(input_imzml_path.with_suffix(".ibd"), output_imzml_path.with_suffix(".ibd"))
return
case model.PeakPickerBasicInterpolated() as peak_picker_config:
peak_picker = BasicInterpolatedPeakPicker(
min_prominence=peak_picker_config.min_prominence,
min_distance=peak_picker_config.min_distance,
min_distance_unit=peak_picker_config.min_distance_unit,
# TODO configurable
peak_filtering=FilterNHighestIntensityPartitioned(max_count=200, n_partitions=8),
)
parallel_config = ParallelConfig(n_jobs=config.n_jobs, task_size=None)
pick_peaks = PickPeaks(
peak_picker=peak_picker,
parallel_config=parallel_config,
peak_filtering=peak_filtering,
)
read_file = ImzmlReadFile(input_imzml_path)
write_file = ImzmlWriteFile(output_imzml_path, imzml_mode=ImzmlModeEnum.PROCESSED)
pick_peaks.evaluate_file(read_file, write_file)
case model.PeakPickerMSPeakPicker() as peak_picker_config:
peak_picker = MSPeakPicker(fit_type=peak_picker_config.fit_type, peak_filtering=peak_filtering)
case _:
raise ValueError(f"Unsupported peak picker type: {config.peak_picker.peak_picker_type}")

pick_peaks = PickPeaks(peak_picker=peak_picker, parallel_config=parallel_config)
read_file = ImzmlReadFile(input_imzml_path)
write_file = ImzmlWriteFile(output_imzml_path, imzml_mode=ImzmlModeEnum.PROCESSED)
pick_peaks.evaluate_file(read_file, write_file)


def main() -> None:
typer.run(proc_pick_peaks)
Expand Down

0 comments on commit 8725b0b

Please sign in to comment.