Skip to content

Commit

Permalink
add new filter_peaks tool
Browse files Browse the repository at this point in the history
  • Loading branch information
leoschwarz committed Jul 3, 2024
1 parent 809ea83 commit 0ddba7a
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 11 deletions.
46 changes: 46 additions & 0 deletions src/depiction/tools/cli/cli_filter_peaks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from __future__ import annotations

from pathlib import Path

import cyclopts
import yaml

from depiction.persistence import ImzmlReadFile, ImzmlWriteFile, ImzmlModeEnum
from depiction.tools.filter_peaks import FilterPeaksConfig, filter_peaks, FilterNHighestIntensityPartitionedConfig

# Command line application object; the functions below register themselves on it
# through the `@app.command` and `@app.default` decorators.
app = cyclopts.App()


@app.command
def run_config(
    config: Path,
    input_imzml: Path,
    output_imzml: Path,
) -> None:
    """Filter the peaks of an imzML file according to a YAML configuration file.

    Args:
        config: Path to a YAML file whose content is validated as a `FilterPeaksConfig`.
        input_imzml: Path of the imzML file to read.
        output_imzml: Path of the imzML file to write (opened in PROCESSED mode).
    """
    # `safe_load` never instantiates arbitrary Python objects from the YAML document.
    raw_config = yaml.safe_load(config.read_text())
    # `BaseModel.validate` is deprecated in pydantic v2; `model_validate` is its replacement.
    parsed = FilterPeaksConfig.model_validate(raw_config)
    filter_peaks(
        config=parsed,
        input_file=ImzmlReadFile(input_imzml),
        output_file=ImzmlWriteFile(output_imzml, imzml_mode=ImzmlModeEnum.PROCESSED),
    )


@app.default
def run(
    input_imzml: Path,
    output_imzml: Path,
    *,
    n_jobs: int | None = None,
) -> None:
    """Filter the peaks of an imzML file with the built-in default filter settings.

    Args:
        input_imzml: Path of the imzML file to read.
        output_imzml: Path of the imzML file to write (opened in PROCESSED mode).
        n_jobs: Number of parallel jobs, forwarded to `FilterPeaksConfig.n_jobs`.
    """
    # TODO this is hardcoded like before in the workflow
    # `method` is a required field of the filter config, so it has to be passed explicitly;
    # omitting it makes the model construction fail with a validation error.
    peaks_filter = FilterNHighestIntensityPartitionedConfig(
        method="FilterNHighestIntensityPartitioned",
        max_count=500,
        n_partitions=8,
    )
    # Constructing the model validates it; no need for the deprecated `validate` classmethod.
    config = FilterPeaksConfig(filters=[peaks_filter], n_jobs=n_jobs)
    filter_peaks(
        config=config,
        input_file=ImzmlReadFile(input_imzml),
        output_file=ImzmlWriteFile(output_imzml, imzml_mode=ImzmlModeEnum.PROCESSED),
    )


if __name__ == "__main__":
    # Run the command line application when the module is executed as a script.
    app()
54 changes: 54 additions & 0 deletions src/depiction/tools/filter_peaks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from __future__ import annotations

from typing import Literal

from pydantic import BaseModel

from depiction.parallel_ops import ParallelConfig, WriteSpectraParallel
from depiction.persistence import ImzmlReadFile, ImzmlWriteFile, ImzmlReader, ImzmlWriter
from depiction.spectrum.peak_filtering import ChainFilters, FilterNHighestIntensityPartitioned, PeakFilteringType


class FilterNHighestIntensityPartitionedConfig(BaseModel):
    """Configuration of a `FilterNHighestIntensityPartitioned` peak filter.

    `max_count` and `n_partitions` are passed unchanged to the constructor of
    `FilterNHighestIntensityPartitioned` when the filter object is built.
    """

    # Tag identifying the filter type in serialized configs. Only one value is allowed, so it
    # is also the default: configs built in code do not have to repeat it, while configs that
    # spell it out keep validating as before.
    method: Literal["FilterNHighestIntensityPartitioned"] = "FilterNHighestIntensityPartitioned"
    # Forwarded as `max_count` to the filter (exact semantics are defined by the filter class).
    max_count: int
    # Forwarded as `n_partitions` to the filter.
    n_partitions: int


class FilterPeaksConfig(BaseModel, use_enum_values=True, validate_default=True):
    """Configuration of a peak filtering run: which filters to apply and the parallelism."""

    # Filter configurations, translated into filter objects in the order given.
    filters: list[FilterNHighestIntensityPartitionedConfig]
    # Requested number of parallel jobs; `filter_peaks` substitutes 10 when this is falsy.
    n_jobs: int | None = None


def _get_filter_object(config: FilterPeaksConfig) -> PeakFilteringType:
filters = []
for filter in config.filters:
match filter.method:
case FilterNHighestIntensityPartitionedConfig(max_count=max_count, n_partitions=n_partitions):
filters.append(FilterNHighestIntensityPartitioned(max_count=max_count, n_partitions=n_partitions))
case _:
raise ValueError(f"Unknown filter method: {filter.method}")
if len(filters) == 1:
return filters[0]
else:
return ChainFilters(filters)


def _filter_chunk(
    reader: ImzmlReader, indices: list[int], writer: ImzmlWriter, peaks_filter: PeakFilteringType
) -> None:
    """Apply `peaks_filter` to the spectra at `indices` and write the results to `writer`.

    Each spectrum is read from `reader` together with its coordinates, filtered, and
    added to `writer` with the coordinates unchanged.
    """
    for index in indices:
        mz_values, intensities, coordinates = reader.get_spectrum_with_coords(index)
        # The same arrays are passed for both argument pairs of `filter_peaks`.
        # NOTE(review): presumably (spectrum mz/int, peak mz/int) - confirm against the filter API.
        filtered = peaks_filter.filter_peaks(mz_values, intensities, mz_values, intensities)
        filtered_mz, filtered_intensities = filtered
        writer.add_spectrum(filtered_mz, filtered_intensities, coordinates)


def filter_peaks(config: FilterPeaksConfig, input_file: ImzmlReadFile, output_file: ImzmlWriteFile) -> None:
    """Write the spectra of `input_file` to `output_file` after filtering their peaks.

    The filter is built from `config.filters`; the work is dispatched chunk-wise through
    `WriteSpectraParallel`, with `_filter_chunk` processing each chunk.
    """
    peaks_filter = _get_filter_object(config)
    # TODO n_jobs handling
    # A falsy `n_jobs` (None or 0) is replaced by 10 jobs.
    n_jobs = config.n_jobs or 10
    parallel_writer = WriteSpectraParallel.from_config(ParallelConfig(n_jobs=n_jobs))
    parallel_writer.map_chunked_to_file(
        read_file=input_file,
        write_file=output_file,
        operation=_filter_chunk,
        bind_args={"peaks_filter": peaks_filter},
    )
11 changes: 0 additions & 11 deletions src/depiction_targeted_preproc/workflow/rules/rules_proc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,6 @@ rule proc_pick_peaks:
# " --input-imzml-path {input.imzml[0]} --config-path {input.config} "
# " --output-imzml-path {output.imzml[0]}"
#
#
## TODO very experimental to be removed later again
# rule proc_filter_peaks:
# input:
# imzml=multiext("{sample}/corrected.peaks_all",".imzML",".ibd"),
# output:
# imzml=multiext("{sample}/corrected.peaks",".imzML",".ibd"),
# shell:
# "python -m depiction_targeted_preproc.workflow.proc.filter_peaks "
# " --input-imzml-path {input.imzml[0]} "
# " --output-imzml-path {output.imzml[0]}"


# TODO this should be solved more efficiently in the future, but for now it is solved by calling the script twice
Expand Down

0 comments on commit 0ddba7a

Please sign in to comment.