From 0d54161aa1727f07f73fdd3df7b52839ec655491 Mon Sep 17 00:00:00 2001
From: Leonardo Schwarz
Date: Wed, 3 Jul 2024 11:54:50 +0200
Subject: [PATCH] first prototype of a config-based cli tool

---
Notes (free text between "---" and the diffstat is ignored by `git am`):

Adds a cyclopts-based CLI for baseline correction with two entry points:
the `config` command reads its parameters from a YAML file, the default
command takes them as options. A new Snakemake rule derives that YAML
from pipeline_params.yml and a second rule runs the CLI with it.

* BaselineCorrectionConfig.n_jobs is `int | None = None`, because the
  default command passes n_jobs=None and correct_baseline() already
  handles the None case.
* main_args passes `baseline_variant=` to the model; the model declares
  no `baseline_type` field, so that keyword never reached the
  `baseline_variant` field.
* Constructing the model validates it, so main_args makes no separate
  validate() call (validate is a classmethod that requires a value).
* NOTE(review): proc/correct_baseline.py formats
  `config.baseline_adjustment.baseline_variant`, while
  correct_baseline_config.py reads `.baseline_type` from the same
  object. Confirm which attribute the pipeline model exposes.
* NOTE(review): the blob ids on the `index` lines may not match the
  post-image; regenerate the patch before relying on 3-way merges.

 src/depiction/tools/cli/correct_baseline.py   | 52 +++++++++++++++----
 .../workflow/proc/correct_baseline.py         |  2 +-
 .../workflow/proc/correct_baseline_config.py  | 32 ++++++++++++
 .../workflow/rules/rules_proc.smk             | 34 ++++++++++--
 4 files changed, 105 insertions(+), 15 deletions(-)
 create mode 100644 src/depiction_targeted_preproc/workflow/proc/correct_baseline_config.py

diff --git a/src/depiction/tools/cli/correct_baseline.py b/src/depiction/tools/cli/correct_baseline.py
index 834e9d9..6444a05 100644
--- a/src/depiction/tools/cli/correct_baseline.py
+++ b/src/depiction/tools/cli/correct_baseline.py
@@ -3,44 +3,78 @@
 import shutil
 from typing import Annotated, Literal, TYPE_CHECKING
 
-import typer
+import cyclopts
+import yaml
 from loguru import logger
+from pydantic import BaseModel
 from typer import Argument, Option
 
 from depiction.parallel_ops import ParallelConfig
 from depiction.persistence import ImzmlReadFile, ImzmlWriteFile
 from depiction.tools.correct_baseline import BaselineVariants, CorrectBaseline
 
-if TYPE_CHECKING:
-    from pathlib import Path
+from pathlib import Path
 
 
+app = cyclopts.App()
-def correct_baseline(
+
+
+class BaselineCorrectionConfig(BaseModel):
+    n_jobs: int | None = None
+    baseline_variant: BaselineVariants = BaselineVariants.TopHat
+    window_size: int | float = 5000.0
+    window_unit: Literal["ppm", "index"] = "ppm"
+
+
+@app.command
+def config(
+    input_imzml: Annotated[Path, Argument()],
+    output_imzml: Annotated[Path, Argument()],
+    config: Annotated[Path, Argument()],
+) -> None:
+    parsed = BaselineCorrectionConfig.validate(yaml.safe_load(config.read_text()))
+    correct_baseline(config=parsed, input_imzml=input_imzml, output_imzml=output_imzml)
+
+
+@app.default
+def main_args(
     input_imzml: Annotated[Path, Argument()],
     output_imzml: Annotated[Path, Argument()],
     n_jobs: Annotated[int, Option()] = None,
     baseline_variant: Annotated[BaselineVariants, Option()] = BaselineVariants.TopHat,
     window_size: Annotated[int | float, Option()] = 5000,
     window_unit: Annotated[Literal["ppm", "index"], Option()] = "ppm",
-) -> None:
+):
+    parsed = BaselineCorrectionConfig(
+        n_jobs=n_jobs, baseline_variant=baseline_variant, window_size=window_size, window_unit=window_unit
+    )
+    correct_baseline(config=parsed, input_imzml=input_imzml, output_imzml=output_imzml)
+
+
+def correct_baseline(config: BaselineCorrectionConfig, input_imzml: Path, output_imzml: Path) -> None:
     """Removes the baseline from the input imzML file and writes the result to the output imzML file."""
     output_imzml.parent.mkdir(parents=True, exist_ok=True)
-    if baseline_variant == BaselineVariants.Zero:
+    if config.baseline_variant == BaselineVariants.Zero:
         logger.info("Baseline correction is deactivated, copying input to output")
         shutil.copyfile(input_imzml, output_imzml)
         shutil.copyfile(input_imzml.with_suffix(".ibd"), output_imzml.with_suffix(".ibd"))
     else:
-        if n_jobs is None:
+        if config.n_jobs is None:
             # TODO define some sane default for None and -1 n_jobs e.g. use all available up to a limit (None) or use all (1-r)
             n_jobs = 10
+        else:
+            n_jobs = config.n_jobs
         parallel_config = ParallelConfig(n_jobs=n_jobs)
         input_file = ImzmlReadFile(input_imzml)
         output_file = ImzmlWriteFile(output_imzml, imzml_mode=input_file.imzml_mode)
         correct_baseline = CorrectBaseline.from_variant(
-            parallel_config=parallel_config, variant=baseline_variant, window_size=window_size, window_unit=window_unit
+            parallel_config=parallel_config,
+            variant=config.baseline_variant,
+            window_size=config.window_size,
+            window_unit=config.window_unit,
         )
         correct_baseline.evaluate_file(input_file, output_file)
 
 
 if __name__ == "__main__":
-    typer.run(correct_baseline)
+    app()
diff --git a/src/depiction_targeted_preproc/workflow/proc/correct_baseline.py b/src/depiction_targeted_preproc/workflow/proc/correct_baseline.py
index 4898ed5..f6d6b25 100644
--- a/src/depiction_targeted_preproc/workflow/proc/correct_baseline.py
+++ b/src/depiction_targeted_preproc/workflow/proc/correct_baseline.py
@@ -23,7 +23,7 @@ def proc_correct_baseline(
             window["window_size"] = window_size
             window["window_unit"] = window_unit
         case _:
-            raise ValueError(f"Unsupported baseline adjustment type: {config.baseline_adjustment.baseline_type}")
+            raise ValueError(f"Unsupported baseline adjustment type: {config.baseline_adjustment.baseline_variant}")
 
     correct_baseline(
         input_imzml=input_imzml_path,
diff --git a/src/depiction_targeted_preproc/workflow/proc/correct_baseline_config.py b/src/depiction_targeted_preproc/workflow/proc/correct_baseline_config.py
new file mode 100644
index 0000000..b3afe86
--- /dev/null
+++ b/src/depiction_targeted_preproc/workflow/proc/correct_baseline_config.py
@@ -0,0 +1,32 @@
+# TODO this should be handled better in the future, but for illustrative purposes I'm doing it here
+from pathlib import Path
+
+import cyclopts
+import yaml
+
+from depiction_targeted_preproc.pipeline_config.model import PipelineParameters, BaselineAdjustmentTophat
+
+app = cyclopts.App()
+
+
+@app.default
+def correct_baseline_config(input_config: Path, output_config: Path) -> None:
+    config = PipelineParameters.parse_yaml(input_config)
+    args = {"n_jobs": config.n_jobs, "baseline_variant": config.baseline_adjustment.baseline_type or "Zero"}
+    # TODO fix later
+    if args["baseline_variant"] == "Tophat":
+        args["baseline_variant"] = "TopHat"
+    match config.baseline_adjustment:
+        case BaselineAdjustmentTophat(window_size=window_size, window_unit=window_unit):
+            args["window_size"] = window_size
+            args["window_unit"] = window_unit
+        case _:
+            pass
+    output_path = Path(output_config)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with output_path.open("w") as f:
+        yaml.dump(args, f)
+
+
+if __name__ == "__main__":
+    app()
diff --git a/src/depiction_targeted_preproc/workflow/rules/rules_proc.smk b/src/depiction_targeted_preproc/workflow/rules/rules_proc.smk
index 8a2ddc0..028ec79 100644
--- a/src/depiction_targeted_preproc/workflow/rules/rules_proc.smk
+++ b/src/depiction_targeted_preproc/workflow/rules/rules_proc.smk
@@ -1,13 +1,37 @@
-rule proc_correct_baseline:
+
+
+rule proc_correct_baseline_config:
     input:
-        imzml=multiext("{sample}/raw", ".imzML", ".ibd"),
         config="{sample}/pipeline_params.yml",
+    output:
+        config="{sample}/config/proc_correct_baseline.yml",
+    shell:
+        "python -m depiction_targeted_preproc.workflow.proc.correct_baseline_config"
+        " --input-config {input.config} --output-config {output.config}"
+
+
+rule proc_correct_baseline_run:
+    input:
+        imzml=multiext("{sample}/raw", ".imzML", ".ibd"),
+        config="{sample}/config/proc_correct_baseline.yml",
     output:
         imzml=temp(multiext("{sample}/corrected.original", ".imzML", ".ibd")),
     shell:
-        "python -m depiction_targeted_preproc.workflow.proc.correct_baseline "
-        " --input-imzml-path {input.imzml[0]} --config-path {input.config} "
-        " --output-imzml-path {output.imzml[0]}"
+        "python -m depiction.tools.cli.correct_baseline config"
+        " --config {input.config}"
+        " --input-imzml {input.imzml[0]} --output-imzml {output.imzml[0]}"
+
+
+# rule proc_correct_baseline:
+#     input:
+#         imzml=multiext("{sample}/raw", ".imzML", ".ibd"),
+#         config="{sample}/pipeline_params.yml",
+#     output:
+#         imzml=temp(multiext("{sample}/corrected.original", ".imzML", ".ibd")),
+#     shell:
+#         "python -m depiction_targeted_preproc.workflow.proc.correct_baseline "
+#         " --input-imzml-path {input.imzml[0]} --config-path {input.config} "
+#         " --output-imzml-path {output.imzml[0]}"
 
 
 rule proc_pick_peaks: