From 42845375d5b45add9dfa295c8083de54745ce56a Mon Sep 17 00:00:00 2001
From: Leonardo Schwarz
Date: Fri, 7 Jun 2024 11:59:08 +0200
Subject: [PATCH] use the new mass lists

---
 src/depiction_targeted_preproc/example/run.py | 11 ++++--
 .../example_compare/run_compare.py            | 12 +++++--
 .../workflow/exp/mass_list_preparation.py     | 36 +++++++++++++++++++
 .../workflow/experimental.smk                 | 16 ++++++++-
 .../workflow/rules/rules_proc.smk             |  4 +--
 .../workflow/rules/rules_qc.smk               |  8 ++---
 .../workflow/rules/rules_vis.smk              |  4 +--
 7 files changed, 77 insertions(+), 14 deletions(-)
 create mode 100644 src/depiction_targeted_preproc/workflow/exp/mass_list_preparation.py

diff --git a/src/depiction_targeted_preproc/example/run.py b/src/depiction_targeted_preproc/example/run.py
index 62097f8..ddcc114 100644
--- a/src/depiction_targeted_preproc/example/run.py
+++ b/src/depiction_targeted_preproc/example/run.py
@@ -97,7 +97,14 @@ def export_results(
         shutil.copy(work_dir / sample_name / file, output_dir / sample_name / file)
 
 
-def initial_setup(input_imzml: Path, input_mass_list: Path, params_file: Path, dir: Path, force: bool = False) -> None:
+def initial_setup(
+    input_imzml: Path,
+    input_mass_list: Path,
+    params_file: Path,
+    dir: Path,
+    force: bool = False,
+    mass_list_filename: str = "images_default_mass_list.csv",
+) -> None:
     if not force and (dir / "raw.imzML").exists():
         logger.info("Skipping initial setup, directory already exists: {dir}", dir=dir)
     else:
@@ -105,7 +112,7 @@ def initial_setup(input_imzml: Path, input_mass_list: Path, params_file: Path, d
         dir.mkdir(exist_ok=True, parents=True)
         shutil.copy(input_imzml, dir / "raw.imzML")
         shutil.copy(input_imzml.with_suffix(".ibd"), dir / "raw.ibd")
-        shutil.copy(input_mass_list, dir / "images_default_mass_list.csv")
+        shutil.copy(input_mass_list, dir / mass_list_filename)
         shutil.copy(params_file, dir / "pipeline_params.yml")
 
 
diff --git a/src/depiction_targeted_preproc/example_compare/run_compare.py b/src/depiction_targeted_preproc/example_compare/run_compare.py
index 8787b47..b255ef6 100644
--- a/src/depiction_targeted_preproc/example_compare/run_compare.py
+++ b/src/depiction_targeted_preproc/example_compare/run_compare.py
@@ -12,7 +12,7 @@ def get_configs() -> dict[str, Path]:
 
 
 def prepare_tasks(input_imzml_path: Path, work_dir: Path) -> list[Path]:
-    input_mass_list = input_imzml_path.parent / "mass_list_vend.csv"
+    input_mass_list = input_imzml_path.parent / "mass_list.raw.csv"
     folders = set_up_work_dir(work_dir, input_imzml_path, input_mass_list)
     requested_files = get_all_output_files(folders)
 
@@ -34,7 +34,7 @@ def main() -> None:
     for imzml in imzmls:
         requested_files += prepare_tasks(data_raw_dir / imzml, work_dir=work_dir)
 
-    SnakemakeInvoke().invoke(work_dir=work_dir, result_files=requested_files, n_cores=1)
+    SnakemakeInvoke().invoke(work_dir=work_dir, result_files=requested_files, n_cores=2)
 
 
 def get_all_output_files(folders: list[Path]) -> list[Path]:
@@ -55,7 +55,13 @@ def set_up_work_dir(work_dir: Path, input_imzml: Path, input_mass_list: Path) ->
     sample_name = input_imzml.stem
     for config_name, config_path in configs.items():
         dir = work_dir / sample_name / config_name
-        initial_setup(input_imzml=input_imzml, input_mass_list=input_mass_list, params_file=config_path, dir=dir)
+        initial_setup(
+            input_imzml=input_imzml,
+            input_mass_list=input_mass_list,
+            params_file=config_path,
+            dir=dir,
+            mass_list_filename="mass_list.raw.csv",
+        )
         folders.append(dir)
     return folders
 
diff --git a/src/depiction_targeted_preproc/workflow/exp/mass_list_preparation.py b/src/depiction_targeted_preproc/workflow/exp/mass_list_preparation.py
new file mode 100644
index 0000000..cb2e1d6
--- /dev/null
+++ b/src/depiction_targeted_preproc/workflow/exp/mass_list_preparation.py
@@ -0,0 +1,36 @@
+from pathlib import Path
+from typing import Annotated
+
+import polars as pl
+import typer
+from typer import Option
+
+
+def exp_mass_list_preparation(
+    input_csv_path: Annotated[Path, Option()],
+    out_calibration_csv_path: Annotated[Path, Option()],
+    out_standards_csv_path: Annotated[Path, Option()],
+    out_visualization_csv_path: Annotated[Path, Option()],
+) -> None:
+    input_df = pl.read_csv(input_csv_path)
+
+    # rename the columns to `label` and `mass`, and drop the `No.` column
+    input_df = input_df.rename({"Marker": "label", "PC-MT (M+H)+": "mass"}).drop("No.")
+
+    # add a constant `tol` column (0.25) to every row
+    visualization_df = input_df.with_columns(tol=pl.lit(0.25))
+
+    # for the calibration remove the CHCA peaks, their labels start with CHCA
+    calibration_df = visualization_df.filter(~pl.col("label").str.starts_with("CHCA"))
+
+    # for the standards csv only keep peaks whose label contains "standard" (case-insensitive)
+    standards_df = visualization_df.filter(pl.col("label").str.to_lowercase().str.contains("standard"))
+
+    # write the three derived mass lists
+    calibration_df.write_csv(out_calibration_csv_path)
+    standards_df.write_csv(out_standards_csv_path)
+    visualization_df.write_csv(out_visualization_csv_path)
+
+
+if __name__ == "__main__":
+    typer.run(exp_mass_list_preparation)
diff --git a/src/depiction_targeted_preproc/workflow/experimental.smk b/src/depiction_targeted_preproc/workflow/experimental.smk
index ea39989..c8deef1 100644
--- a/src/depiction_targeted_preproc/workflow/experimental.smk
+++ b/src/depiction_targeted_preproc/workflow/experimental.smk
@@ -15,4 +15,18 @@ rule exp_compare_cluster_stats:
     shell:
         "python -m depiction_targeted_preproc.workflow.exp.compare_cluster_stats"
         " {input.csv}"
-        " --output-pdf {output}"
\ No newline at end of file
+        " --output-pdf {output}"
+
+rule exp_mass_list_preparation:
+    input:
+        csv="{sample}/mass_list.raw.csv"
+    output:
+        calibration_csv="{sample}/mass_list.calibration.csv",
+        standards_csv="{sample}/mass_list.standards.csv",
+        visualization="{sample}/mass_list.visualization.csv"
+    shell:
+        "python -m depiction_targeted_preproc.workflow.exp.mass_list_preparation"
+        " --input-csv-path {input.csv}"
+        " --out-calibration-csv-path {output.calibration_csv}"
+        " --out-standards-csv-path {output.standards_csv}"
+        " --out-visualization-csv-path {output.visualization}"
\ No newline at end of file
diff --git a/src/depiction_targeted_preproc/workflow/rules/rules_proc.smk b/src/depiction_targeted_preproc/workflow/rules/rules_proc.smk
index 19e3b40..00168bc 100644
--- a/src/depiction_targeted_preproc/workflow/rules/rules_proc.smk
+++ b/src/depiction_targeted_preproc/workflow/rules/rules_proc.smk
@@ -27,7 +27,7 @@ rule proc_calibrate_remove_global_shift:
     input:
         imzml=multiext("{sample}/corrected.peaks",".imzML",".ibd"),
         config="{sample}/pipeline_params.yml",
-        mass_list="{sample}/images_default_mass_list.csv",
+        mass_list="{sample}/mass_list.standards.csv",
     output:
         imzml=temp(multiext("{sample}/calibrated.tmp",".imzML",".ibd")),
     shell:
@@ -40,7 +40,7 @@ rule proc_calibrate_actual:
     input:
         imzml=multiext("{sample}/calibrated.tmp",".imzML",".ibd"),
         config="{sample}/pipeline_params.yml",
-        mass_list="{sample}/images_default_mass_list.csv",
+        mass_list="{sample}/mass_list.calibration.csv",
     output:
         imzml=multiext("{sample}/calibrated",".imzML",".ibd"),
         calib_data="{sample}/calib_data.hdf5",
diff --git a/src/depiction_targeted_preproc/workflow/rules/rules_qc.smk b/src/depiction_targeted_preproc/workflow/rules/rules_qc.smk
index abaf316..0af0029 100644
--- a/src/depiction_targeted_preproc/workflow/rules/rules_qc.smk
+++ b/src/depiction_targeted_preproc/workflow/rules/rules_qc.smk
@@ -1,7 +1,7 @@
 rule qc_table_marker_distances_baseline:
     input:
         imzml=multiext("{sample}/corrected.peaks",".imzML",".ibd"),
-        mass_list="{sample}/images_default_mass_list.csv",
+        mass_list="{sample}/mass_list.visualization.csv",
     output:
         table="{sample}/qc/table_marker_distances_baseline.parquet"
     shell:
@@ -12,7 +12,7 @@ rule qc_table_marker_distances_baseline:
 rule qc_table_marker_distances_calib:
     input:
         imzml_peaks="{sample}/calibrated.imzML",
-        mass_list="{sample}/images_default_mass_list.csv",
+        mass_list="{sample}/mass_list.visualization.csv",
     output:
         table="{sample}/qc/table_marker_distances_calib.parquet"
     shell:
@@ -76,7 +76,7 @@ rule qc_plot_peak_density_grouped:
 rule qc_plot_calibration_map:
     input:
         calib_data="{sample}/calib_data.hdf5",
-        mass_list="{sample}/images_default_mass_list.csv",
+        mass_list="{sample}/mass_list.visualization.csv",
     output:
         pdf="{sample}/qc/plot_calibration_map.pdf"
     shell:
@@ -89,7 +89,7 @@ rule qc_plot_sample_spectra_before_after:
     input:
         imzml_baseline="{sample}/peaks.imzML",
         imzml_calib="{sample}/calibrated.imzML",
-        mass_list="{sample}/images_default_mass_list.csv",
+        mass_list="{sample}/mass_list.visualization.csv",
     output:
         pdf="{sample}/qc/plot_sample_spectra_before_after.pdf"
     shell:
diff --git a/src/depiction_targeted_preproc/workflow/rules/rules_vis.smk b/src/depiction_targeted_preproc/workflow/rules/rules_vis.smk
index 34d544d..2c4cd7e 100644
--- a/src/depiction_targeted_preproc/workflow/rules/rules_vis.smk
+++ b/src/depiction_targeted_preproc/workflow/rules/rules_vis.smk
@@ -2,9 +2,9 @@ rule vis_images:
     input:
         imzml=multiext("{sample}/calibrated",".imzML",".ibd"),
         config="{sample}/pipeline_params.yml",
-        mass_list="{sample}/images_{label}_mass_list.csv"
+        mass_list="{sample}/mass_list.visualization.csv"
     output:
-        hdf5="{sample}/images_{label}.hdf5"
+        hdf5="{sample}/images_default.hdf5"
     shell:
         "python -m depiction_targeted_preproc.workflow.vis.images "
         " --imzml-path {input.imzml[0]} --mass-list-path {input.mass_list} "