From bfccab1ef6562ca1e9ff72341e1994f54ed9d975 Mon Sep 17 00:00:00 2001 From: Leonardo Schwarz Date: Mon, 1 Jul 2024 15:03:01 +0200 Subject: [PATCH] perform checksum check as part of the app fixes #3 --- src/depiction/persistence/imzml_read_file.py | 4 ++-- src/depiction_targeted_preproc/app/entrypoint.py | 12 ++++++++++++ tests/unit/persistence/test_imzml_read_file.py | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/depiction/persistence/imzml_read_file.py b/src/depiction/persistence/imzml_read_file.py index e30d220..ec48513 100644 --- a/src/depiction/persistence/imzml_read_file.py +++ b/src/depiction/persistence/imzml_read_file.py @@ -164,8 +164,8 @@ def summary(self, checksums: bool = True) -> str: f"{mz_range_line}" ) - def print_summary(self, checksums: bool = True) -> None: - print(self.summary(checksums=checksums)) + def print_summary(self, checksums: bool = True, file=None) -> None: + print(self.summary(checksums=checksums), file=file) @cached_property def pixel_size(self) -> PixelSize | None: diff --git a/src/depiction_targeted_preproc/app/entrypoint.py b/src/depiction_targeted_preproc/app/entrypoint.py index 72125a9..9e7b352 100644 --- a/src/depiction_targeted_preproc/app/entrypoint.py +++ b/src/depiction_targeted_preproc/app/entrypoint.py @@ -11,6 +11,7 @@ from loguru import logger from depiction.misc.find_file_util import find_one_by_extension +from depiction.persistence import ImzmlReadFile from depiction_targeted_preproc.pipeline.setup import setup_workdir from depiction_targeted_preproc.pipeline_config.artifacts_mapping import ARTIFACT_FILES_MAPPING, get_result_files from depiction_targeted_preproc.pipeline_config.model import ( @@ -42,6 +43,9 @@ def entrypoint( if not workunit_yaml_file.exists(): raise RuntimeError(f"Workunit yaml file not found: {workunit_yaml_file}") + # Ensure the input file's checksum passes + check_imzml_file(ImzmlReadFile(input_imzml_file)) + # Parse the params params = parse_parameters(workunit_yaml_file) @@ -77,6 +81,14 @@ def export_pipeline_params(work_dir: Path, output_dir: Path, sample_name: str) - shutil.copy(work_dir / sample_name / "pipeline_params.yml", output_dir / sample_name / "pipeline_params.yml") +def check_imzml_file(imzml_file: ImzmlReadFile) -> None: + # TODO this is not very efficient, but is better than not checking the file at all + logger.info(f"Checking the integrity of the input file: {imzml_file.imzml_file}") + logger.info(imzml_file.summary()) + if not imzml_file.is_checksum_valid: + raise RuntimeError(f"Checksum validation failed for the input file: {imzml_file.imzml_file}") + + def zip_results(output_dir: Path, sample_name: str) -> None: with ZipFile(output_dir / f"{sample_name}.zip", "w") as zipf: for file in (output_dir / sample_name).rglob("*"): diff --git a/tests/unit/persistence/test_imzml_read_file.py b/tests/unit/persistence/test_imzml_read_file.py index 5d4d640..fff6fb7 100644 --- a/tests/unit/persistence/test_imzml_read_file.py +++ b/tests/unit/persistence/test_imzml_read_file.py @@ -228,7 +228,7 @@ def test_summary_when_continuous(self, method_reader) -> None: def test_print_summary(self, mock_summary, mock_print) -> None: self.mock_read_file.print_summary() mock_summary.assert_called_once_with(checksums=True) - mock_print.assert_called_once_with(mock_summary.return_value) + mock_print.assert_called_once_with(mock_summary.return_value, file=None) @patch.object(ImzmlReadFile, "reader") def test_cached_properties(self, method_reader) -> None: