From a0eac94c14abee313bba38a4867b3e6b35a7b7be Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Wed, 2 Aug 2023 17:11:36 +0200 Subject: [PATCH 01/13] added working MHub implementation of the node21 baseline --- models/gc_node21_baseline/config/default.yml | 25 ++++ .../gc_node21_baseline/dockerfiles/Dockerfile | 47 +++++++ models/gc_node21_baseline/scripts/run.py | 42 ++++++ .../utils/Node21BaselineRunner.py | 32 +++++ .../utils/PanImgConverters.py | 122 ++++++++++++++++++ models/gc_node21_baseline/utils/__init__.py | 2 + 6 files changed, 270 insertions(+) create mode 100644 models/gc_node21_baseline/config/default.yml create mode 100644 models/gc_node21_baseline/dockerfiles/Dockerfile create mode 100644 models/gc_node21_baseline/scripts/run.py create mode 100644 models/gc_node21_baseline/utils/Node21BaselineRunner.py create mode 100644 models/gc_node21_baseline/utils/PanImgConverters.py create mode 100644 models/gc_node21_baseline/utils/__init__.py diff --git a/models/gc_node21_baseline/config/default.yml b/models/gc_node21_baseline/config/default.yml new file mode 100644 index 00000000..1286f53e --- /dev/null +++ b/models/gc_node21_baseline/config/default.yml @@ -0,0 +1,25 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: GC Node21 Baseline nodule prediction from chest X-Rays (dicom to json) + +execute: +- DicomImporter +- MhaConverter +- Node21BaselineRunner +- DataOrganizer + +modules: + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: True + meta: + mod: cr + + MhaConverter: + # TODO should be set to panimg backend + + DataOrganizer: + targets: + - json-->[i:sid]/nodules.json diff --git a/models/gc_node21_baseline/dockerfiles/Dockerfile b/models/gc_node21_baseline/dockerfiles/Dockerfile new file mode 100644 index 00000000..b1fb4cd9 --- /dev/null +++ b/models/gc_node21_baseline/dockerfiles/Dockerfile @@ -0,0 +1,47 @@ +FROM mhubai/base:latest + +# Update authors label +LABEL 
authors="sil.vandeleemput@radboudumc.nl" + +# Install PyTorch 2.0.1 (CUDA enabled) +RUN pip3 install --no-cache-dir torch==2.0.1+cu118 torchvision==0.15.2+cu118 -f https://download.pytorch.org/whl/torch_stable.html + +# Install git-lfs (required for unpacking model weights) +RUN apt update && apt install -y --no-install-recommends git-lfs && rm -rf /var/lib/apt/lists/* + +# Install node21 baseline algorithm +# - Git shallow clone to tmp directory +# - Extract relevant files to /opt/algorithm/ +# - Remove tmp directory +RUN git clone --depth 1 https://github.com/node21challenge/node21_detection_baseline /tmp/algorithm && \ + mkdir -p /opt/algorithm && \ + mv /tmp/algorithm/process.py /opt/algorithm/process.py && \ + mv /tmp/algorithm/postprocessing.py /opt/algorithm/postprocessing.py && \ + mv /tmp/algorithm/training_utils /opt/algorithm/training_utils && \ + mv /tmp/algorithm/model.pth /opt/algorithm/model.pth && \ + mkdir -p /opt/algorithm/hub/checkpoints && \ + mv /tmp/algorithm/resnet50-19c8e357.pth /opt/algorithm/hub/checkpoints/resnet50-19c8e357.pth && \ + rm -rf /tmp/algorithm + +# Install Node21 additional dependencies +RUN pip3 install --no-cache-dir evalutils==0.2.4 + +# TODO Clone the main branch of MHubAI/models +#RUN git stash \ +# && git fetch https://github.com/MHubAI/models.git main \ +# && git merge FETCH_HEAD \ +# && git sparse-checkout set "models/gc_node21_baseline" \ +# && git fetch https://github.com/MHubAI/models.git main + +# Configure app /opt/algorithm to be the main workdir and torch hub directory to pick up the model weights correctly +WORKDIR "/opt/algorithm" +ENV TORCH_HOME /opt/algorithm + +# Add /opt/algorithm to the PYTHON_PATH to be able to import the processor code +ENV PYTHON_PATH "/app:/opt/algorithm" + +#ENTRYPOINT ["python3", "-m", "mhubio.run"] +#CMD ["--config", "/app/models/gc_node21_baseline/config/default.yml"] + +# Default run script +CMD ["python3", "/app/models/gc_node21_baseline/scripts/run.py"] diff --git 
a/models/gc_node21_baseline/scripts/run.py b/models/gc_node21_baseline/scripts/run.py new file mode 100644 index 00000000..52285f11 --- /dev/null +++ b/models/gc_node21_baseline/scripts/run.py @@ -0,0 +1,42 @@ +""" +---------------------------------------------------------- +Mhub / DIAG - Run the GC Node21 baseline Algorithm locally +---------------------------------------------------------- + +------------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +------------------------------------------------------------- +""" + +import sys, os +sys.path.append('.') + +from mhubio.core import Config, DataType, FileType, CT, SEG +from mhubio.modules.importer.DataSorter import DataSorter +from mhubio.modules.importer.DicomImporter import DicomImporter +from mhubio.modules.organizer.DataOrganizer import DataOrganizer +from models.gc_node21_baseline.utils.Node21BaselineRunner import Node21BaselineRunner + +from models.gc_node21_baseline.utils import MhaPanImgConverter + +# clean-up +import shutil +shutil.rmtree("/app/data/sorted_data", ignore_errors=True) +shutil.rmtree("/app/tmp", ignore_errors=True) +shutil.rmtree("/app/data/output_data", ignore_errors=True) + +# config +config = Config('/app/models/gc_node21_baseline/config/default.yml') + +# import (dicom) +DicomImporter(config).execute() + +# convert (cr:dicom -> cr:mha) +MhaPanImgConverter(config).execute() + +# execute model (cr:mha -> json) +Node21BaselineRunner(config).execute() + +# organize data into output folder +DataOrganizer(config, set_file_permissions=sys.platform.startswith('linux')).execute() diff --git a/models/gc_node21_baseline/utils/Node21BaselineRunner.py b/models/gc_node21_baseline/utils/Node21BaselineRunner.py new file mode 100644 index 00000000..28a85a3a --- /dev/null +++ b/models/gc_node21_baseline/utils/Node21BaselineRunner.py @@ -0,0 +1,32 @@ +""" +------------------------------------------------------------- +Mhub / DIAG - Run 
Module for the GC Node21 baseline Algorithm +------------------------------------------------------------- + +------------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +------------------------------------------------------------- +""" +from mhubio.core import Instance, InstanceData, IO, Module, Meta + +import SimpleITK +import json +from pathlib import Path + +from process import Noduledetection + +# TODO should move to mhubio/core/templates.py +CR = Meta(mode="CR") # CR Computed Radiography + +class Node21BaselineRunner(Module): + + @IO.Instance() + @IO.Input('in_data', 'mha|nrrd|nifti:mod=cr', the='input chest X-Ray') + @IO.Output('out_data', 'nodules.json', 'json:model=Node21Baseline', 'in_data', the='Stoic baseline bounding box nodule predictions') + def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: + input_image = SimpleITK.ReadImage(in_data.abspath) + tmp_path = Path("/app/tmp") + predictions = Noduledetection(input_dir=tmp_path, output_dir=tmp_path).predict(input_image=input_image) + with open(out_data.abspath, "w") as f: + json.dump(predictions, f, indent=4) diff --git a/models/gc_node21_baseline/utils/PanImgConverters.py b/models/gc_node21_baseline/utils/PanImgConverters.py new file mode 100644 index 00000000..21b91f5d --- /dev/null +++ b/models/gc_node21_baseline/utils/PanImgConverters.py @@ -0,0 +1,122 @@ +""" +------------------------------------------------------------- +MHub - PanImg Conversion Modules Dicom2Mha and WSI-Dicom2Tiff +------------------------------------------------------------- + +------------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +------------------------------------------------------------- +""" + + +from typing import Optional + +from mhubio.modules.convert.DataConverter import DataConverter +from mhubio.core import Instance, InstanceData, 
DataType, FileType + +import os +from pathlib import Path +import shutil + +from panimg.exceptions import UnconsumedFilesException +from panimg.image_builders.dicom import image_builder_dicom +from panimg.image_builders.tiff import image_builder_tiff +from panimg.image_builders.metaio_nrrd import image_builder_nrrd + +import SimpleITK + + +class MhaPanImgConverter(DataConverter): + """ + Conversion module. + Convert instance data from dicom or nrrd to mha. + """ + + def convert(self, instance: Instance) -> Optional[InstanceData]: + + # create a converted instance + has_instance_dicom = instance.hasType(DataType(FileType.DICOM)) + has_instance_nrrd = instance.hasType(DataType(FileType.NRRD)) + + assert has_instance_dicom or has_instance_nrrd, f"CONVERT ERROR: required datatype (dicom or nrrd) not available in instance {str(instance)}." + + # select input data, dicom has priority over nrrd + input_data = instance.data.filter(DataType(FileType.DICOM) if has_instance_dicom else DataType(FileType.NRRD)).first() + + # out data + mha_data = InstanceData("image.mha", DataType(FileType.MHA, input_data.type.meta)) + mha_data.instance = instance + + # paths + inp_data_dir = Path(input_data.abspath) + out_mha_file = Path(mha_data.abspath) + + # sanity check + assert(inp_data_dir.is_dir()) + + # DICOM CT to MHA conversion (if the file doesn't exist yet) + if out_mha_file.is_file(): + print("CONVERT ERROR: File already exists: ", out_mha_file) + return None + else: + # run conversion using panimg + input_files = {f for f in inp_data_dir.glob(["*.nrrd", "*.dcm"][has_instance_dicom]) if f.is_file()} + img_builder = image_builder_dicom if has_instance_dicom else image_builder_nrrd + try: + for result in img_builder(files=input_files): + sitk_image = result.image # SimpleITK image + SimpleITK.WriteImage(sitk_image, str(out_mha_file)) + except UnconsumedFilesException as e: + # e.file_errors is keyed with a Path to a file that could not be consumed, + # with a list of all the errors 
found with loading it, + # the user can then choose what to do with that information + print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) + return None + + return mha_data + + +class TiffPanImgConverter(DataConverter): + """ + Conversion module. + Convert instance data from WSI-dicom to tiff. + """ + + def convert(self, instance: Instance) -> Optional[InstanceData]: + + # create a converted instance + assert instance.hasType(DataType(FileType.DICOM)), f"CONVERT ERROR: required datatype (dicom) not available in instance {str(instance)}." + dicom_data = instance.data.filter(DataType(FileType.DICOM)).first() + + # out data + tiff_data = InstanceData("image.tiff", DataType(FileType.TIFF, dicom_data.type.meta)) + tiff_data.instance = instance + + # paths + inp_dicom_dir = Path(dicom_data.abspath) + out_tiff_file = Path(tiff_data.abspath) + + # sanity check + assert(inp_dicom_dir.is_dir()) + + # WSI-DICOM to TIFF conversion (if the file doesn't exist yet) + if out_tiff_file.is_file(): + print("CONVERT ERROR: File already exists: ", out_tiff_file) + return None + else: + # run conversion using panimg + dcm_input_files = {f for f in inp_dicom_dir.glob("*.dcm") if f.is_file()} + print(f"Running WSI DICOM -> TIFF conversion on {len(dcm_input_files)} dcm files") + try: + for result in image_builder_tiff(files=dcm_input_files): + tiff_image = result.file # Path to the tiff file + shutil.move(str(tiff_image), str(out_tiff_file)) + except UnconsumedFilesException as e: + # e.file_errors is keyed with a Path to a file that could not be consumed, + # with a list of all the errors found with loading it, + # the user can then choose what to do with that information + print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) + return None + + return tiff_data diff --git a/models/gc_node21_baseline/utils/__init__.py b/models/gc_node21_baseline/utils/__init__.py new file mode 100644 index 00000000..3a4c5e68 --- 
/dev/null +++ b/models/gc_node21_baseline/utils/__init__.py @@ -0,0 +1,2 @@ +from .Node21BaselineRunner import * +from .PanImgConverters import * From 890f2e6bab2dc05a26aeea9f3623caecb4a10b60 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Sat, 5 Aug 2023 00:43:58 +0200 Subject: [PATCH 02/13] minor modification Node21 output JSON description --- models/gc_node21_baseline/utils/Node21BaselineRunner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/gc_node21_baseline/utils/Node21BaselineRunner.py b/models/gc_node21_baseline/utils/Node21BaselineRunner.py index 28a85a3a..8704e385 100644 --- a/models/gc_node21_baseline/utils/Node21BaselineRunner.py +++ b/models/gc_node21_baseline/utils/Node21BaselineRunner.py @@ -23,7 +23,7 @@ class Node21BaselineRunner(Module): @IO.Instance() @IO.Input('in_data', 'mha|nrrd|nifti:mod=cr', the='input chest X-Ray') - @IO.Output('out_data', 'nodules.json', 'json:model=Node21Baseline', 'in_data', the='Stoic baseline bounding box nodule predictions') + @IO.Output('out_data', 'nodules.json', 'json:model=Node21Baseline', 'in_data', the='Node21 baseline nodule prediction') def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: input_image = SimpleITK.ReadImage(in_data.abspath) tmp_path = Path("/app/tmp") From b283d7d138b13d1c5e1bb19b76016f0fca364ef0 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 14 Sep 2023 15:53:38 +0200 Subject: [PATCH 03/13] Update and clean Node21 model related files * Dockerfile * Use mhubio.run as entrypoint * Fixed commit for node21 repo * Updated TODO on MHub model code integration * default.yml * Changed mhaconverter backend to panimg * Node21BaselineRunner.py * Added comments * Removed * run.py * PanImgConverters.py --- models/gc_node21_baseline/config/default.yml | 2 +- .../gc_node21_baseline/dockerfiles/Dockerfile | 15 +-- models/gc_node21_baseline/scripts/run.py | 42 ------ .../utils/Node21BaselineRunner.py | 9 +- 
.../utils/PanImgConverters.py | 122 ------------------ models/gc_node21_baseline/utils/__init__.py | 1 - 6 files changed, 16 insertions(+), 175 deletions(-) delete mode 100644 models/gc_node21_baseline/scripts/run.py delete mode 100644 models/gc_node21_baseline/utils/PanImgConverters.py diff --git a/models/gc_node21_baseline/config/default.yml b/models/gc_node21_baseline/config/default.yml index 1286f53e..eb76b437 100644 --- a/models/gc_node21_baseline/config/default.yml +++ b/models/gc_node21_baseline/config/default.yml @@ -18,7 +18,7 @@ modules: mod: cr MhaConverter: - # TODO should be set to panimg backend + engine: panimg DataOrganizer: targets: diff --git a/models/gc_node21_baseline/dockerfiles/Dockerfile b/models/gc_node21_baseline/dockerfiles/Dockerfile index b1fb4cd9..f262afb5 100644 --- a/models/gc_node21_baseline/dockerfiles/Dockerfile +++ b/models/gc_node21_baseline/dockerfiles/Dockerfile @@ -10,10 +10,11 @@ RUN pip3 install --no-cache-dir torch==2.0.1+cu118 torchvision==0.15.2+cu118 -f RUN apt update && apt install -y --no-install-recommends git-lfs && rm -rf /var/lib/apt/lists/* # Install node21 baseline algorithm -# - Git shallow clone to tmp directory +# - Git clone to tmp directory (main branch, commit 6e57f5c564eb1d527e0f030de9755179b213731a) # - Extract relevant files to /opt/algorithm/ # - Remove tmp directory -RUN git clone --depth 1 https://github.com/node21challenge/node21_detection_baseline /tmp/algorithm && \ +RUN git clone https://github.com/node21challenge/node21_detection_baseline.git /tmp/algorithm && \ + cd /tmp/algorithm && git reset --hard 6e57f5c564eb1d527e0f030de9755179b213731a && cd /app && \ mkdir -p /opt/algorithm && \ mv /tmp/algorithm/process.py /opt/algorithm/process.py && \ mv /tmp/algorithm/postprocessing.py /opt/algorithm/postprocessing.py && \ @@ -26,7 +27,7 @@ RUN git clone --depth 1 https://github.com/node21challenge/node21_detection_base # Install Node21 additional dependencies RUN pip3 install --no-cache-dir 
evalutils==0.2.4 -# TODO Clone the main branch of MHubAI/models +# TODO: FIXME: temporary fix waiting for the latest base image update #RUN git stash \ # && git fetch https://github.com/MHubAI/models.git main \ # && git merge FETCH_HEAD \ @@ -40,8 +41,6 @@ ENV TORCH_HOME /opt/algorithm # Add /opt/algorithm to the PYTHON_PATH to be able to import the processor code ENV PYTHON_PATH "/app:/opt/algorithm" -#ENTRYPOINT ["python3", "-m", "mhubio.run"] -#CMD ["--config", "/app/models/gc_node21_baseline/config/default.yml"] - -# Default run script -CMD ["python3", "/app/models/gc_node21_baseline/scripts/run.py"] +# Specify default entrypoint +ENTRYPOINT ["python3", "-m", "mhubio.run"] +CMD ["--config", "/app/models/gc_node21_baseline/config/default.yml"] diff --git a/models/gc_node21_baseline/scripts/run.py b/models/gc_node21_baseline/scripts/run.py deleted file mode 100644 index 52285f11..00000000 --- a/models/gc_node21_baseline/scripts/run.py +++ /dev/null @@ -1,42 +0,0 @@ -""" ----------------------------------------------------------- -Mhub / DIAG - Run the GC Node21 baseline Algorithm locally ----------------------------------------------------------- - -------------------------------------------------------------- -Author: Sil van de Leemput -Email: sil.vandeleemput@radboudumc.nl -------------------------------------------------------------- -""" - -import sys, os -sys.path.append('.') - -from mhubio.core import Config, DataType, FileType, CT, SEG -from mhubio.modules.importer.DataSorter import DataSorter -from mhubio.modules.importer.DicomImporter import DicomImporter -from mhubio.modules.organizer.DataOrganizer import DataOrganizer -from models.gc_node21_baseline.utils.Node21BaselineRunner import Node21BaselineRunner - -from models.gc_node21_baseline.utils import MhaPanImgConverter - -# clean-up -import shutil -shutil.rmtree("/app/data/sorted_data", ignore_errors=True) -shutil.rmtree("/app/tmp", ignore_errors=True) -shutil.rmtree("/app/data/output_data", 
ignore_errors=True) - -# config -config = Config('/app/models/gc_node21_baseline/config/default.yml') - -# import (dicom) -DicomImporter(config).execute() - -# convert (cr:dicom -> cr:mha) -MhaPanImgConverter(config).execute() - -# execute model (cr:mha -> json) -Node21BaselineRunner(config).execute() - -# organize data into output folder -DataOrganizer(config, set_file_permissions=sys.platform.startswith('linux')).execute() diff --git a/models/gc_node21_baseline/utils/Node21BaselineRunner.py b/models/gc_node21_baseline/utils/Node21BaselineRunner.py index 8704e385..2e9717f4 100644 --- a/models/gc_node21_baseline/utils/Node21BaselineRunner.py +++ b/models/gc_node21_baseline/utils/Node21BaselineRunner.py @@ -14,10 +14,12 @@ import json from pathlib import Path +# Import Node21 baseline nodule detection algorithm from the node21_detection_baseline repo from process import Noduledetection # TODO should move to mhubio/core/templates.py -CR = Meta(mode="CR") # CR Computed Radiography +CR = Meta(mode="CR") # CR Computed Radiography + class Node21BaselineRunner(Module): @@ -25,8 +27,13 @@ class Node21BaselineRunner(Module): @IO.Input('in_data', 'mha|nrrd|nifti:mod=cr', the='input chest X-Ray') @IO.Output('out_data', 'nodules.json', 'json:model=Node21Baseline', 'in_data', the='Node21 baseline nodule prediction') def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: + # Read input image input_image = SimpleITK.ReadImage(in_data.abspath) + + # Run nodule detection algorithm on the input image and generate predictions tmp_path = Path("/app/tmp") predictions = Noduledetection(input_dir=tmp_path, output_dir=tmp_path).predict(input_image=input_image) + + # Export the predictions to a JSON file with open(out_data.abspath, "w") as f: json.dump(predictions, f, indent=4) diff --git a/models/gc_node21_baseline/utils/PanImgConverters.py b/models/gc_node21_baseline/utils/PanImgConverters.py deleted file mode 100644 index 21b91f5d..00000000 --- 
a/models/gc_node21_baseline/utils/PanImgConverters.py +++ /dev/null @@ -1,122 +0,0 @@ -""" -------------------------------------------------------------- -MHub - PanImg Conversion Modules Dicom2Mha and WSI-Dicom2Tiff -------------------------------------------------------------- - -------------------------------------------------------------- -Author: Sil van de Leemput -Email: sil.vandeleemput@radboudumc.nl -------------------------------------------------------------- -""" - - -from typing import Optional - -from mhubio.modules.convert.DataConverter import DataConverter -from mhubio.core import Instance, InstanceData, DataType, FileType - -import os -from pathlib import Path -import shutil - -from panimg.exceptions import UnconsumedFilesException -from panimg.image_builders.dicom import image_builder_dicom -from panimg.image_builders.tiff import image_builder_tiff -from panimg.image_builders.metaio_nrrd import image_builder_nrrd - -import SimpleITK - - -class MhaPanImgConverter(DataConverter): - """ - Conversion module. - Convert instance data from dicom or nrrd to mha. - """ - - def convert(self, instance: Instance) -> Optional[InstanceData]: - - # create a converted instance - has_instance_dicom = instance.hasType(DataType(FileType.DICOM)) - has_instance_nrrd = instance.hasType(DataType(FileType.NRRD)) - - assert has_instance_dicom or has_instance_nrrd, f"CONVERT ERROR: required datatype (dicom or nrrd) not available in instance {str(instance)}." 
- - # select input data, dicom has priority over nrrd - input_data = instance.data.filter(DataType(FileType.DICOM) if has_instance_dicom else DataType(FileType.NRRD)).first() - - # out data - mha_data = InstanceData("image.mha", DataType(FileType.MHA, input_data.type.meta)) - mha_data.instance = instance - - # paths - inp_data_dir = Path(input_data.abspath) - out_mha_file = Path(mha_data.abspath) - - # sanity check - assert(inp_data_dir.is_dir()) - - # DICOM CT to MHA conversion (if the file doesn't exist yet) - if out_mha_file.is_file(): - print("CONVERT ERROR: File already exists: ", out_mha_file) - return None - else: - # run conversion using panimg - input_files = {f for f in inp_data_dir.glob(["*.nrrd", "*.dcm"][has_instance_dicom]) if f.is_file()} - img_builder = image_builder_dicom if has_instance_dicom else image_builder_nrrd - try: - for result in img_builder(files=input_files): - sitk_image = result.image # SimpleITK image - SimpleITK.WriteImage(sitk_image, str(out_mha_file)) - except UnconsumedFilesException as e: - # e.file_errors is keyed with a Path to a file that could not be consumed, - # with a list of all the errors found with loading it, - # the user can then choose what to do with that information - print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) - return None - - return mha_data - - -class TiffPanImgConverter(DataConverter): - """ - Conversion module. - Convert instance data from WSI-dicom to tiff. - """ - - def convert(self, instance: Instance) -> Optional[InstanceData]: - - # create a converted instance - assert instance.hasType(DataType(FileType.DICOM)), f"CONVERT ERROR: required datatype (dicom) not available in instance {str(instance)}." 
- dicom_data = instance.data.filter(DataType(FileType.DICOM)).first() - - # out data - tiff_data = InstanceData("image.tiff", DataType(FileType.TIFF, dicom_data.type.meta)) - tiff_data.instance = instance - - # paths - inp_dicom_dir = Path(dicom_data.abspath) - out_tiff_file = Path(tiff_data.abspath) - - # sanity check - assert(inp_dicom_dir.is_dir()) - - # WSI-DICOM to TIFF conversion (if the file doesn't exist yet) - if out_tiff_file.is_file(): - print("CONVERT ERROR: File already exists: ", out_tiff_file) - return None - else: - # run conversion using panimg - dcm_input_files = {f for f in inp_dicom_dir.glob("*.dcm") if f.is_file()} - print(f"Running WSI DICOM -> TIFF conversion on {len(dcm_input_files)} dcm files") - try: - for result in image_builder_tiff(files=dcm_input_files): - tiff_image = result.file # Path to the tiff file - shutil.move(str(tiff_image), str(out_tiff_file)) - except UnconsumedFilesException as e: - # e.file_errors is keyed with a Path to a file that could not be consumed, - # with a list of all the errors found with loading it, - # the user can then choose what to do with that information - print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) - return None - - return tiff_data diff --git a/models/gc_node21_baseline/utils/__init__.py b/models/gc_node21_baseline/utils/__init__.py index 3a4c5e68..98c5b503 100644 --- a/models/gc_node21_baseline/utils/__init__.py +++ b/models/gc_node21_baseline/utils/__init__.py @@ -1,2 +1 @@ from .Node21BaselineRunner import * -from .PanImgConverters import * From 75cea2bed4723106884aebb7563b2dee96d6032d Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 10 Oct 2023 16:22:56 +0200 Subject: [PATCH 04/13] fix revert to default workdir --- models/gc_node21_baseline/dockerfiles/Dockerfile | 7 +++---- models/gc_node21_baseline/utils/Node21BaselineRunner.py | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git 
a/models/gc_node21_baseline/dockerfiles/Dockerfile b/models/gc_node21_baseline/dockerfiles/Dockerfile index f262afb5..4a756a55 100644 --- a/models/gc_node21_baseline/dockerfiles/Dockerfile +++ b/models/gc_node21_baseline/dockerfiles/Dockerfile @@ -34,12 +34,11 @@ RUN pip3 install --no-cache-dir evalutils==0.2.4 # && git sparse-checkout set "models/gc_node21_baseline" \ # && git fetch https://github.com/MHubAI/models.git main -# Configure app /opt/algorithm to be the main workdir and torch hub directory to pick up the model weights correctly -WORKDIR "/opt/algorithm" +# Configure app /opt/algorithm to be the torch hub directory to pick up the model weights correctly ENV TORCH_HOME /opt/algorithm -# Add /opt/algorithm to the PYTHON_PATH to be able to import the processor code -ENV PYTHON_PATH "/app:/opt/algorithm" +# Add Node21 source path to the PYTHONPATH to be able to import the processor code +ENV PYTHONPATH "/app:/opt/algorithm" # Specify default entrypoint ENTRYPOINT ["python3", "-m", "mhubio.run"] diff --git a/models/gc_node21_baseline/utils/Node21BaselineRunner.py b/models/gc_node21_baseline/utils/Node21BaselineRunner.py index 2e9717f4..b76d97f1 100644 --- a/models/gc_node21_baseline/utils/Node21BaselineRunner.py +++ b/models/gc_node21_baseline/utils/Node21BaselineRunner.py @@ -8,12 +8,12 @@ Email: sil.vandeleemput@radboudumc.nl ------------------------------------------------------------- """ -from mhubio.core import Instance, InstanceData, IO, Module, Meta - import SimpleITK import json from pathlib import Path +from mhubio.core import Instance, InstanceData, IO, Module, Meta + # Import Node21 baseline nodule detection algorithm from the node21_detection_baseline repo from process import Noduledetection From 8e17cccc2ef443bf650a95a7928584d74425824c Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Mon, 23 Oct 2023 16:14:55 +0200 Subject: [PATCH 05/13] add meta.json --- models/gc_node21_baseline/meta.json | 134 ++++++++++++++++++++++++++++ 1 file changed, 
134 insertions(+) create mode 100644 models/gc_node21_baseline/meta.json diff --git a/models/gc_node21_baseline/meta.json b/models/gc_node21_baseline/meta.json new file mode 100644 index 00000000..a102adab --- /dev/null +++ b/models/gc_node21_baseline/meta.json @@ -0,0 +1,134 @@ +{ + "id": "37ec076c-6cca-4601-a7fc-bdbe7eecbdcf", + "name": "node21_baseline", + "title": "NODE21 challenge baseline", + "summary": { + "description": "The NODE21 challenge was to detect and generate nodules in chest radiographs, this particular model focuses on detecting nodules.", + "inputs": [ + { + "label": "Chest radiograph", + "description": "Chest radiograph", + "format": "DICOM", + "modality": "CR", + "bodypartexamined": "Chest", + "slicethickness": "", + "non-contrast": false, + "contrast": false + } + ], + "outputs": [ + { + "type": "Prediction", + "valueType": "JSON", + "label": "Nodule bounding boxes", + "description": "Multiple bounding boxes for the nodule locations for the input chest radiograph. Each bounding box also has an associated nodule likelihood (probability).", + "classes": [] + } + ], + "model": { + "architecture": "Faster R-CNN", + "training": "supervised", + "cmpapproach": "2D" + }, + "data": { + "training": { + "vol_samples": 4382 + }, + "evaluation": { + "vol_samples": 500 + }, + "public": false, + "external": false + } + }, + "details": { + "name": "NODE21 baseline", + "version": "6e57f5c564eb1d527e0f030de9755179b213731a", + "devteam": "E. Sogancioglu & K. 
Murphy", + "type": "Classification", + "date": { + "weights": "2021-11-01", + "code": "2022-02-01", + "pub": "" + }, + "cite": "", + "license": { + "code": "Apache 2.0", + "weights": "Apache 2.0" + }, + "publications": [], + "github": "https://github.com/node21challenge/node21_detection_baseline", + "zenodo": "", + "colab": "", + "slicer": false + }, + "info": { + "use": { + "title": "Intended use", + "text": "Prediction of the location and likelihood of nodules in frontal chest radiographs.", + "references": [], + "tables": [] + }, + "analyses": { + "title": "Evaluation", + "text": "The submitted algorithms was evaluated on the experimental test set. Specifically the AUC score and the sensitivity at various average false positive rates using FROC (1/4, 1/2, 1) were computed. The final metric used to rank the leaderboard will be calculated as follows: rank_metric = (0.75 * AUC) + (0.25 * Sensitivity at 1/4 FP/image) ", + "references": [ + { + "label": "NODE21 challenge details", + "uri": "https://node21.grand-challenge.org/Details/" + } + ], + "tables": [] + }, + "evaluation": { + "title": "Evaluation data", + "text": "The test set consists of two private datasets of 281 frontal chest X-rays, 166 of which are positive (with nodules)", + "references": [ + { + "label": "NODE21 data section", + "uri": "https://node21.grand-challenge.org/Data/" + } + ], + "tables": [] + }, + "training": { + "title": "Training data", + "text": "For the NODE21 challenge four publicly available training datasets were combined: 1. JSRT 2. PadChest 3. Chestx-ray14 4. 
Open-I", + "references": [ + { + "label": "NODE21 training data (combined)", + "uri": "https://zenodo.org/record/5548363" + }, + { + "label": "JSRT", + "uri": "https://dx.doi.org/10.2214/ajr.174.1.1740071" + }, + { + "label": "PadChest", + "uri": "https://dx.doi.org/10.1016/j.media.2020.101797" + }, + { + "label": "Chestx-ray14", + "uri": "https://dx.doi.org/10.1109/cvpr.2017.369" + }, + { + "label": "Open-I", + "uri": "https://dx.doi.org/10.5626/JCSE.2012.6.2.168" + } + ], + "tables": [] + }, + "ethics": { + "title": "", + "text": "", + "references": [], + "tables": [] + }, + "limitations": { + "title": "Limitations", + "text": "This algorithm was developed for research purposes only.", + "references": [], + "tables": [] + } + } +} \ No newline at end of file From 9e9c1dfff6664b56d3e57243df850ad63c5fe608 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 23 Nov 2023 21:30:45 +0100 Subject: [PATCH 06/13] add mhub model definition import to Dockerfile --- models/gc_node21_baseline/dockerfiles/Dockerfile | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/models/gc_node21_baseline/dockerfiles/Dockerfile b/models/gc_node21_baseline/dockerfiles/Dockerfile index 4a756a55..304be8a4 100644 --- a/models/gc_node21_baseline/dockerfiles/Dockerfile +++ b/models/gc_node21_baseline/dockerfiles/Dockerfile @@ -27,12 +27,9 @@ RUN git clone https://github.com/node21challenge/node21_detection_baseline.git / # Install Node21 additional dependencies RUN pip3 install --no-cache-dir evalutils==0.2.4 -# TODO: FIXME: temporary fix waiting for the latest base image update -#RUN git stash \ -# && git fetch https://github.com/MHubAI/models.git main \ -# && git merge FETCH_HEAD \ -# && git sparse-checkout set "models/gc_node21_baseline" \ -# && git fetch https://github.com/MHubAI/models.git main +# Import the MHub model definiton +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh gc_node21_baseline ${MHUB_MODELS_REPO} # Configure app /opt/algorithm to be 
the torch hub directory to pick up the model weights correctly ENV TORCH_HOME /opt/algorithm From 06e1e25c4f2c78b4fb98fbb120217aa83d471c73 Mon Sep 17 00:00:00 2001 From: Miriam Groeneveld Date: Thu, 18 Apr 2024 11:27:02 +0200 Subject: [PATCH 07/13] Updated meta.json --- models/gc_node21_baseline/meta.json | 80 ++++++++++++++++++----------- 1 file changed, 50 insertions(+), 30 deletions(-) diff --git a/models/gc_node21_baseline/meta.json b/models/gc_node21_baseline/meta.json index a102adab..5ca7c5bb 100644 --- a/models/gc_node21_baseline/meta.json +++ b/models/gc_node21_baseline/meta.json @@ -1,9 +1,9 @@ { "id": "37ec076c-6cca-4601-a7fc-bdbe7eecbdcf", - "name": "node21_baseline", + "name": "gc_node21_baseline", "title": "NODE21 challenge baseline", "summary": { - "description": "The NODE21 challenge was to detect and generate nodules in chest radiographs, this particular model focuses on detecting nodules.", + "description": "This model detects the location of nodules in Chest radiographs, and generates bounding boxes around these nodules.", "inputs": [ { "label": "Chest radiograph", @@ -32,10 +32,10 @@ }, "data": { "training": { - "vol_samples": 4382 + "vol_samples": 4882 }, "evaluation": { - "vol_samples": 500 + "vol_samples": 579 }, "public": false, "external": false @@ -43,20 +43,25 @@ }, "details": { "name": "NODE21 baseline", - "version": "6e57f5c564eb1d527e0f030de9755179b213731a", - "devteam": "E. Sogancioglu & K. Murphy", - "type": "Classification", + "version": "v1.0addedtag", + "devteam": "DIAGNijmegen (Diagnostic Image Analysis Group, Radboud UMC, The Netherlands)", + "type": "Faster R-CNN architecture using ResNet50 as the backbone.", "date": { "weights": "2021-11-01", "code": "2022-02-01", "pub": "" }, - "cite": "", + "cite": "E. 
Sogancioglu et al., Nodule detection and generation on chest X-rays: NODE21 Challenge, in IEEE Transactions on Medical Imaging, doi: 10.1109/TMI.2024.3382042.", "license": { "code": "Apache 2.0", "weights": "Apache 2.0" }, - "publications": [], + "publications": [ + { + "title": "Nodule detection and generation on chest X-rays: NODE21 Challenge", + "uri": "https://ieeexplore.ieee.org/document/10479589" + } + ], "github": "https://github.com/node21challenge/node21_detection_baseline", "zenodo": "", "colab": "", @@ -65,24 +70,51 @@ "info": { "use": { "title": "Intended use", - "text": "Prediction of the location and likelihood of nodules in frontal chest radiographs.", - "references": [], + "text": "The algorithm processes a frontal radiograph of the chest and predicts the location and likelihood of nodules.", + "references": [{ + "label": "Node21 baseline algorithm on grand-challenge", + "uri": "https://grand-challenge.org/algorithms/node21_baseline_detector/" + }], "tables": [] }, "analyses": { "title": "Evaluation", - "text": "The submitted algorithms was evaluated on the experimental test set. Specifically the AUC score and the sensitivity at various average false positive rates using FROC (1/4, 1/2, 1) were computed. The final metric used to rank the leaderboard will be calculated as follows: rank_metric = (0.75 * AUC) + (0.25 * Sensitivity at 1/4 FP/image) ", + "text": "The evaluation of this model was performed in two parts, firstly as part of the Node21 challenge [1], and secondly as part of the experiments for the publication. Specifically the AUC score and the sensitivity at various average false positive rates using FROC (1/4, 1/2, 1) were computed. 
The final metric used to rank the leaderboard will be calculated as follows: rank_metric = (0.75 * AUC) + (0.25 * Sensitivity at 1/4 FP/image) [2]", "references": [ { "label": "NODE21 challenge details", "uri": "https://node21.grand-challenge.org/Details/" + }, + { + "label": "NODE21 baseline algorithm evaluation results on grand-challenge.", + "uri": "https://node21.grand-challenge.org/evaluation/a626f004-1c38-45e1-9e35-89ccfb807e2d/" } ], - "tables": [] + "tables": [ + { + "label": "Evaluation results on the first NODE21 testing cohort of 281 cases as reported in the NODE21 challenge.", + "entries": { + "AUC": 0.839, + "sensitivity_5": 0.532, + "sensitivity_25": 0.443, + "sensitivity_125": 0.350, + "final_ranking": 0.740 + } + }, + { + "label": "Evaluation results on the second NODE21 testing cohort of 298 cases as described in the publication.", + "entries": { + "AUC": 0.816, + "sensitivity_5": 0.714, + "sensitivity_25": 0.635, + "sensitivity_125": 0.504 + } + } + ] }, "evaluation": { "title": "Evaluation data", - "text": "The test set consists of two private datasets of 281 frontal chest X-rays, 166 of which are positive (with nodules)", + "text": "The model was evaluated with two separate, private datasets [1]. The first dataset consists of 281 frontal chest X-rays, 166 of which are positive (with nodules). The second dataset used in the experiments described in the publication consists of 298 frontal radiographs with or without nodules. They originate from multiple medical centers and have been acquired with multiple different x-ray machines.", "references": [ { "label": "NODE21 data section", "uri": "https://node21.grand-challenge.org/Data/" @@ -93,27 +125,15 @@ }, "training": { "title": "Training data", - "text": "For the NODE21 challenge four publicly available training datasets were combined: 1. JSRT 2. PadChest 3. Chestx-ray14 4. Open-I", + "text": "The model was trained on the NODE21 training dataset [1] that was preprocessed with the publicly available OpenCXR library [2]. 
This dataset consists of 4882 radiographs, of which 1476 contain nodules. In order to tackle the data imbalance issue, images with nodules were oversampled until the number of negative images was reached. The model was trained for 30 epochs, and early stopping was used in case of no improvement in the validation set performance for 5 consecutive epochs", "references": [ { - "label": "NODE21 training data (combined)", + "label": "NODE21 training data", "uri": "https://zenodo.org/record/5548363" }, { - "label": "JSRT", - "uri": "https://dx.doi.org/10.2214/ajr.174.1.1740071" - }, - { - "label": "PadChest", - "uri": "https://dx.doi.org/10.1016/j.media.2020.101797" - }, - { - "label": "Chestx-ray14", - "uri": "https://dx.doi.org/10.1109/cvpr.2017.369" - }, - { - "label": "Open-I", - "uri": "https://dx.doi.org/10.5626/JCSE.2012.6.2.168" + "label": "OpenCXR library", + "uri": "https://github.com/DIAGNijmegen/opencxr" } ], "tables": [] From 325e8a5fff4cc3a2781afb3533a78ab9046be888 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 18 Apr 2024 11:43:29 +0200 Subject: [PATCH 08/13] meta.json - updated version number --- models/gc_node21_baseline/meta.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/gc_node21_baseline/meta.json b/models/gc_node21_baseline/meta.json index 5ca7c5bb..19d94bcc 100644 --- a/models/gc_node21_baseline/meta.json +++ b/models/gc_node21_baseline/meta.json @@ -43,7 +43,7 @@ }, "details": { "name": "NODE21 baseline", - "version": "v1.0addedtag", + "version": "1.0", "devteam": "DIAGNijmegen (Diagnostic Image Analysis Group, Radboud UMC, The Netherlands)", "type": "Faster R-CNN architecture using ResNet50 as the backbone.", "date": { From 777ff39486dd64eac603afc5c63f798e680faea1 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 18 Apr 2024 11:46:19 +0200 Subject: [PATCH 09/13] meta.json - changed table entries from float into string --- models/gc_node21_baseline/meta.json | 18 +++++++++--------- 1 file changed, 9 
insertions(+), 9 deletions(-) diff --git a/models/gc_node21_baseline/meta.json b/models/gc_node21_baseline/meta.json index 19d94bcc..6e649b38 100644 --- a/models/gc_node21_baseline/meta.json +++ b/models/gc_node21_baseline/meta.json @@ -94,20 +94,20 @@ { "label": "Evaluation results on the first NODE21 testing cohort of 281 cases as reported in the NODE21 challenge.", "entries": { - "AUC": 0.839, - "sensitivity_5": 0.532, - "sensitivity_25": 0.443, - "sensitivity_125": 0.350, - "final_ranking": 0.740 + "AUC": "0.839", + "sensitivity_5": "0.532", + "sensitivity_25": "0.443", + "sensitivity_125": "0.350", + "final_ranking": "0.740" } }, { "label": "Evaluation results on the second NODE21 testing cohort of 298 cases as described in the publication.", "entries": { - "AUC": 0.816, - "sensitivity_5": 0.714, - "sensitivity_25": 0.635, - "sensitivity_125": 0.504 + "AUC": "0.816", + "sensitivity_5": "0.714", + "sensitivity_25": "0.635", + "sensitivity_125": "0.504" } } ] From 5cc1190ee0064976994ceed4159f4cb94d635a8e Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 18 Apr 2024 11:50:54 +0200 Subject: [PATCH 10/13] Dockerfile - fixed source pull to version tag --- models/gc_node21_baseline/dockerfiles/Dockerfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/models/gc_node21_baseline/dockerfiles/Dockerfile b/models/gc_node21_baseline/dockerfiles/Dockerfile index 304be8a4..63af653a 100644 --- a/models/gc_node21_baseline/dockerfiles/Dockerfile +++ b/models/gc_node21_baseline/dockerfiles/Dockerfile @@ -10,11 +10,10 @@ RUN pip3 install --no-cache-dir torch==2.0.1+cu118 torchvision==0.15.2+cu118 -f RUN apt update && apt install -y --no-install-recommends git-lfs && rm -rf /var/lib/apt/lists/* # Install node21 baseline algorithm -# - Git clone to tmp directory (main branch, commit 6e57f5c564eb1d527e0f030de9755179b213731a) +# - Git clone to tmp directory (main branch, tag v1.0addedtag) # - Extract relevant files to /opt/algorithm/ # - Remove tmp 
directory -RUN git clone https://github.com/node21challenge/node21_detection_baseline.git /tmp/algorithm && \ - cd /tmp/algorithm && git reset --hard 6e57f5c564eb1d527e0f030de9755179b213731a && cd /app && \ +RUN git clone --depth 1 --branch v1.0addedtag https://github.com/node21challenge/node21_detection_baseline.git /tmp/algorithm && \ mkdir -p /opt/algorithm && \ mv /tmp/algorithm/process.py /opt/algorithm/process.py && \ mv /tmp/algorithm/postprocessing.py /opt/algorithm/postprocessing.py && \ From 2bafc52d45ec658475f936dc4157d2247af2d0bd Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 18 Apr 2024 12:59:31 +0200 Subject: [PATCH 11/13] runner.py - add outputdatas for nodule probabilities and bounding boxes --- .../utils/Node21BaselineRunner.py | 44 ++++++++++++++++--- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/models/gc_node21_baseline/utils/Node21BaselineRunner.py b/models/gc_node21_baseline/utils/Node21BaselineRunner.py index b76d97f1..c9ef7b31 100644 --- a/models/gc_node21_baseline/utils/Node21BaselineRunner.py +++ b/models/gc_node21_baseline/utils/Node21BaselineRunner.py @@ -12,21 +12,38 @@ import json from pathlib import Path -from mhubio.core import Instance, InstanceData, IO, Module, Meta +from mhubio.core import Instance, InstanceData, IO, Module, Meta, ValueOutput, OutputDataCollection # Import Node21 baseline nodule detection algorithm from the node21_detection_baseline repo from process import Noduledetection -# TODO should move to mhubio/core/templates.py -CR = Meta(mode="CR") # CR Computed Radiography + +@ValueOutput.Name('noduleprob') +@ValueOutput.Label('Nodule probability score.') +@ValueOutput.Meta(Meta(min=0.0, max=1.0, type="probability")) +@ValueOutput.Type(float) +@ValueOutput.Description('The predicted probability for a single lung nodule detected by the Node21Baseline algorithm.') +class NoduleProbability(ValueOutput): + pass + + +@ValueOutput.Name('nodulebbox') +@ValueOutput.Label('Nodule 2D bounding box.') 
+@ValueOutput.Meta(Meta(format='json')) +@ValueOutput.Type(str) +@ValueOutput.Description('The predicted 2D bounding box for a single lung nodule detected by the Node21Baseline algorithm.') +class NoduleBoundingBox(ValueOutput): + pass class Node21BaselineRunner(Module): @IO.Instance() @IO.Input('in_data', 'mha|nrrd|nifti:mod=cr', the='input chest X-Ray') - @IO.Output('out_data', 'nodules.json', 'json:model=Node21Baseline', 'in_data', the='Node21 baseline nodule prediction') - def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: + @IO.Output('out_data', 'nodules.json', 'json:model=Node21Baseline', 'in_data', the='Node21 baseline nodule prediction in JSON format') + @IO.OutputDatas('nodule_probs', NoduleProbability) + @IO.OutputDatas('nodule_bounding_boxes', NoduleBoundingBox) + def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData, nodule_probs: OutputDataCollection, nodule_bounding_boxes: OutputDataCollection) -> None: # Read input image input_image = SimpleITK.ReadImage(in_data.abspath) @@ -37,3 +54,20 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData # Export the predictions to a JSON file with open(out_data.abspath, "w") as f: json.dump(predictions, f, indent=4) + + # Export the relevant data + for nodule_idx, box in enumerate(predictions["boxes"]): + probability, corners = box["probability"], box["corners"] + + nodule_prob = NoduleProbability() + nodule_prob.description += f" (for nodule {nodule_idx})" + nodule_prob.meta = Meta(id=nodule_idx, min=0.0, max=1.0, type="probability") + nodule_prob.value = probability + + nodule_bounding_box = NoduleBoundingBox() + nodule_bounding_box.description += f" (for nodule {nodule_idx})" + nodule_bounding_box.meta = Meta(id=nodule_idx, format="json") + nodule_bounding_box.value = json.dumps(corners) + + nodule_probs.add(nodule_prob) + nodule_bounding_boxes.add(nodule_bounding_box) From fb17009dc03dcdb7a07e95b8122798ff2f151b90 
Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 18 Apr 2024 13:34:03 +0200 Subject: [PATCH 12/13] wrap algorithm in cli to allow proper capture within mhub framework --- .../utils/Node21BaselineRunner.py | 35 ++++++++----- models/gc_node21_baseline/utils/cli.py | 50 +++++++++++++++++++ 2 files changed, 72 insertions(+), 13 deletions(-) create mode 100644 models/gc_node21_baseline/utils/cli.py diff --git a/models/gc_node21_baseline/utils/Node21BaselineRunner.py b/models/gc_node21_baseline/utils/Node21BaselineRunner.py index c9ef7b31..d5c410d8 100644 --- a/models/gc_node21_baseline/utils/Node21BaselineRunner.py +++ b/models/gc_node21_baseline/utils/Node21BaselineRunner.py @@ -8,14 +8,14 @@ Email: sil.vandeleemput@radboudumc.nl ------------------------------------------------------------- """ -import SimpleITK import json +import sys from pathlib import Path from mhubio.core import Instance, InstanceData, IO, Module, Meta, ValueOutput, OutputDataCollection -# Import Node21 baseline nodule detection algorithm from the node21_detection_baseline repo -from process import Noduledetection + +CLI_PATH = Path(__file__).parent.absolute() / "cli.py" @ValueOutput.Name('noduleprob') @@ -44,16 +44,25 @@ class Node21BaselineRunner(Module): @IO.OutputDatas('nodule_probs', NoduleProbability) @IO.OutputDatas('nodule_bounding_boxes', NoduleBoundingBox) def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData, nodule_probs: OutputDataCollection, nodule_bounding_boxes: OutputDataCollection) -> None: - # Read input image - input_image = SimpleITK.ReadImage(in_data.abspath) - - # Run nodule detection algorithm on the input image and generate predictions - tmp_path = Path("/app/tmp") - predictions = Noduledetection(input_dir=tmp_path, output_dir=tmp_path).predict(input_image=input_image) - - # Export the predictions to a JSON file - with open(out_data.abspath, "w") as f: - json.dump(predictions, f, indent=4) + # build command (order matters!) 
+ cmd = [ + sys.executable, + str(CLI_PATH), + in_data.abspath, + out_data.abspath + ] + + # run the command as subprocess + self.subprocess(cmd, text=True) + + # Confirm the expected output file was generated + if not Path(out_data.abspath).is_file(): + raise FileNotFoundError(f"Node21BaseLineRunner - Could not find the expected " + f"output file: {out_data.abspath}, something went wrong running the CLI.") + + # Read the predictions from the JSON file + with open(out_data.abspath, "r") as f: + predictions = json.load(f) # Export the relevant data for nodule_idx, box in enumerate(predictions["boxes"]): diff --git a/models/gc_node21_baseline/utils/cli.py b/models/gc_node21_baseline/utils/cli.py new file mode 100644 index 00000000..920a99cc --- /dev/null +++ b/models/gc_node21_baseline/utils/cli.py @@ -0,0 +1,50 @@ +""" +--------------------------------------------------- +Mhub / DIAG - CLI for the Node21 baseline Algorithm + The model algorithm was wrapped in a CLI to ensure + the mhub framework is able to properly capture + the stdout generated by the algorithm +--------------------------------------------------- + +--------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +--------------------------------------------------- +""" + +import argparse +from pathlib import Path +import json + +import SimpleITK + +# Import Node21 baseline nodule detection algorithm from the node21_detection_baseline repo +from process import Noduledetection + + +def run_classifier(input_cxr: Path, output_json_file: Path): + # Read input image + input_image = SimpleITK.ReadImage(str(input_cxr)) + + # Run nodule detection algorithm on the input image and generate predictions + tmp_path = Path("/app/tmp") + predictions = Noduledetection(input_dir=tmp_path, output_dir=tmp_path).predict(input_image=input_image) + + # Export the predictions to a JSON file + with open(output_json_file, "w") as f: + json.dump(predictions, f, indent=4) + + 
+def run_classifier_cli(): + parser = argparse.ArgumentParser("CLI to run the Node21 baseline classifier") + parser.add_argument("input_cxr", type=str, help="input CXR image (MHA)") + parser.add_argument("output_json_file", type=str, help="Output nodule bounding boxes and probabilities predictions (JSON)") + args = parser.parse_args() + run_classifier( + input_cxr=Path(args.input_cxr), + output_json_file=Path(args.output_json_file) + ) + + +if __name__ == "__main__": + run_classifier_cli() From e27ec43512c780dae4f9e2020d9667a4343efec3 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Wed, 24 Apr 2024 10:08:19 +0200 Subject: [PATCH 13/13] Dockerfile, meta.json - Updated source to v1.1, moved cli to src dir, added improved output nodule sorting --- models/gc_node21_baseline/dockerfiles/Dockerfile | 4 ++-- models/gc_node21_baseline/meta.json | 2 +- models/gc_node21_baseline/{utils => src}/cli.py | 11 +++++++++++ .../gc_node21_baseline/utils/Node21BaselineRunner.py | 2 +- 4 files changed, 15 insertions(+), 4 deletions(-) rename models/gc_node21_baseline/{utils => src}/cli.py (77%) diff --git a/models/gc_node21_baseline/dockerfiles/Dockerfile b/models/gc_node21_baseline/dockerfiles/Dockerfile index 63af653a..f1e94fe7 100644 --- a/models/gc_node21_baseline/dockerfiles/Dockerfile +++ b/models/gc_node21_baseline/dockerfiles/Dockerfile @@ -10,10 +10,10 @@ RUN pip3 install --no-cache-dir torch==2.0.1+cu118 torchvision==0.15.2+cu118 -f RUN apt update && apt install -y --no-install-recommends git-lfs && rm -rf /var/lib/apt/lists/* # Install node21 baseline algorithm -# - Git clone to tmp directory (main branch, tag v1.0addedtag) +# - Git clone to tmp directory (main branch, tag v1.1) # - Extract relevant files to /opt/algorithm/ # - Remove tmp directory -RUN git clone --depth 1 --branch v1.0addedtag https://github.com/node21challenge/node21_detection_baseline.git /tmp/algorithm && \ +RUN git clone --depth 1 --branch v1.1 
https://github.com/node21challenge/node21_detection_baseline.git /tmp/algorithm && \ mkdir -p /opt/algorithm && \ mv /tmp/algorithm/process.py /opt/algorithm/process.py && \ mv /tmp/algorithm/postprocessing.py /opt/algorithm/postprocessing.py && \ diff --git a/models/gc_node21_baseline/meta.json b/models/gc_node21_baseline/meta.json index 6e649b38..144c727b 100644 --- a/models/gc_node21_baseline/meta.json +++ b/models/gc_node21_baseline/meta.json @@ -43,7 +43,7 @@ }, "details": { "name": "NODE21 baseline", - "version": "1.0", + "version": "1.1", "devteam": "DIAGNijmegen (Diagnostic Image Analysis Group, Radboud UMC, The Netherlands)", "type": "Faster R-CNN architecture using ResNet50 as the backbone.", "date": { diff --git a/models/gc_node21_baseline/utils/cli.py b/models/gc_node21_baseline/src/cli.py similarity index 77% rename from models/gc_node21_baseline/utils/cli.py rename to models/gc_node21_baseline/src/cli.py index 920a99cc..97e5a49f 100644 --- a/models/gc_node21_baseline/utils/cli.py +++ b/models/gc_node21_baseline/src/cli.py @@ -30,6 +30,17 @@ def run_classifier(input_cxr: Path, output_json_file: Path): tmp_path = Path("/app/tmp") predictions = Noduledetection(input_dir=tmp_path, output_dir=tmp_path).predict(input_image=input_image) + # sort predictions on probability first (descending), corner positions second (ascending) + # this was implemented because the old sorting only sorts on probability (descending) and can give different + # results if two probabilities are the same + predictions["boxes"] = list( + sorted( + predictions["boxes"], + key=lambda x : tuple([-x["probability"]] + [tuple(c) for c in x["corners"]]), + reverse=False + ) + ) + # Export the predictions to a JSON file with open(output_json_file, "w") as f: json.dump(predictions, f, indent=4) diff --git a/models/gc_node21_baseline/utils/Node21BaselineRunner.py b/models/gc_node21_baseline/utils/Node21BaselineRunner.py index d5c410d8..06a06cc3 100644 --- 
a/models/gc_node21_baseline/utils/Node21BaselineRunner.py +++ b/models/gc_node21_baseline/utils/Node21BaselineRunner.py @@ -15,7 +15,7 @@ from mhubio.core import Instance, InstanceData, IO, Module, Meta, ValueOutput, OutputDataCollection -CLI_PATH = Path(__file__).parent.absolute() / "cli.py" +CLI_PATH = Path(__file__).parent.parent.absolute() / "src" / "cli.py" @ValueOutput.Name('noduleprob')