From 395140a9fc5ea9f9973519158a48b35542baa91a Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 22 Jun 2023 17:32:00 +0200 Subject: [PATCH 001/125] add initial implementation grt123 model --- models/grt123_lung_cancer/__init__.py | 1 + models/grt123_lung_cancer/config/config.yml | 14 ++ .../dockerfiles/cuda11.4/Dockerfile | 55 ++++++++ .../dockerfiles/cuda12.0/Dockerfile | 55 ++++++++ .../dockerfiles/nocuda/Dockerfile | 55 ++++++++ models/grt123_lung_cancer/scripts/run.py | 42 ++++++ .../utils/LungCancerClassifierRunner.py | 59 +++++++++ .../utils/PanImgConverters.py | 122 ++++++++++++++++++ models/grt123_lung_cancer/utils/__init__.py | 1 + 9 files changed, 404 insertions(+) create mode 100644 models/grt123_lung_cancer/__init__.py create mode 100644 models/grt123_lung_cancer/config/config.yml create mode 100644 models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile create mode 100644 models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile create mode 100644 models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile create mode 100644 models/grt123_lung_cancer/scripts/run.py create mode 100644 models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py create mode 100644 models/grt123_lung_cancer/utils/PanImgConverters.py create mode 100644 models/grt123_lung_cancer/utils/__init__.py diff --git a/models/grt123_lung_cancer/__init__.py b/models/grt123_lung_cancer/__init__.py new file mode 100644 index 00000000..90f60fdd --- /dev/null +++ b/models/grt123_lung_cancer/__init__.py @@ -0,0 +1 @@ +from .utils import * \ No newline at end of file diff --git a/models/grt123_lung_cancer/config/config.yml b/models/grt123_lung_cancer/config/config.yml new file mode 100644 index 00000000..bad14ddc --- /dev/null +++ b/models/grt123_lung_cancer/config/config.yml @@ -0,0 +1,14 @@ +general: + data_base_dir: /app/data + +modules: + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: True + meta: + mod: ct + + LungCancerClassifierRunner: + 
tmp_path: /app/tmp + n_preprocessing_workers: 8 diff --git a/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile new file mode 100644 index 00000000..036d4118 --- /dev/null +++ b/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile @@ -0,0 +1,55 @@ +# syntax=docker/dockerfile:experimental + +# Specify the base image for the environment +FROM mhubai/base:cuda11.4 + +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# Install panimg to test conversion integration TODO should later be installed with MHub/mhubio +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3-openslide \ + && rm -rf /var/lib/apt/lists/* +RUN pip3 install panimg + +# install required dependencies for grt123 algorithm (CPU-only) +RUN pip3 install --no-cache-dir \ + torch==1.10.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html + + +# TODO remove later ==== Temporariy SSH fix as long as repo is private ======= +RUN apt-get update && apt-get install -y --no-install-recommends \ + openssh-client \ + && rm -rf /var/lib/apt/lists/* +# Add github public key to known_hosts for SSH +RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts +# TODO remove later =============================== + + +# TODO make public and remove ssh extras... 
+# Install grt123 algorithm and model weights +# - We use a shallow git clone for reduced bandwidth usage +# - We remove unnecessary files for a compacter docker layer +# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file +RUN --mount=type=ssh git clone --branch 44-port-to-python3 --depth 1 git@github.com:DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ + rm -rf /grt123_lung_cancer/tests && \ + rm -rf /grt123_lung_cancer/training && \ + rm -rf /grt123_lung_cancer/processor && \ + rm -rf /grt123_lung_cancer/images && \ + rm /grt123_lung_cancer/README.md && \ + rm /grt123_lung_cancer/solution-grt123-team.pdf && \ + echo "$(git log --format="%H" -n 1)" > /grt123_lung_cancer/HEAD && \ + rm -rf /grt123_lung_cancer/.git/* && \ + mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git + +# Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) +#RUN git init \ +# && git sparse-checkout set "models/grt123_lung_cancer" \ +# && git fetch https://github.com/MHubAI/models.git m-grt123-lung-cancer \ +# && git merge TODO + +# Add lobe segmentation code base to python path +ENV PYTHONPATH="/grt123_lung_cancer:/app" + +# Default run script +CMD ["python3", "/app/models/grt123_lung_cancer/scripts/run.py"] diff --git a/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile new file mode 100644 index 00000000..84a71111 --- /dev/null +++ b/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile @@ -0,0 +1,55 @@ +# syntax=docker/dockerfile:experimental + +# Specify the base image for the environment +FROM mhubai/base:cuda12.0 + +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# Install panimg to test conversion integration TODO should later be installed with MHub/mhubio +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3-openslide \ + && rm -rf 
/var/lib/apt/lists/* +RUN pip3 install panimg + +# install required dependencies for grt123 algorithm (CPU-only) +RUN pip3 install --no-cache-dir \ + torch===2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html + + +# TODO remove later ==== Temporariy SSH fix as long as repo is private ======= +RUN apt-get update && apt-get install -y --no-install-recommends \ + openssh-client \ + && rm -rf /var/lib/apt/lists/* +# Add github public key to known_hosts for SSH +RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts +# TODO remove later =============================== + + +# TODO make public and remove ssh extras... +# Install grt123 algorithm and model weights +# - We use a shallow git clone for reduced bandwidth usage +# - We remove unnecessary files for a compacter docker layer +# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file +RUN --mount=type=ssh git clone --branch 44-port-to-python3 --depth 1 git@github.com:DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ + rm -rf /grt123_lung_cancer/tests && \ + rm -rf /grt123_lung_cancer/training && \ + rm -rf /grt123_lung_cancer/processor && \ + rm -rf /grt123_lung_cancer/images && \ + rm /grt123_lung_cancer/README.md && \ + rm /grt123_lung_cancer/solution-grt123-team.pdf && \ + echo "$(git log --format="%H" -n 1)" > /grt123_lung_cancer/HEAD && \ + rm -rf /grt123_lung_cancer/.git/* && \ + mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git + +# Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) +#RUN git init \ +# && git sparse-checkout set "models/grt123_lung_cancer" \ +# && git fetch https://github.com/MHubAI/models.git m-grt123-lung-cancer \ +# && git merge TODO + +# Add lobe segmentation code base to python path +ENV PYTHONPATH="/grt123_lung_cancer:/app" + +# Default run script +CMD ["python3", "/app/models/grt123_lung_cancer/scripts/run.py"] diff --git 
a/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile b/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile new file mode 100644 index 00000000..7a9d968d --- /dev/null +++ b/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile @@ -0,0 +1,55 @@ +# syntax=docker/dockerfile:experimental + +# Specify the base image for the environment +FROM mhubai/base:nocuda + +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# Install panimg to test conversion integration TODO should later be installed with MHub/mhubio +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3-openslide \ + && rm -rf /var/lib/apt/lists/* +RUN pip3 install panimg + +# install required dependencies for grt123 algorithm (CPU-only) +RUN pip3 install --no-cache-dir \ + torch===2.0.1+cpu -f https://download.pytorch.org/whl/torch_stable.html + + +# TODO remove later ==== Temporariy SSH fix as long as repo is private ======= +RUN apt-get update && apt-get install -y --no-install-recommends \ + openssh-client \ + && rm -rf /var/lib/apt/lists/* +# Add github public key to known_hosts for SSH +RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts +# TODO remove later =============================== + + +# TODO make public and remove ssh extras... 
+# Install grt123 algorithm and model weights +# - We use a shallow git clone for reduced bandwidth usage +# - We remove unnecessary files for a compacter docker layer +# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file +RUN --mount=type=ssh git clone --branch 44-port-to-python3 --depth 1 git@github.com:DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ + rm -rf /grt123_lung_cancer/tests && \ + rm -rf /grt123_lung_cancer/training && \ + rm -rf /grt123_lung_cancer/processor && \ + rm -rf /grt123_lung_cancer/images && \ + rm /grt123_lung_cancer/README.md && \ + rm /grt123_lung_cancer/solution-grt123-team.pdf && \ + echo "$(git log --format="%H" -n 1)" > /grt123_lung_cancer/HEAD && \ + rm -rf /grt123_lung_cancer/.git/* && \ + mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git + +# Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) +#RUN git init \ +# && git sparse-checkout set "models/grt123_lung_cancer" \ +# && git fetch https://github.com/MHubAI/models.git m-grt123-lung-cancer \ +# && git merge TODO + +# Add lobe segmentation code base to python path +ENV PYTHONPATH="/grt123_lung_cancer:/app" + +# Default run script +CMD ["python3", "/app/models/grt123_lung_cancer/scripts/run.py"] diff --git a/models/grt123_lung_cancer/scripts/run.py b/models/grt123_lung_cancer/scripts/run.py new file mode 100644 index 00000000..f6294777 --- /dev/null +++ b/models/grt123_lung_cancer/scripts/run.py @@ -0,0 +1,42 @@ +""" +------------------------------------------------------- +MHub / DIAG - Run grt123 Lung Cancer Classifier locally +------------------------------------------------------- + +------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +------------------------------------------------------- +""" + +import sys +sys.path.append('.') + +from mhubio.core import Config, DataType, FileType +from 
mhubio.modules.importer.DicomImporter import DicomImporter +from mhubio.modules.organizer.DataOrganizer import DataOrganizer +from models.grt123_lung_cancer.utils.LungCancerClassifierRunner import LungCancerClassifierRunner +from models.grt123_lung_cancer.utils.PanImgConverters import MhaPanImgConverter + +# clean-up +import shutil +shutil.rmtree("/app/data/sorted_data", ignore_errors=True) +shutil.rmtree("/app/tmp", ignore_errors=True) +shutil.rmtree("/app/data/output_data", ignore_errors=True) + +# config +config = Config('/app/models/grt123_lung_cancer/config/config.yml') + +# import (ct:dicom) +DicomImporter(config).execute() + +# convert (ct:dicom -> ct:mha) +MhaPanImgConverter(config).execute() + +# execute model (nnunet) +LungCancerClassifierRunner(config).execute() + +# organize data into output folder +organizer = DataOrganizer(config, set_file_permissions=sys.platform.startswith('linux')) +organizer.setTarget(DataType(FileType.JSON), "/app/data/output_data/[i:sid]/grt123_lung_cancer_findings.json") +organizer.execute() diff --git a/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py new file mode 100644 index 00000000..f4ddba6f --- /dev/null +++ b/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -0,0 +1,59 @@ +""" +---------------------------------------------------------- +Mhub / DIAG - Run Module for grt123 Lung Cancer Classifier +---------------------------------------------------------- + +---------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +---------------------------------------------------------- +""" +import torch.cuda +from mhubio.core import Instance, InstanceData, IO, Module + +from pathlib import Path +import numpy as np +import SimpleITK as sitk + +import torch + +import main + + +@IO.Config('n_preprocessing_workers', int, 6, the="number of preprocessing workers to use for the 
grt123 lung mask preprocessor") +@IO.Config('tmp_path', str, "/app/tmp", the="the path to write intermediate grt123 files to") +class LungCancerClassifierRunner(Module): + + n_preprocessing_workers: int + tmp_path: str + + @IO.Instance() + @IO.Input('in_data', 'mha:mod=ct', the='input ct scan') + @IO.Output('out_data', 'grt123_lung_cancer_findings.json', 'json:model=grt123LungCancerClassification', 'in_data', the='predicted nodules and lung cancer findings of the lung lobe') + def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: + + tmp_path = Path(self.tmp_path) + tmp_output_bbox_dir = tmp_path / "bbox" + tmp_output_prep_dir = tmp_path / "prep" + tmp_output_bbox_dir.mkdir(exist_ok=True, parents=True) + tmp_output_prep_dir.mkdir(exist_ok=True, parents=True) + + n_gpu = 1 if torch.cuda.is_available() else 0 + + # apply grt123 algorithm + results = main.main( + skip_detect=False, + skip_preprocessing=False, + datapath=str(Path(in_data.abspath).parent), + outputdir=str(tmp_path), + output_bbox_dir=str(tmp_output_bbox_dir), + output_prep_dir=str(tmp_output_prep_dir), + n_gpu=n_gpu, + n_worker_preprocessing=self.n_preprocessing_workers, + data_filter=r".*.mha" + ) + + # store classification results + self.v(f"Writing classification results to {out_data.abspath}") + assert len(results) > 0, "LungCancerClassifierRunner - Always expects at least one output report" + results[0].to_file(out_data.abspath) diff --git a/models/grt123_lung_cancer/utils/PanImgConverters.py b/models/grt123_lung_cancer/utils/PanImgConverters.py new file mode 100644 index 00000000..25dd618e --- /dev/null +++ b/models/grt123_lung_cancer/utils/PanImgConverters.py @@ -0,0 +1,122 @@ +""" +------------------------------------------------------------- +MHub - PanImg Conversion Modules Dicom2Mha and WSI-Dicom2Tiff +------------------------------------------------------------- + +------------------------------------------------------------- +Author: Sil van de Leemput 
+Email: sil.vandeleemput@radboudumc.nl +------------------------------------------------------------- +""" + + +from typing import Optional + +from mhubio.modules.convert.DataConverter import DataConverter +from mhubio.core import Instance, InstanceData, DataType, FileType + +import os +from pathlib import Path +import shutil + +from panimg.exceptions import UnconsumedFilesException +from panimg.image_builders.dicom import image_builder_dicom +from panimg.image_builders.tiff import image_builder_tiff +from panimg.image_builders.metaio_nrrd import image_builder_nrrd + +import SimpleITK + + +class MhaPanImgConverter(DataConverter): + """ + Conversion module. + Convert instance data from dicom or nrrd to mha. + """ + + def convert(self, instance: Instance) -> Optional[InstanceData]: + + # create a converted instance + has_instance_dicom = instance.hasType(DataType(FileType.DICOM)) + has_instance_nrrd = instance.hasType(DataType(FileType.NRRD)) + + assert has_instance_dicom or has_instance_nrrd, f"CONVERT ERROR: required datatype (dicom or nrrd) not available in instance {str(instance)}." 
+ + # select input data, dicom has priority over nrrd + input_data = instance.data.filter(DataType(FileType.DICOM) if has_instance_dicom else DataType(FileType.NRRD)).first() + + # out data + mha_data = InstanceData("image.mha", DataType(FileType.MHA, input_data.type.meta)) + mha_data.instance = instance + + # paths + inp_data_dir = Path(input_data.abspath) + out_mha_file = Path(mha_data.abspath) + + # sanity check + assert(inp_data_dir.is_dir()) + + # DICOM CT to MHA conversion (if the file doesn't exist yet) + if out_mha_file.is_file(): + print("CONVERT ERROR: File already exists: ", out_mha_file) + return None + else: + # run conversion using panimg + input_files = {f for f in inp_data_dir.glob(["*.nrrd", "*.dcm"][has_instance_dicom]) if f.is_file()} + img_builder = image_builder_dicom if has_instance_dicom else image_builder_nrrd + try: + for result in img_builder(files=input_files): + sitk_image = result.image # SimpleITK image + SimpleITK.WriteImage(sitk_image, str(out_mha_file)) + except UnconsumedFilesException as e: + # e.errors is keyed with a Path to a file that could not be consumed, + # with a list of all the errors found with loading it, + # the user can then choose what to do with that information + print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.errors) + return None + + return mha_data + + +class TiffPanImgConverter(DataConverter): + """ + Conversion module. + Convert instance data from WSI-dicom to tiff. + """ + + def convert(self, instance: Instance) -> Optional[InstanceData]: + + # create a converted instance + assert instance.hasType(DataType(FileType.DICOM)), f"CONVERT ERROR: required datatype (dicom) not available in instance {str(instance)}." 
+ dicom_data = instance.data.filter(DataType(FileType.DICOM)).first() + + # out data + tiff_data = InstanceData("image.tiff", DataType(FileType.TIFF, dicom_data.type.meta)) + tiff_data.instance = instance + + # paths + inp_dicom_dir = Path(dicom_data.abspath) + out_tiff_file = Path(tiff_data.abspath) + + # sanity check + assert(inp_dicom_dir.is_dir()) + + # WSI-DICOM to TIFF conversion (if the file doesn't exist yet) + if out_tiff_file.is_file(): + print("CONVERT ERROR: File already exists: ", out_tiff_file) + return None + else: + # run conversion using panimg + dcm_input_files = {f for f in inp_dicom_dir.glob("*.dcm") if f.is_file()} + + try: + for result in image_builder_tiff(files=dcm_input_files): + tiff_image = result.file # Path to the tiff file + shutil.move(str(tiff_image), str(out_tiff_file)) + except UnconsumedFilesException as e: + # e.errors is keyed with a Path to a file that could not be consumed, + # with a list of all the errors found with loading it, + # the user can then choose what to do with that information + print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.errors) + return None + + return tiff_data diff --git a/models/grt123_lung_cancer/utils/__init__.py b/models/grt123_lung_cancer/utils/__init__.py new file mode 100644 index 00000000..d9f025f9 --- /dev/null +++ b/models/grt123_lung_cancer/utils/__init__.py @@ -0,0 +1 @@ +from .LungCancerClassifierRunner import * \ No newline at end of file From 0d7ec9d726f68b38bdbe67d79e939916a476bcf8 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 4 Jul 2023 12:50:40 +0200 Subject: [PATCH 002/125] WIP initial pancreas_pdac model commit --- models/nnunet_pancreas_pdac/config/config.yml | 46 ++++++++++++++++ models/nnunet_pancreas_pdac/config/dseg.json | 53 ++++++++++++++++++ models/nnunet_pancreas_pdac/config/slicer.yml | 27 +++++++++ .../dockerfiles/cuda12.0/Dockerfile | 55 +++++++++++++++++++ models/nnunet_pancreas_pdac/scripts/run.py | 50 +++++++++++++++++ 5 files 
changed, 231 insertions(+) create mode 100644 models/nnunet_pancreas_pdac/config/config.yml create mode 100644 models/nnunet_pancreas_pdac/config/dseg.json create mode 100644 models/nnunet_pancreas_pdac/config/slicer.yml create mode 100644 models/nnunet_pancreas_pdac/dockerfiles/cuda12.0/Dockerfile create mode 100644 models/nnunet_pancreas_pdac/scripts/run.py diff --git a/models/nnunet_pancreas_pdac/config/config.yml b/models/nnunet_pancreas_pdac/config/config.yml new file mode 100644 index 00000000..02bd8319 --- /dev/null +++ b/models/nnunet_pancreas_pdac/config/config.yml @@ -0,0 +1,46 @@ +general: + version: 1.0 + data_base_dir: /app/data + description: base configuration for nnunet pancreas PDAC model + +execute: + - DicomImporter + - NiftiConverter + - NNUnetRunner + - DsegConverter + - DataOrganizer + +modules: + + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: true + meta: + mod: ct + + NNUnetRunner: + input_data_type: nifti:mod=ct + nnunet_task: Task103_AllStructures + nnunet_model: 3d_fullres + checkpoint: model_final_checkpoint + folds: 0,1,2,3,4 + disable_augmentations: False + disable_patch_overlap: False + export_prob_maps: True + roi: PANCREAS,PANCREAS+NEOPLASM_MALIGNANT_PRIMARY + prob_map_segments: [Background, Pancreas, Pancreatic_cancer] + + DsegConverter: + #source_segs: [nifti:mod=seg] + #json_config_path: /app/models/nnunet_pancreas/config/dseg.json + source_segs: [nifti:mod=seg:roi=*] + model_name: NNUnet Pancreas PDAC + skip_empty_slices: True + + DataOrganizer: + targets: + - nifti:mod=ct-->/app/data/output_data/[i:sid]/image.nii.gz + - nifti:mod=seg-->/app/data/output_data/[i:sid]/pancreas.nii.gz + - dicomseg:mod=seg-->/app/data/output_data/[i:sid]/pancreas.seg.dcm + - nrrd:mod=prob_mask-->/app/data/output_data/[i:sid]/prob_masks/[path] \ No newline at end of file diff --git a/models/nnunet_pancreas_pdac/config/dseg.json b/models/nnunet_pancreas_pdac/config/dseg.json new file mode 100644 index 
00000000..31ecb2aa --- /dev/null +++ b/models/nnunet_pancreas_pdac/config/dseg.json @@ -0,0 +1,53 @@ +{ + "ContentCreatorName": "IDC", + "ClinicalTrialSeriesID": "0", + "ClinicalTrialTimePointID": "1", + "SeriesDescription": "Segmentation", + "SeriesNumber": "42", + "InstanceNumber": "1", + "BodyPartExamined": "ABDOMEN", + "segmentAttributes": [ + [ + { + "labelID": 1, + "SegmentDescription": "Pancreas", + "SegmentAlgorithmType": "AUTOMATIC", + "SegmentAlgorithmName": "nnU-Net", + "SegmentedPropertyCategoryCodeSequence": { + "CodeValue": "123037004", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Anatomical Structure" + }, + "SegmentedPropertyTypeCodeSequence": { + "CodeValue": "15776009", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Pancreas" + }, + "recommendedDisplayRGBValue": [ + 249, + 180, + 111 + ] + }, + { + "labelID": 2, + "SegmentDescription": "Pancreatic Cancer", + "SegmentAlgorithmType": "AUTOMATIC", + "SegmentAlgorithmName": "nnU-Net", + "SegmentedPropertyCategoryCodeSequence": { + "CodeValue": "49755003", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Morphologically Altered Structure" + }, + "SegmentedPropertyTypeCodeSequence": { + "CodeValue": "86049000", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Neoplasm, Primary" + } + } + ] + ], + "ContentLabel": "SEGMENTATION", + "ContentDescription": "Image segmentation", + "ClinicalTrialCoordinatingCenterName": "dcmqi" + } \ No newline at end of file diff --git a/models/nnunet_pancreas_pdac/config/slicer.yml b/models/nnunet_pancreas_pdac/config/slicer.yml new file mode 100644 index 00000000..e6e84d4e --- /dev/null +++ b/models/nnunet_pancreas_pdac/config/slicer.yml @@ -0,0 +1,27 @@ +general: + version: 1.0 + data_base_dir: /app/data + description: 3D Slicer configuration for nnuner pancreas model + +execute: + - NrrdImporter + - NiftiConverter + - NNUnetRunner + - DataOrganizer + +modules: + + NrrdImporter: + input_dir: input_data + input_file_name: image.nrrd + + NNUnetRunner: + 
input_data_type: nifti:mod=ct + nnunet_task: Task007_Pancreas + nnunet_model: 3d_lowres + export_prob_maps: False + roi: pancreas + + DataOrganizer: + targets: + - nifti:mod=seg-->/app/data/output_data/[d:roi].nii.gz \ No newline at end of file diff --git a/models/nnunet_pancreas_pdac/dockerfiles/cuda12.0/Dockerfile b/models/nnunet_pancreas_pdac/dockerfiles/cuda12.0/Dockerfile new file mode 100644 index 00000000..fe2f9138 --- /dev/null +++ b/models/nnunet_pancreas_pdac/dockerfiles/cuda12.0/Dockerfile @@ -0,0 +1,55 @@ +# syntax=docker/dockerfile:experimental + +# Specify the base image for the environment +FROM mhubai/base:cuda12.0 + +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# Install panimg to test conversion integration TODO should later be installed with MHub/mhubio +#RUN apt-get update && apt-get install -y --no-install-recommends \ +# python3-openslide \ +# && rm -rf /var/lib/apt/lists/* +#RUN pip3 install panimg + +# Clone MHub model (m-nnunet-pancreas branch, fixed to commit 407f1f884f09898bef9a9173e6434d681a50d399) # TODO +#RUN git init \ +# && git sparse-checkout set "models/nnunet_pancreas" \ +# && git fetch https://github.com/MHubAI/models.git m-nnunet-pancreas \ +# && git merge 407f1f884f09898bef9a9173e6434d681a50d399 + + +# Install git-lfs (required for downloading the model weights) +RUN apt update && apt install -y --no-install-recommends \ + git-lfs \ + && rm -rf /var/lib/apt/lists/* + +# TODO remove later ==== Temporariy SSH fix as long as repo is private ======= +RUN apt-get update && apt-get install -y --no-install-recommends \ + openssh-client \ + && rm -rf /var/lib/apt/lists/* +# Add github public key to known_hosts for SSH +RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts +# TODO remove later =============================== + + +RUN --mount=type=ssh git clone --depth 1 git@github.com:DIAGNijmegen/CE-CT_PDAC_AutomaticDetection_nnUnet.git /nnunet_pancreas_pdac && \ + cd 
/nnunet_pancreas_pdac && \ + git reset --hard 117bb4ebf8bc9e90509a468a5d56e0515987b5a7 && \ + rm -rf /nnunet_pancreas_pdac/.git + +# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build +# by pulling sklearn instead of scikit-learn +# N.B. this is a known issue: +# https://github.com/MIC-DKFZ/nnUNet/issues/1281 +# https://github.com/MIC-DKFZ/nnUNet/pull/1209 +ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True + +# Install nnunet and other requirements +RUN pip3 install --no-cache-dir -r /nnunet_pancreas_pdac/requirements.txt + +# specify nnunet specific environment variables +ENV WEIGHTS_FOLDER=/nnunet_pancreas_pdac/nnunet/results/nnUNet + +# Default run script +CMD ["python3", "-m", "mhubio.run", "--config", "/app/models/nnunet_pancreas_pdac/config/config.yml"] \ No newline at end of file diff --git a/models/nnunet_pancreas_pdac/scripts/run.py b/models/nnunet_pancreas_pdac/scripts/run.py new file mode 100644 index 00000000..3491f58b --- /dev/null +++ b/models/nnunet_pancreas_pdac/scripts/run.py @@ -0,0 +1,50 @@ +""" +------------------------------------------------- +MHub - run the NNUnet pancreas segmentation + pipeline +------------------------------------------------- + +------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +------------------------------------------------- +""" + +import sys +sys.path.append('.') + +from mhubio.core import Config, DataType, FileType, CT, SEG +from mhubio.modules.importer.DicomImporter import DicomImporter +from mhubio.modules.convert.NiftiConverter import NiftiConverter +from mhubio.modules.runner.NNUnetRunner import NNUnetRunner +from mhubio.modules.convert.DsegConverter import DsegConverter +from mhubio.modules.organizer.DataOrganizer import DataOrganizer + +# clean-up +import shutil +shutil.rmtree("/app/data/sorted_data", ignore_errors=True) +shutil.rmtree("/app/tmp", ignore_errors=True) 
+shutil.rmtree("/app/data/output_data", ignore_errors=True) + +# config +config = Config('/app/models/nnunet_pancreas_pdac/config/config.yml') +config.verbose = True # TODO: define levels of verbosity and integrate consistently. + +# import (ct:dicom) +DicomImporter(config).execute() + +# convert (ct:dicom -> ct:nifti) +NiftiConverter(config).execute() + +# execute model (nnunet) +NNUnetRunner(config).execute() + +# convert (seg:nifti -> seg:dcm) +DsegConverter(config).execute() + +# organize data into output folder +organizer = DataOrganizer(config, set_file_permissions=sys.platform.startswith('linux')) +organizer.setTarget(DataType(FileType.NIFTI, CT), "/app/data/output_data/[i:sid]/image.nii.gz") +organizer.setTarget(DataType(FileType.NIFTI, SEG), "/app/data/output_data/[i:sid]/pancreas.nii.gz") +organizer.setTarget(DataType(FileType.DICOMSEG, SEG), "/app/data/output_data/[i:sid]/pancreas.seg.dcm") +organizer.execute() \ No newline at end of file From e1b4fd04978b7b3fd073b0a86838db42e3a6b058 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 4 Jul 2023 13:09:06 +0200 Subject: [PATCH 003/125] made Dockerfiles install publicly available v2.0.0 of grt123 --- .../dockerfiles/cuda11.4/Dockerfile | 17 ++--------------- .../dockerfiles/cuda12.0/Dockerfile | 15 +-------------- .../dockerfiles/nocuda/Dockerfile | 15 +-------------- 3 files changed, 4 insertions(+), 43 deletions(-) diff --git a/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile index 036d4118..0b356621 100644 --- a/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile +++ b/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile @@ -1,5 +1,3 @@ -# syntax=docker/dockerfile:experimental - # Specify the base image for the environment FROM mhubai/base:cuda11.4 @@ -16,22 +14,11 @@ RUN pip3 install panimg RUN pip3 install --no-cache-dir \ torch==1.10.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html - -# TODO remove 
later ==== Temporariy SSH fix as long as repo is private ======= -RUN apt-get update && apt-get install -y --no-install-recommends \ - openssh-client \ - && rm -rf /var/lib/apt/lists/* -# Add github public key to known_hosts for SSH -RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts -# TODO remove later =============================== - - -# TODO make public and remove ssh extras... # Install grt123 algorithm and model weights # - We use a shallow git clone for reduced bandwidth usage # - We remove unnecessary files for a compacter docker layer # - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file -RUN --mount=type=ssh git clone --branch 44-port-to-python3 --depth 1 git@github.com:DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ +RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ rm -rf /grt123_lung_cancer/tests && \ rm -rf /grt123_lung_cancer/training && \ rm -rf /grt123_lung_cancer/processor && \ @@ -40,7 +27,7 @@ RUN --mount=type=ssh git clone --branch 44-port-to-python3 --depth 1 git@github. 
rm /grt123_lung_cancer/solution-grt123-team.pdf && \ echo "$(git log --format="%H" -n 1)" > /grt123_lung_cancer/HEAD && \ rm -rf /grt123_lung_cancer/.git/* && \ - mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git + mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git \ # Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) #RUN git init \ diff --git a/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile index 84a71111..2e9c8145 100644 --- a/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile +++ b/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile @@ -1,5 +1,3 @@ -# syntax=docker/dockerfile:experimental - # Specify the base image for the environment FROM mhubai/base:cuda12.0 @@ -16,22 +14,11 @@ RUN pip3 install panimg RUN pip3 install --no-cache-dir \ torch===2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html - -# TODO remove later ==== Temporariy SSH fix as long as repo is private ======= -RUN apt-get update && apt-get install -y --no-install-recommends \ - openssh-client \ - && rm -rf /var/lib/apt/lists/* -# Add github public key to known_hosts for SSH -RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts -# TODO remove later =============================== - - -# TODO make public and remove ssh extras... 
# Install grt123 algorithm and model weights # - We use a shallow git clone for reduced bandwidth usage # - We remove unnecessary files for a compacter docker layer # - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file -RUN --mount=type=ssh git clone --branch 44-port-to-python3 --depth 1 git@github.com:DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ +RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ rm -rf /grt123_lung_cancer/tests && \ rm -rf /grt123_lung_cancer/training && \ rm -rf /grt123_lung_cancer/processor && \ diff --git a/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile b/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile index 7a9d968d..a63c2d61 100644 --- a/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile +++ b/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile @@ -1,5 +1,3 @@ -# syntax=docker/dockerfile:experimental - # Specify the base image for the environment FROM mhubai/base:nocuda @@ -16,22 +14,11 @@ RUN pip3 install panimg RUN pip3 install --no-cache-dir \ torch===2.0.1+cpu -f https://download.pytorch.org/whl/torch_stable.html - -# TODO remove later ==== Temporariy SSH fix as long as repo is private ======= -RUN apt-get update && apt-get install -y --no-install-recommends \ - openssh-client \ - && rm -rf /var/lib/apt/lists/* -# Add github public key to known_hosts for SSH -RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts -# TODO remove later =============================== - - -# TODO make public and remove ssh extras... 
# Install grt123 algorithm and model weights # - We use a shallow git clone for reduced bandwidth usage # - We remove unnecessary files for a compacter docker layer # - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file -RUN --mount=type=ssh git clone --branch 44-port-to-python3 --depth 1 git@github.com:DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ +RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ rm -rf /grt123_lung_cancer/tests && \ rm -rf /grt123_lung_cancer/training && \ rm -rf /grt123_lung_cancer/processor && \ From a09b573628842f9a6000fa925f8f98633579ecce Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 4 Jul 2023 14:37:25 +0200 Subject: [PATCH 004/125] cleanup output JSON report --- .../utils/LungCancerClassifierRunner.py | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py index f4ddba6f..9e91144c 100644 --- a/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py +++ b/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -11,6 +11,8 @@ import torch.cuda from mhubio.core import Instance, InstanceData, IO, Module +from typing import Dict +import json from pathlib import Path import numpy as np import SimpleITK as sitk @@ -20,6 +22,24 @@ import main +def cleanup_json_report(data: Dict): + for key in ["trainingset1", "trainingset2"]: + del data["lungcad"][key] + for key in ["patientuid", "studyuid"]: + del data["imageinfo"][key] + data["findings"] = [ + dict( + id=f["id"], + x=f["x"], + y=f["y"], + z=f["z"], + probability=f["probability"], + cancerprobability=f["cancerprobability"] + ) + for f in data["findings"] + ] + + @IO.Config('n_preprocessing_workers', int, 6, the="number of preprocessing workers to use 
for the grt123 lung mask preprocessor") @IO.Config('tmp_path', str, "/app/tmp", the="the path to write intermediate grt123 files to") class LungCancerClassifierRunner(Module): @@ -56,4 +76,8 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData # store classification results self.v(f"Writing classification results to {out_data.abspath}") assert len(results) > 0, "LungCancerClassifierRunner - Always expects at least one output report" - results[0].to_file(out_data.abspath) + results_json = results[0].to_json() + cleanup_json_report(results_json) + with open(out_data.abspath, "w") as f: + json.dump(results_json, f, indent=4) + From ec5f1463d8f584eb949e9cdfd442740670dd2017 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 4 Jul 2023 14:47:26 +0200 Subject: [PATCH 005/125] Move git HEAD file in Dockerfiles to retain proper hash content --- models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile | 4 ++-- models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile | 2 +- models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile index 0b356621..b24c6f1d 100644 --- a/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile +++ b/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile @@ -25,9 +25,9 @@ RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-d rm -rf /grt123_lung_cancer/images && \ rm /grt123_lung_cancer/README.md && \ rm /grt123_lung_cancer/solution-grt123-team.pdf && \ - echo "$(git log --format="%H" -n 1)" > /grt123_lung_cancer/HEAD && \ + mv /grt123_lung_cancer/.git/HEAD /grt123_lung_cancer && \ rm -rf /grt123_lung_cancer/.git/* && \ - mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git \ + mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git # Clone MHub model (m-grt123-lung-cancer branch, fixed to commit 
TODO) #RUN git init \ diff --git a/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile index 2e9c8145..0a9724e6 100644 --- a/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile +++ b/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile @@ -25,7 +25,7 @@ RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-d rm -rf /grt123_lung_cancer/images && \ rm /grt123_lung_cancer/README.md && \ rm /grt123_lung_cancer/solution-grt123-team.pdf && \ - echo "$(git log --format="%H" -n 1)" > /grt123_lung_cancer/HEAD && \ + mv /grt123_lung_cancer/.git/HEAD /grt123_lung_cancer && \ rm -rf /grt123_lung_cancer/.git/* && \ mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git diff --git a/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile b/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile index a63c2d61..862a1dc7 100644 --- a/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile +++ b/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile @@ -25,7 +25,7 @@ RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-d rm -rf /grt123_lung_cancer/images && \ rm /grt123_lung_cancer/README.md && \ rm /grt123_lung_cancer/solution-grt123-team.pdf && \ - echo "$(git log --format="%H" -n 1)" > /grt123_lung_cancer/HEAD && \ + mv /grt123_lung_cancer/.git/HEAD /grt123_lung_cancer && \ rm -rf /grt123_lung_cancer/.git/* && \ mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git From 20729a9b65d4a83b0eb7762f0a3144a2ae2403e4 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 4 Jul 2023 15:17:09 +0200 Subject: [PATCH 006/125] change MHub/DIAG -> MHub/GC in comments --- models/grt123_lung_cancer/scripts/run.py | 10 +++++----- .../utils/LungCancerClassifierRunner.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/models/grt123_lung_cancer/scripts/run.py b/models/grt123_lung_cancer/scripts/run.py index f6294777..78971e1a 100644 --- 
a/models/grt123_lung_cancer/scripts/run.py +++ b/models/grt123_lung_cancer/scripts/run.py @@ -1,12 +1,12 @@ """ -------------------------------------------------------- -MHub / DIAG - Run grt123 Lung Cancer Classifier locally -------------------------------------------------------- +------------------------------------------------------ +MHub / GC - Run grt123 Lung Cancer Classifier locally +----------------------------------------------------- -------------------------------------------------------- +----------------------------------------------------- Author: Sil van de Leemput Email: sil.vandeleemput@radboudumc.nl -------------------------------------------------------- +----------------------------------------------------- """ import sys diff --git a/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py index 9e91144c..80bc2485 100644 --- a/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py +++ b/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -1,12 +1,12 @@ """ ----------------------------------------------------------- -Mhub / DIAG - Run Module for grt123 Lung Cancer Classifier ----------------------------------------------------------- +-------------------------------------------------------- +Mhub / GC - Run Module for grt123 Lung Cancer Classifier +-------------------------------------------------------- ----------------------------------------------------------- +-------------------------------------------------------- Author: Sil van de Leemput Email: sil.vandeleemput@radboudumc.nl ----------------------------------------------------------- +-------------------------------------------------------- """ import torch.cuda from mhubio.core import Instance, InstanceData, IO, Module From af79adb21687cb7968ce8f8f08804c9c0820b936 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 20 Jul 2023 16:33:22 +0200 Subject: [PATCH 007/125] renamed 
nnunet_pancreas_pdac -> gc_nnunet_pancreas, addded run script, mhaimporter and specific gcnnunetpancreasrunner * Fixed Dockerfile with nocuda for now using the new base image * Added two utils for this runner (MHAImporter, custom GCnnunetPancreasRunner) * Marked bunch of things as TODO --- models/gc_nnunet_pancreas/__init__.py | 1 + models/gc_nnunet_pancreas/config/config.yml | 33 ++++++++++ .../dockerfiles/nocuda/Dockerfile | 47 ++++++++++++++ models/gc_nnunet_pancreas/scripts/run.py | 54 ++++++++++++++++ .../utils/GCnnUnetPancreasRunner.py | 62 +++++++++++++++++++ .../gc_nnunet_pancreas/utils/MhaImporter.py | 45 ++++++++++++++ models/gc_nnunet_pancreas/utils/__init__.py | 2 + models/nnunet_pancreas_pdac/config/config.yml | 46 -------------- models/nnunet_pancreas_pdac/config/dseg.json | 53 ---------------- models/nnunet_pancreas_pdac/config/slicer.yml | 27 -------- .../dockerfiles/cuda12.0/Dockerfile | 55 ---------------- models/nnunet_pancreas_pdac/scripts/run.py | 50 --------------- 12 files changed, 244 insertions(+), 231 deletions(-) create mode 100644 models/gc_nnunet_pancreas/__init__.py create mode 100644 models/gc_nnunet_pancreas/config/config.yml create mode 100644 models/gc_nnunet_pancreas/dockerfiles/nocuda/Dockerfile create mode 100644 models/gc_nnunet_pancreas/scripts/run.py create mode 100644 models/gc_nnunet_pancreas/utils/GCnnUnetPancreasRunner.py create mode 100644 models/gc_nnunet_pancreas/utils/MhaImporter.py create mode 100644 models/gc_nnunet_pancreas/utils/__init__.py delete mode 100644 models/nnunet_pancreas_pdac/config/config.yml delete mode 100644 models/nnunet_pancreas_pdac/config/dseg.json delete mode 100644 models/nnunet_pancreas_pdac/config/slicer.yml delete mode 100644 models/nnunet_pancreas_pdac/dockerfiles/cuda12.0/Dockerfile delete mode 100644 models/nnunet_pancreas_pdac/scripts/run.py diff --git a/models/gc_nnunet_pancreas/__init__.py b/models/gc_nnunet_pancreas/__init__.py new file mode 100644 index 00000000..16281fe0 --- 
/dev/null +++ b/models/gc_nnunet_pancreas/__init__.py @@ -0,0 +1 @@ +from .utils import * diff --git a/models/gc_nnunet_pancreas/config/config.yml b/models/gc_nnunet_pancreas/config/config.yml new file mode 100644 index 00000000..3b436dac --- /dev/null +++ b/models/gc_nnunet_pancreas/config/config.yml @@ -0,0 +1,33 @@ +general: + version: 1.0 + data_base_dir: /app/data + description: base configuration for nnunet pancreas PDAC model + +modules: + + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: true + meta: + mod: ct + + MhaImporter: + source_dir: input_data + import_dir: sorted_data + + GCNNUnetPancreasRunner: + +# TODO configure DsegConverter +# DsegConverter: +# #source_segs: [nifti:mod=seg] +# #json_config_path: /app/models/nnunet_pancreas/config/dseg.json +# source_segs: [mha:mod=seg:roi=*] +# model_name: GC NNUnet Pancreas +# skip_empty_slices: True + + DataOrganizer: + targets: + - mha:mod=heatmap-->/app/data/output_data/[i:sid]/heatmap.mha + - mha:mod=seg-->/app/data/output_data/[i:sid]/pancreas.seg.mha +# - dicomseg:mod=seg-->/app/data/output_data/[i:sid]/pancreas.seg.dcm diff --git a/models/gc_nnunet_pancreas/dockerfiles/nocuda/Dockerfile b/models/gc_nnunet_pancreas/dockerfiles/nocuda/Dockerfile new file mode 100644 index 00000000..d1945b36 --- /dev/null +++ b/models/gc_nnunet_pancreas/dockerfiles/nocuda/Dockerfile @@ -0,0 +1,47 @@ +# Specify the base image for the environment +FROM mhubai/base:latest +# TODO add CUDA support since algorithm takes eons otherwise... 
+ +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# Clone MHub model (m-gc-nnunet-pancreas branch, fixed to commit 407f1f884f09898bef9a9173e6434d681a50d399) # TODO +#RUN git init \ +# && git sparse-checkout set "models/gc_nnunet_pancreas" \ +# && git fetch https://github.com/MHubAI/models.git m-gc-nnunet-pancreas \ +# && git merge TODO + +# Install git-lfs (required for downloading the model weights) +RUN apt update && apt install -y --no-install-recommends \ + git-lfs \ + && rm -rf /var/lib/apt/lists/* + +# Install the model weights and the algorithm files +# * Pull algorithm from repo into /opt/algorithm for commit e4f4008c6e18e60a79f693448562a340a9252aa8 +# * Remove .git folder to keep docker layer small +# * Replace input images path in process.py with an existing folder to avoid errors +RUN git clone --depth 1 https://github.com/DIAGNijmegen/CE-CT_PDAC_AutomaticDetection_nnUnet.git /opt/algorithm && \ + cd /opt/algorithm && \ + git reset --hard e4f4008c6e18e60a79f693448562a340a9252aa8 && \ + rm -rf /opt/algorithm/.git && \ + sed -i 's/Path("\/input\/images\/")/Path("\/app")/g' /opt/algorithm/process.py + +# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build +# by pulling sklearn instead of scikit-learn +# N.B. 
this is a known issue: +# https://github.com/MIC-DKFZ/nnUNet/issues/1281 +# https://github.com/MIC-DKFZ/nnUNet/pull/1209 +ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True + +# Install nnUNet and other requirements +RUN pip3 install --no-cache-dir -r /opt/algorithm/requirements.txt + +# Extend the nnUNet installation with custom trainers +RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \ + mv /opt/algorithm/nnUNetTrainerV2_Loss_CE_checkpoints.py "$SITE_PKG/nnunet/training/network_training/nnUNetTrainerV2_Loss_CE_checkpoints.py" + +# Add algorithm files to python path +ENV PYTHONPATH=/opt/algorithm:/app + +# Default run script +CMD ["python3", "/app/models/gc_nnunet_pancreas/scripts/run.py"] \ No newline at end of file diff --git a/models/gc_nnunet_pancreas/scripts/run.py b/models/gc_nnunet_pancreas/scripts/run.py new file mode 100644 index 00000000..8421566f --- /dev/null +++ b/models/gc_nnunet_pancreas/scripts/run.py @@ -0,0 +1,54 @@ +""" +--------------------------------------------------- +GC / MHub - run the NNUnet GC pancreas segmentation + pipeline +--------------------------------------------------- + +--------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +--------------------------------------------------- +""" + +import sys +sys.path.append('.') + +from mhubio.core import Config, DataType, FileType, CT, SEG, Meta +from mhubio.modules.importer.FileStructureImporter import FileStructureImporter +from mhubio.modules.importer.DicomImporter import DicomImporter +from mhubio.modules.importer.NrrdImporter import NrrdImporter +from mhubio.modules.convert.NiftiConverter import NiftiConverter +from mhubio.modules.runner.NNUnetRunner import NNUnetRunner +from mhubio.modules.convert.DsegConverter import DsegConverter +from mhubio.modules.organizer.DataOrganizer import DataOrganizer +from models.gc_nnunet_pancreas import MhaImporter, GCNNUnetPancreasRunner, HEATMAP + +# 
clean-up +import shutil +shutil.rmtree("/app/data/sorted_data", ignore_errors=True) +shutil.rmtree("/app/tmp", ignore_errors=True) +shutil.rmtree("/app/data/output_data", ignore_errors=True) + +# config +config = Config('/app/models/gc_nnunet_pancreas/config/config.yml') +config.verbose = True # TODO: define levels of verbosity and integrate consistently. + +# import (ct:dicom) +#DicomImporter(config).execute() + +# import (ct:mha) +MhaImporter(config).execute() +#FileStructureImporter(config).execute() + +# execute model (nnunet ct:mha -> (hm:mha, seg:mha)) +GCNNUnetPancreasRunner(config).execute() + +# convert (seg:nifti -> seg:dcm) +# DsegConverter(config).execute() + +# organize data into output folder +organizer = DataOrganizer(config, set_file_permissions=sys.platform.startswith('linux')) +organizer.setTarget(DataType(FileType.MHA, HEATMAP), "/app/data/output_data/[i:sid]/heatmap.mha") +organizer.setTarget(DataType(FileType.MHA, SEG), "/app/data/output_data/[i:sid]/pancreas.seg.mha") +#organizer.setTarget(DataType(FileType.DICOMSEG, SEG), "/app/data/output_data/[i:sid]/pancreas.seg.dcm") +organizer.execute() \ No newline at end of file diff --git a/models/gc_nnunet_pancreas/utils/GCnnUnetPancreasRunner.py b/models/gc_nnunet_pancreas/utils/GCnnUnetPancreasRunner.py new file mode 100644 index 00000000..b63badbd --- /dev/null +++ b/models/gc_nnunet_pancreas/utils/GCnnUnetPancreasRunner.py @@ -0,0 +1,62 @@ +""" +----------------------------------------------------------- +GC / MHub - Run Module for the GC NNUnet Pancreas Algorithm +----------------------------------------------------------- + +----------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +----------------------------------------------------------- +""" + +from mhubio.core import Module, Instance, InstanceData, DataType, FileType, CT, SEG, IO, Meta +import os, subprocess, shutil + +from pathlib import Path + +from process import 
PDACDetectionContainer + +# TODO should move to MHubio/core/templates.py +HEATMAP = Meta(mod="heatmap") + +# @IO.Config('output_dir', str, "/app/tmp/gc_nnunet_pancreas/", the='directory to output the segmentation and the heatmap') +class GCNNUnetPancreasRunner(Module): + + # output_dir: str + + @IO.Instance() + @IO.Input('in_data', 'mha:mod=ct', the="input data") + @IO.Output('heatmap', 'heatmap.mha', 'mha:mod=heatmap:model=GCNNUnetPancreas', data="in_data", + the="heatmap of the pancreatic tumor likelihood") + @IO.Output('segmentation', 'segmentation.mha', 'mha:mod=seg:model=GCNNUnetPancreas', data="in_data", + the="segmentation of the pancreas, with the following classes: 1-veins, 2-arteries, 3-pancreas, 4-pancreatic duct, 5-bile duct, 6-cysts, 7-renal vein") + # @IO.Output('vei', 'Veins.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=HEART', bundle='gc_nnunet_pancreas', in_signature=False, + # the="segmentation of the veins") + # @IO.Output('art', 'Arteries.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=PULMONARY_ARTERY', bundle='gc_nnunet_pancreas', in_signature=False, + # the="segmentation of the arteries") + # @IO.Output('pan', 'Pancreas.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=AORTA', bundle='gc_nnunet_pancreas', in_signature=False, + # the="segmentation of the pancreas") + # @IO.Output('pdc', 'PDC.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=HEART', bundle='gc_nnunet_pancreas', in_signature=False, + # the="segmentation of the pancreatic duct") + # @IO.Output('bdt', 'BileDuct.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=PULMONARY_ARTERY', bundle='gc_nnunet_pancreas', in_signature=False, + # the="segmentation of the bile duct") + # @IO.Output('cys', 'Cysts.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=AORTA', bundle='gc_nnunet_pancreas', in_signature=False, + # the="segmentation of cysts") + # @IO.Output('rve', 'RenalVein.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=AORTA', bundle='gc_nnunet_pancreas', in_signature=False, + # the="segmentation of the 
renal vein") + def task(self, instance: Instance, in_data: InstanceData, heatmap: InstanceData, segmentation: InstanceData, **kwargs) -> None: + algorithm = PDACDetectionContainer() + #algorithm.ct_ip_dir = Path("/input/images/") + algorithm.ct_image = in_data.abspath # set as str not Path + #algorithm.output_dir = Path(self.output_dir) + #algorithm.output_dir_tlm = algorithm.output_dir / "pancreatic-tumor-likelihood-map" + #algorithm.output_dir_seg = algorithm.output_dir / "pancreas-anatomy-and-vessel-segmentation" + algorithm.heatmap = Path(heatmap.abspath) # algorithm.output_dir_tlm / "heatmap.mha" + algorithm.segmentation = Path(segmentation.abspath) #algorithm.output_dir_seg / "segmentation.mha" + #algorithm.output_dir.mkdir(exist_ok=True, parents=True) + #algorithm.output_dir_tlm.mkdir(exist_ok=True, parents=True) + #algorithm.output_dir_seg.mkdir(exist_ok=True, parents=True) + self.v(in_data.abspath) + self.v(heatmap.abspath) + self.v(segmentation.abspath) + algorithm.process() diff --git a/models/gc_nnunet_pancreas/utils/MhaImporter.py b/models/gc_nnunet_pancreas/utils/MhaImporter.py new file mode 100644 index 00000000..f2ca93b4 --- /dev/null +++ b/models/gc_nnunet_pancreas/utils/MhaImporter.py @@ -0,0 +1,45 @@ +""" +-------------------------------------- +MHub / GC - MHA importer +-------------------------------------- + +-------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +-------------------------------------- +""" +import os +from typing import Optional +from pathlib import Path + +from mhubio.modules.importer.DataImporter import IDEF, DataImporter, FileType +from mhubio.core import Meta, DirectoryChain, CT + + +class MhaImporter(DataImporter): + def task(self) -> None: + source_dir = self.c['source_dir'] + source_dc = DirectoryChain(path=source_dir, parent=self.config.data.dc) + # input tiff file directory + input_dir = source_dc.abspath + self.v(f"{input_dir}") + + # add input tiff files as WSI 
images... + self.setBasePath(input_dir) + for input_tiff_file in Path(input_dir).glob("*.mha"): + self.v(f"{input_tiff_file}") + self.addMhaCT(str(input_tiff_file), ref=input_tiff_file.stem) + + # let the base module take over from here + super().task() + + def addMhaCT(self, path: str, ref: Optional[str] = None) -> None: + _path = self._resolvePath(path, ref) + self.v("adding CT in mha format with resolved path: ", _path) + assert os.path.isfile(_path) and _path.endswith('.mha'), f"Expect existing mha file, '{_path}' was given instead." + self._import_paths.append(IDEF( + ref = ref, + path = path, + ftype = FileType.MHA, + meta = CT + )) diff --git a/models/gc_nnunet_pancreas/utils/__init__.py b/models/gc_nnunet_pancreas/utils/__init__.py new file mode 100644 index 00000000..c35f6fef --- /dev/null +++ b/models/gc_nnunet_pancreas/utils/__init__.py @@ -0,0 +1,2 @@ +from .MhaImporter import * +from .GCnnUnetPancreasRunner import * \ No newline at end of file diff --git a/models/nnunet_pancreas_pdac/config/config.yml b/models/nnunet_pancreas_pdac/config/config.yml deleted file mode 100644 index 02bd8319..00000000 --- a/models/nnunet_pancreas_pdac/config/config.yml +++ /dev/null @@ -1,46 +0,0 @@ -general: - version: 1.0 - data_base_dir: /app/data - description: base configuration for nnunet pancreas PDAC model - -execute: - - DicomImporter - - NiftiConverter - - NNUnetRunner - - DsegConverter - - DataOrganizer - -modules: - - DicomImporter: - source_dir: input_data - import_dir: sorted_data - sort_data: true - meta: - mod: ct - - NNUnetRunner: - input_data_type: nifti:mod=ct - nnunet_task: Task103_AllStructures - nnunet_model: 3d_fullres - checkpoint: model_final_checkpoint - folds: 0,1,2,3,4 - disable_augmentations: False - disable_patch_overlap: False - export_prob_maps: True - roi: PANCREAS,PANCREAS+NEOPLASM_MALIGNANT_PRIMARY - prob_map_segments: [Background, Pancreas, Pancreatic_cancer] - - DsegConverter: - #source_segs: [nifti:mod=seg] - #json_config_path: 
/app/models/nnunet_pancreas/config/dseg.json - source_segs: [nifti:mod=seg:roi=*] - model_name: NNUnet Pancreas PDAC - skip_empty_slices: True - - DataOrganizer: - targets: - - nifti:mod=ct-->/app/data/output_data/[i:sid]/image.nii.gz - - nifti:mod=seg-->/app/data/output_data/[i:sid]/pancreas.nii.gz - - dicomseg:mod=seg-->/app/data/output_data/[i:sid]/pancreas.seg.dcm - - nrrd:mod=prob_mask-->/app/data/output_data/[i:sid]/prob_masks/[path] \ No newline at end of file diff --git a/models/nnunet_pancreas_pdac/config/dseg.json b/models/nnunet_pancreas_pdac/config/dseg.json deleted file mode 100644 index 31ecb2aa..00000000 --- a/models/nnunet_pancreas_pdac/config/dseg.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "ContentCreatorName": "IDC", - "ClinicalTrialSeriesID": "0", - "ClinicalTrialTimePointID": "1", - "SeriesDescription": "Segmentation", - "SeriesNumber": "42", - "InstanceNumber": "1", - "BodyPartExamined": "ABDOMEN", - "segmentAttributes": [ - [ - { - "labelID": 1, - "SegmentDescription": "Pancreas", - "SegmentAlgorithmType": "AUTOMATIC", - "SegmentAlgorithmName": "nnU-Net", - "SegmentedPropertyCategoryCodeSequence": { - "CodeValue": "123037004", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Anatomical Structure" - }, - "SegmentedPropertyTypeCodeSequence": { - "CodeValue": "15776009", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Pancreas" - }, - "recommendedDisplayRGBValue": [ - 249, - 180, - 111 - ] - }, - { - "labelID": 2, - "SegmentDescription": "Pancreatic Cancer", - "SegmentAlgorithmType": "AUTOMATIC", - "SegmentAlgorithmName": "nnU-Net", - "SegmentedPropertyCategoryCodeSequence": { - "CodeValue": "49755003", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Morphologically Altered Structure" - }, - "SegmentedPropertyTypeCodeSequence": { - "CodeValue": "86049000", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Neoplasm, Primary" - } - } - ] - ], - "ContentLabel": "SEGMENTATION", - "ContentDescription": "Image segmentation", - 
"ClinicalTrialCoordinatingCenterName": "dcmqi" - } \ No newline at end of file diff --git a/models/nnunet_pancreas_pdac/config/slicer.yml b/models/nnunet_pancreas_pdac/config/slicer.yml deleted file mode 100644 index e6e84d4e..00000000 --- a/models/nnunet_pancreas_pdac/config/slicer.yml +++ /dev/null @@ -1,27 +0,0 @@ -general: - version: 1.0 - data_base_dir: /app/data - description: 3D Slicer configuration for nnuner pancreas model - -execute: - - NrrdImporter - - NiftiConverter - - NNUnetRunner - - DataOrganizer - -modules: - - NrrdImporter: - input_dir: input_data - input_file_name: image.nrrd - - NNUnetRunner: - input_data_type: nifti:mod=ct - nnunet_task: Task007_Pancreas - nnunet_model: 3d_lowres - export_prob_maps: False - roi: pancreas - - DataOrganizer: - targets: - - nifti:mod=seg-->/app/data/output_data/[d:roi].nii.gz \ No newline at end of file diff --git a/models/nnunet_pancreas_pdac/dockerfiles/cuda12.0/Dockerfile b/models/nnunet_pancreas_pdac/dockerfiles/cuda12.0/Dockerfile deleted file mode 100644 index fe2f9138..00000000 --- a/models/nnunet_pancreas_pdac/dockerfiles/cuda12.0/Dockerfile +++ /dev/null @@ -1,55 +0,0 @@ -# syntax=docker/dockerfile:experimental - -# Specify the base image for the environment -FROM mhubai/base:cuda12.0 - -# Specify/override authors label -LABEL authors="sil.vandeleemput@radboudumc.nl" - -# Install panimg to test conversion integration TODO should later be installed with MHub/mhubio -#RUN apt-get update && apt-get install -y --no-install-recommends \ -# python3-openslide \ -# && rm -rf /var/lib/apt/lists/* -#RUN pip3 install panimg - -# Clone MHub model (m-nnunet-pancreas branch, fixed to commit 407f1f884f09898bef9a9173e6434d681a50d399) # TODO -#RUN git init \ -# && git sparse-checkout set "models/nnunet_pancreas" \ -# && git fetch https://github.com/MHubAI/models.git m-nnunet-pancreas \ -# && git merge 407f1f884f09898bef9a9173e6434d681a50d399 - - -# Install git-lfs (required for downloading the model weights) -RUN apt 
update && apt install -y --no-install-recommends \ - git-lfs \ - && rm -rf /var/lib/apt/lists/* - -# TODO remove later ==== Temporariy SSH fix as long as repo is private ======= -RUN apt-get update && apt-get install -y --no-install-recommends \ - openssh-client \ - && rm -rf /var/lib/apt/lists/* -# Add github public key to known_hosts for SSH -RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts -# TODO remove later =============================== - - -RUN --mount=type=ssh git clone --depth 1 git@github.com:DIAGNijmegen/CE-CT_PDAC_AutomaticDetection_nnUnet.git /nnunet_pancreas_pdac && \ - cd /nnunet_pancreas_pdac && \ - git reset --hard 117bb4ebf8bc9e90509a468a5d56e0515987b5a7 && \ - rm -rf /nnunet_pancreas_pdac/.git - -# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build -# by pulling sklearn instead of scikit-learn -# N.B. this is a known issue: -# https://github.com/MIC-DKFZ/nnUNet/issues/1281 -# https://github.com/MIC-DKFZ/nnUNet/pull/1209 -ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True - -# Install nnunet and other requirements -RUN pip3 install --no-cache-dir -r /nnunet_pancreas_pdac/requirements.txt - -# specify nnunet specific environment variables -ENV WEIGHTS_FOLDER=/nnunet_pancreas_pdac/nnunet/results/nnUNet - -# Default run script -CMD ["python3", "-m", "mhubio.run", "--config", "/app/models/nnunet_pancreas_pdac/config/config.yml"] \ No newline at end of file diff --git a/models/nnunet_pancreas_pdac/scripts/run.py b/models/nnunet_pancreas_pdac/scripts/run.py deleted file mode 100644 index 3491f58b..00000000 --- a/models/nnunet_pancreas_pdac/scripts/run.py +++ /dev/null @@ -1,50 +0,0 @@ -""" -------------------------------------------------- -MHub - run the NNUnet pancreas segmentation - pipeline -------------------------------------------------- - -------------------------------------------------- -Author: Sil van de Leemput -Email: sil.vandeleemput@radboudumc.nl 
-------------------------------------------------- -""" - -import sys -sys.path.append('.') - -from mhubio.core import Config, DataType, FileType, CT, SEG -from mhubio.modules.importer.DicomImporter import DicomImporter -from mhubio.modules.convert.NiftiConverter import NiftiConverter -from mhubio.modules.runner.NNUnetRunner import NNUnetRunner -from mhubio.modules.convert.DsegConverter import DsegConverter -from mhubio.modules.organizer.DataOrganizer import DataOrganizer - -# clean-up -import shutil -shutil.rmtree("/app/data/sorted_data", ignore_errors=True) -shutil.rmtree("/app/tmp", ignore_errors=True) -shutil.rmtree("/app/data/output_data", ignore_errors=True) - -# config -config = Config('/app/models/nnunet_pancreas_pdac/config/config.yml') -config.verbose = True # TODO: define levels of verbosity and integrate consistently. - -# import (ct:dicom) -DicomImporter(config).execute() - -# convert (ct:dicom -> ct:nifti) -NiftiConverter(config).execute() - -# execute model (nnunet) -NNUnetRunner(config).execute() - -# convert (seg:nifti -> seg:dcm) -DsegConverter(config).execute() - -# organize data into output folder -organizer = DataOrganizer(config, set_file_permissions=sys.platform.startswith('linux')) -organizer.setTarget(DataType(FileType.NIFTI, CT), "/app/data/output_data/[i:sid]/image.nii.gz") -organizer.setTarget(DataType(FileType.NIFTI, SEG), "/app/data/output_data/[i:sid]/pancreas.nii.gz") -organizer.setTarget(DataType(FileType.DICOMSEG, SEG), "/app/data/output_data/[i:sid]/pancreas.seg.dcm") -organizer.execute() \ No newline at end of file From 2942e373e3270f6be2cecfb49fd0e7a1806b81ca Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 1 Aug 2023 13:43:51 +0200 Subject: [PATCH 008/125] Updated for new base image (single Dockerfile), updated config.yml, run.py, and others... 
--- .../__init__.py | 0 .../gc_grt123_lung_cancer/config/config.yml | 26 ++++++++++++ .../dockerfiles/Dockerfile | 36 ++++++++++++++++ .../scripts/run.py | 10 ++--- .../utils/LungCancerClassifierRunner.py | 7 +++- .../utils/PanImgConverters.py | 8 ++-- .../utils/__init__.py | 0 models/grt123_lung_cancer/config/config.yml | 14 ------- .../dockerfiles/cuda11.4/Dockerfile | 42 ------------------- .../dockerfiles/cuda12.0/Dockerfile | 42 ------------------- .../dockerfiles/nocuda/Dockerfile | 42 ------------------- 11 files changed, 76 insertions(+), 151 deletions(-) rename models/{grt123_lung_cancer => gc_grt123_lung_cancer}/__init__.py (100%) create mode 100644 models/gc_grt123_lung_cancer/config/config.yml create mode 100644 models/gc_grt123_lung_cancer/dockerfiles/Dockerfile rename models/{grt123_lung_cancer => gc_grt123_lung_cancer}/scripts/run.py (67%) rename models/{grt123_lung_cancer => gc_grt123_lung_cancer}/utils/LungCancerClassifierRunner.py (93%) rename models/{grt123_lung_cancer => gc_grt123_lung_cancer}/utils/PanImgConverters.py (94%) rename models/{grt123_lung_cancer => gc_grt123_lung_cancer}/utils/__init__.py (100%) delete mode 100644 models/grt123_lung_cancer/config/config.yml delete mode 100644 models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile delete mode 100644 models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile delete mode 100644 models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile diff --git a/models/grt123_lung_cancer/__init__.py b/models/gc_grt123_lung_cancer/__init__.py similarity index 100% rename from models/grt123_lung_cancer/__init__.py rename to models/gc_grt123_lung_cancer/__init__.py diff --git a/models/gc_grt123_lung_cancer/config/config.yml b/models/gc_grt123_lung_cancer/config/config.yml new file mode 100644 index 00000000..c6947ac6 --- /dev/null +++ b/models/gc_grt123_lung_cancer/config/config.yml @@ -0,0 +1,26 @@ +general: + data_base_dir: /app/data + +execute: + - DicomImporter + - MhaConverter + - 
LungCancerClassifierRunner + - DataOrganizer + +modules: + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: True + meta: + mod: ct + + LungCancerClassifierRunner: + tmp_path: /app/tmp + n_preprocessing_workers: 8 + + DataOrganizer: + target_dir: output_data + require_data_confirmation: true + targets: + - json-->[i:sid]/gc_grt123_lung_cancer_findings.json diff --git a/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile b/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile new file mode 100644 index 00000000..03ed18e8 --- /dev/null +++ b/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile @@ -0,0 +1,36 @@ +# Specify the base image for the environment +FROM mhubai/base:latest + +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# install required dependencies for grt123 algorithm including GPU support +RUN pip3 install --no-cache-dir \ + torch===2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html + +# Install grt123 algorithm and model weights +# - We use a shallow git clone for reduced bandwidth usage +# - We remove unnecessary files for a compacter docker layer +# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file +RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /gc_grt123_lung_cancer && \ + rm -rf /gc_grt123_lung_cancer/tests && \ + rm -rf /gc_grt123_lung_cancer/training && \ + rm -rf /gc_grt123_lung_cancer/processor && \ + rm -rf /gc_grt123_lung_cancer/images && \ + rm /gc_grt123_lung_cancer/README.md && \ + rm /gc_grt123_lung_cancer/solution-grt123-team.pdf && \ + mv /gc_grt123_lung_cancer/.git/HEAD /gc_grt123_lung_cancer && \ + rm -rf /gc_grt123_lung_cancer/.git/* && \ + mv /gc_grt123_lung_cancer/HEAD /gc_grt123_lung_cancer/.git + +# Clone MHub model (m-gc-grt123-lung-cancer branch, fixed to commit TODO) +#RUN git init \ +# && git sparse-checkout set 
"models/grt123_lung_cancer" \ +# && git fetch https://github.com/MHubAI/models.git m-gc-grt123-lung-cancer \ +# && git merge TODO + +# Add lobe segmentation code base to python path +ENV PYTHONPATH="/gc_grt123_lung_cancer:/app" + +# Default run script # TODO should be direct call to config.yml waiting for MhaConverter with panimg backend +CMD ["python3", "/app/models/gc_grt123_lung_cancer/scripts/run.py"] diff --git a/models/grt123_lung_cancer/scripts/run.py b/models/gc_grt123_lung_cancer/scripts/run.py similarity index 67% rename from models/grt123_lung_cancer/scripts/run.py rename to models/gc_grt123_lung_cancer/scripts/run.py index 78971e1a..e53880cd 100644 --- a/models/grt123_lung_cancer/scripts/run.py +++ b/models/gc_grt123_lung_cancer/scripts/run.py @@ -15,8 +15,8 @@ from mhubio.core import Config, DataType, FileType from mhubio.modules.importer.DicomImporter import DicomImporter from mhubio.modules.organizer.DataOrganizer import DataOrganizer -from models.grt123_lung_cancer.utils.LungCancerClassifierRunner import LungCancerClassifierRunner -from models.grt123_lung_cancer.utils.PanImgConverters import MhaPanImgConverter +from models.gc_grt123_lung_cancer.utils.LungCancerClassifierRunner import LungCancerClassifierRunner +from models.gc_grt123_lung_cancer.utils.PanImgConverters import MhaPanImgConverter # clean-up import shutil @@ -25,7 +25,7 @@ shutil.rmtree("/app/data/output_data", ignore_errors=True) # config -config = Config('/app/models/grt123_lung_cancer/config/config.yml') +config = Config('/app/models/gc_grt123_lung_cancer/config/config.yml') # import (ct:dicom) DicomImporter(config).execute() @@ -37,6 +37,4 @@ LungCancerClassifierRunner(config).execute() # organize data into output folder -organizer = DataOrganizer(config, set_file_permissions=sys.platform.startswith('linux')) -organizer.setTarget(DataType(FileType.JSON), "/app/data/output_data/[i:sid]/grt123_lung_cancer_findings.json") -organizer.execute() +DataOrganizer(config, 
set_file_permissions=sys.platform.startswith('linux')).execute() diff --git a/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py similarity index 93% rename from models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py rename to models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py index 80bc2485..d1b6643c 100644 --- a/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py +++ b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -58,7 +58,12 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData tmp_output_bbox_dir.mkdir(exist_ok=True, parents=True) tmp_output_prep_dir.mkdir(exist_ok=True, parents=True) - n_gpu = 1 if torch.cuda.is_available() else 0 + if torch.cuda.is_available(): + self.v("Running with a GPU") + n_gpu = 1 + else: + self.v("Running on the CPU, might be slow...") + n_gpu = 0 # apply grt123 algorithm results = main.main( diff --git a/models/grt123_lung_cancer/utils/PanImgConverters.py b/models/gc_grt123_lung_cancer/utils/PanImgConverters.py similarity index 94% rename from models/grt123_lung_cancer/utils/PanImgConverters.py rename to models/gc_grt123_lung_cancer/utils/PanImgConverters.py index 25dd618e..824d20f4 100644 --- a/models/grt123_lung_cancer/utils/PanImgConverters.py +++ b/models/gc_grt123_lung_cancer/utils/PanImgConverters.py @@ -68,10 +68,10 @@ def convert(self, instance: Instance) -> Optional[InstanceData]: sitk_image = result.image # SimpleITK image SimpleITK.WriteImage(sitk_image, str(out_mha_file)) except UnconsumedFilesException as e: - # e.errors is keyed with a Path to a file that could not be consumed, + # e.file_errors is keyed with a Path to a file that could not be consumed, # with a list of all the errors found with loading it, # the user can then choose what to do with that information - print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.errors) + 
print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) return None return mha_data @@ -113,10 +113,10 @@ def convert(self, instance: Instance) -> Optional[InstanceData]: tiff_image = result.file # Path to the tiff file shutil.move(str(tiff_image), str(out_tiff_file)) except UnconsumedFilesException as e: - # e.errors is keyed with a Path to a file that could not be consumed, + # e.file_errors is keyed with a Path to a file that could not be consumed, # with a list of all the errors found with loading it, # the user can then choose what to do with that information - print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.errors) + print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) return None return tiff_data diff --git a/models/grt123_lung_cancer/utils/__init__.py b/models/gc_grt123_lung_cancer/utils/__init__.py similarity index 100% rename from models/grt123_lung_cancer/utils/__init__.py rename to models/gc_grt123_lung_cancer/utils/__init__.py diff --git a/models/grt123_lung_cancer/config/config.yml b/models/grt123_lung_cancer/config/config.yml deleted file mode 100644 index bad14ddc..00000000 --- a/models/grt123_lung_cancer/config/config.yml +++ /dev/null @@ -1,14 +0,0 @@ -general: - data_base_dir: /app/data - -modules: - DicomImporter: - source_dir: input_data - import_dir: sorted_data - sort_data: True - meta: - mod: ct - - LungCancerClassifierRunner: - tmp_path: /app/tmp - n_preprocessing_workers: 8 diff --git a/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile deleted file mode 100644 index b24c6f1d..00000000 --- a/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile +++ /dev/null @@ -1,42 +0,0 @@ -# Specify the base image for the environment -FROM mhubai/base:cuda11.4 - -# Specify/override authors label -LABEL authors="sil.vandeleemput@radboudumc.nl" - -# Install panimg to test conversion 
integration TODO should later be installed with MHub/mhubio -RUN apt-get update && apt-get install -y --no-install-recommends \ - python3-openslide \ - && rm -rf /var/lib/apt/lists/* -RUN pip3 install panimg - -# install required dependencies for grt123 algorithm (CPU-only) -RUN pip3 install --no-cache-dir \ - torch==1.10.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html - -# Install grt123 algorithm and model weights -# - We use a shallow git clone for reduced bandwidth usage -# - We remove unnecessary files for a compacter docker layer -# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file -RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ - rm -rf /grt123_lung_cancer/tests && \ - rm -rf /grt123_lung_cancer/training && \ - rm -rf /grt123_lung_cancer/processor && \ - rm -rf /grt123_lung_cancer/images && \ - rm /grt123_lung_cancer/README.md && \ - rm /grt123_lung_cancer/solution-grt123-team.pdf && \ - mv /grt123_lung_cancer/.git/HEAD /grt123_lung_cancer && \ - rm -rf /grt123_lung_cancer/.git/* && \ - mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git - -# Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) -#RUN git init \ -# && git sparse-checkout set "models/grt123_lung_cancer" \ -# && git fetch https://github.com/MHubAI/models.git m-grt123-lung-cancer \ -# && git merge TODO - -# Add lobe segmentation code base to python path -ENV PYTHONPATH="/grt123_lung_cancer:/app" - -# Default run script -CMD ["python3", "/app/models/grt123_lung_cancer/scripts/run.py"] diff --git a/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile deleted file mode 100644 index 0a9724e6..00000000 --- a/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile +++ /dev/null @@ -1,42 +0,0 @@ -# Specify the base image for the environment -FROM 
mhubai/base:cuda12.0 - -# Specify/override authors label -LABEL authors="sil.vandeleemput@radboudumc.nl" - -# Install panimg to test conversion integration TODO should later be installed with MHub/mhubio -RUN apt-get update && apt-get install -y --no-install-recommends \ - python3-openslide \ - && rm -rf /var/lib/apt/lists/* -RUN pip3 install panimg - -# install required dependencies for grt123 algorithm (CPU-only) -RUN pip3 install --no-cache-dir \ - torch===2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html - -# Install grt123 algorithm and model weights -# - We use a shallow git clone for reduced bandwidth usage -# - We remove unnecessary files for a compacter docker layer -# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file -RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ - rm -rf /grt123_lung_cancer/tests && \ - rm -rf /grt123_lung_cancer/training && \ - rm -rf /grt123_lung_cancer/processor && \ - rm -rf /grt123_lung_cancer/images && \ - rm /grt123_lung_cancer/README.md && \ - rm /grt123_lung_cancer/solution-grt123-team.pdf && \ - mv /grt123_lung_cancer/.git/HEAD /grt123_lung_cancer && \ - rm -rf /grt123_lung_cancer/.git/* && \ - mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git - -# Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) -#RUN git init \ -# && git sparse-checkout set "models/grt123_lung_cancer" \ -# && git fetch https://github.com/MHubAI/models.git m-grt123-lung-cancer \ -# && git merge TODO - -# Add lobe segmentation code base to python path -ENV PYTHONPATH="/grt123_lung_cancer:/app" - -# Default run script -CMD ["python3", "/app/models/grt123_lung_cancer/scripts/run.py"] diff --git a/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile b/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile deleted file mode 100644 index 862a1dc7..00000000 --- 
a/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile +++ /dev/null @@ -1,42 +0,0 @@ -# Specify the base image for the environment -FROM mhubai/base:nocuda - -# Specify/override authors label -LABEL authors="sil.vandeleemput@radboudumc.nl" - -# Install panimg to test conversion integration TODO should later be installed with MHub/mhubio -RUN apt-get update && apt-get install -y --no-install-recommends \ - python3-openslide \ - && rm -rf /var/lib/apt/lists/* -RUN pip3 install panimg - -# install required dependencies for grt123 algorithm (CPU-only) -RUN pip3 install --no-cache-dir \ - torch===2.0.1+cpu -f https://download.pytorch.org/whl/torch_stable.html - -# Install grt123 algorithm and model weights -# - We use a shallow git clone for reduced bandwidth usage -# - We remove unnecessary files for a compacter docker layer -# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file -RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ - rm -rf /grt123_lung_cancer/tests && \ - rm -rf /grt123_lung_cancer/training && \ - rm -rf /grt123_lung_cancer/processor && \ - rm -rf /grt123_lung_cancer/images && \ - rm /grt123_lung_cancer/README.md && \ - rm /grt123_lung_cancer/solution-grt123-team.pdf && \ - mv /grt123_lung_cancer/.git/HEAD /grt123_lung_cancer && \ - rm -rf /grt123_lung_cancer/.git/* && \ - mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git - -# Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) -#RUN git init \ -# && git sparse-checkout set "models/grt123_lung_cancer" \ -# && git fetch https://github.com/MHubAI/models.git m-grt123-lung-cancer \ -# && git merge TODO - -# Add lobe segmentation code base to python path -ENV PYTHONPATH="/grt123_lung_cancer:/app" - -# Default run script -CMD ["python3", "/app/models/grt123_lung_cancer/scripts/run.py"] From 4bb7483da36659b42a6658aac9309e753fea818b Mon Sep 
17 00:00:00 2001 From: silvandeleemput Date: Tue, 1 Aug 2023 15:23:11 +0200 Subject: [PATCH 009/125] update for new base image, add dseg.json for class labels --- models/gc_nnunet_pancreas/config/dseg.json | 168 ++++++++++++++++++ .../dockerfiles/{nocuda => }/Dockerfile | 3 +- models/gc_nnunet_pancreas/scripts/run.py | 14 +- .../gc_nnunet_pancreas/utils/MhaImporter.py | 45 ----- .../utils/PanImgConverters.py | 122 +++++++++++++ models/gc_nnunet_pancreas/utils/__init__.py | 4 +- 6 files changed, 298 insertions(+), 58 deletions(-) create mode 100644 models/gc_nnunet_pancreas/config/dseg.json rename models/gc_nnunet_pancreas/dockerfiles/{nocuda => }/Dockerfile (95%) delete mode 100644 models/gc_nnunet_pancreas/utils/MhaImporter.py create mode 100644 models/gc_nnunet_pancreas/utils/PanImgConverters.py diff --git a/models/gc_nnunet_pancreas/config/dseg.json b/models/gc_nnunet_pancreas/config/dseg.json new file mode 100644 index 00000000..d5c84596 --- /dev/null +++ b/models/gc_nnunet_pancreas/config/dseg.json @@ -0,0 +1,168 @@ +{ + "ContentCreatorName": "Reader1", + "ClinicalTrialSeriesID": "Session1", + "ClinicalTrialTimePointID": "1", + "SeriesDescription": "Segmentation", + "SeriesNumber": "300", + "InstanceNumber": "1", + "BodyPartExamined": "Pancreas", + "segmentAttributes": [ + [ + { + "labelID": 1, + "SegmentDescription": "Veins", + "SegmentAlgorithmType": "AUTOMATIC", + "SegmentAlgorithmName": "GC nnUNet Pancreas", + "SegmentedPropertyCategoryCodeSequence": { + "CodeValue": "123037004", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Anatomical Structure" + }, + "SegmentedPropertyTypeCodeSequence": { + "CodeValue": "29092000", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Vein" + }, + "SegmentedPropertyTypeModifierCodeSequence": { + "CodeValue": "51440002", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Right and left" + }, + "recommendedDisplayRGBValue": [ + 0, + 151, + 206 + ] + }, + { + "labelID": 2, + "SegmentDescription": "Artery", + 
"SegmentAlgorithmType": "AUTOMATIC", + "SegmentAlgorithmName": "GC nnUNet Pancreas", + "SegmentedPropertyCategoryCodeSequence": { + "CodeValue": "123037004", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Anatomical Structure" + }, + "SegmentedPropertyTypeCodeSequence": { + "CodeValue": "51114001", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Artery" + }, + "SegmentedPropertyTypeModifierCodeSequence": { + "CodeValue": "51440002", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Right and left" + }, + "recommendedDisplayRGBValue": [ + 216, + 101, + 79 + ] + }, + { + "labelID": 3, + "SegmentDescription": "Pancreas", + "SegmentAlgorithmType": "AUTOMATIC", + "SegmentAlgorithmName": "GC nnUNet Pancreas", + "SegmentedPropertyCategoryCodeSequence": { + "CodeValue": "123037004", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Anatomical Structure" + }, + "SegmentedPropertyTypeCodeSequence": { + "CodeValue": "15776009", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Pancreas" + }, + "recommendedDisplayRGBValue": [ + 249, + 180, + 111 + ] + }, + { + "labelID": 4, + "SegmentDescription": "Pancreatic duct", + "SegmentAlgorithmType": "AUTOMATIC", + "SegmentAlgorithmName": "GC nnUNet Pancreas", + "SegmentedPropertyCategoryCodeSequence": { + "CodeValue": "123037004", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Anatomical Structure" + }, + "SegmentedPropertyTypeCodeSequence": { + "CodeValue": "69930009", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Pancreatic duct" + } + }, + { + "labelID": 5, + "SegmentDescription": "Bile duct", + "SegmentAlgorithmType": "AUTOMATIC", + "SegmentAlgorithmName": "GC nnUNet Pancreas", + "SegmentedPropertyCategoryCodeSequence": { + "CodeValue": "123037004", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Anatomical Structure" + }, + "SegmentedPropertyTypeCodeSequence": { + "CodeValue": "28273000", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Bile duct" + }, + 
"SegmentedPropertyTypeModifierCodeSequence": { + "CodeValue": "51440002", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Right and left" + }, + "recommendedDisplayRGBValue": [ + 0, + 145, + 30 + ] + }, + { + "labelID": 6, + "SegmentDescription": "Cysts", + "SegmentAlgorithmType": "AUTOMATIC", + "SegmentAlgorithmName": "GC nnUNet Pancreas", + "SegmentedPropertyCategoryCodeSequence": { + "CodeValue": "49755003", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Morphologically Altered Structure" + }, + "SegmentedPropertyTypeCodeSequence": { + "CodeValue": "367643001", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Cyst" + }, + "recommendedDisplayRGBValue": [ + 205, + 205, + 100 + ] + }, + { + "labelID": 7, + "SegmentDescription": "Renal vein", + "SegmentAlgorithmType": "AUTOMATIC", + "SegmentAlgorithmName": "GC nnUNet Pancreas", + "SegmentedPropertyCategoryCodeSequence": { + "CodeValue": "123037004", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Anatomical Structure" + }, + "SegmentedPropertyTypeCodeSequence": { + "CodeValue": "56400007", + "CodingSchemeDesignator": "SCT", + "CodeMeaning": "Renal vein" + } + } + ] + ], + "ContentLabel": "SEGMENTATION", + "ContentDescription": "Image segmentation", + "ClinicalTrialCoordinatingCenterName": "dcmqi" +} \ No newline at end of file diff --git a/models/gc_nnunet_pancreas/dockerfiles/nocuda/Dockerfile b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile similarity index 95% rename from models/gc_nnunet_pancreas/dockerfiles/nocuda/Dockerfile rename to models/gc_nnunet_pancreas/dockerfiles/Dockerfile index d1945b36..5eeeb0d8 100644 --- a/models/gc_nnunet_pancreas/dockerfiles/nocuda/Dockerfile +++ b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile @@ -1,6 +1,5 @@ # Specify the base image for the environment FROM mhubai/base:latest -# TODO add CUDA support since algorithm takes eons otherwise... 
# Specify/override authors label LABEL authors="sil.vandeleemput@radboudumc.nl" @@ -33,7 +32,7 @@ RUN git clone --depth 1 https://github.com/DIAGNijmegen/CE-CT_PDAC_AutomaticDete # https://github.com/MIC-DKFZ/nnUNet/pull/1209 ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True -# Install nnUNet and other requirements +# Install nnUNet and other requirements (should install PyTorch as well...) RUN pip3 install --no-cache-dir -r /opt/algorithm/requirements.txt # Extend the nnUNet installation with custom trainers diff --git a/models/gc_nnunet_pancreas/scripts/run.py b/models/gc_nnunet_pancreas/scripts/run.py index 8421566f..9d25b15f 100644 --- a/models/gc_nnunet_pancreas/scripts/run.py +++ b/models/gc_nnunet_pancreas/scripts/run.py @@ -16,12 +16,10 @@ from mhubio.core import Config, DataType, FileType, CT, SEG, Meta from mhubio.modules.importer.FileStructureImporter import FileStructureImporter from mhubio.modules.importer.DicomImporter import DicomImporter -from mhubio.modules.importer.NrrdImporter import NrrdImporter -from mhubio.modules.convert.NiftiConverter import NiftiConverter -from mhubio.modules.runner.NNUnetRunner import NNUnetRunner from mhubio.modules.convert.DsegConverter import DsegConverter from mhubio.modules.organizer.DataOrganizer import DataOrganizer -from models.gc_nnunet_pancreas import MhaImporter, GCNNUnetPancreasRunner, HEATMAP +from models.gc_nnunet_pancreas import GCNNUnetPancreasRunner, HEATMAP +from models.gc_nnunet_pancreas.utils import MhaPanImgConverter # clean-up import shutil @@ -31,14 +29,12 @@ # config config = Config('/app/models/gc_nnunet_pancreas/config/config.yml') -config.verbose = True # TODO: define levels of verbosity and integrate consistently. 
# import (ct:dicom) -#DicomImporter(config).execute() +DicomImporter(config).execute() -# import (ct:mha) -MhaImporter(config).execute() -#FileStructureImporter(config).execute() +# convert (ct:dicom -> ct:mha) +MhaPanImgConverter(config).execute() # execute model (nnunet ct:mha -> (hm:mha, seg:mha)) GCNNUnetPancreasRunner(config).execute() diff --git a/models/gc_nnunet_pancreas/utils/MhaImporter.py b/models/gc_nnunet_pancreas/utils/MhaImporter.py deleted file mode 100644 index f2ca93b4..00000000 --- a/models/gc_nnunet_pancreas/utils/MhaImporter.py +++ /dev/null @@ -1,45 +0,0 @@ -""" --------------------------------------- -MHub / GC - MHA importer --------------------------------------- - --------------------------------------- -Author: Sil van de Leemput -Email: sil.vandeleemput@radboudumc.nl --------------------------------------- -""" -import os -from typing import Optional -from pathlib import Path - -from mhubio.modules.importer.DataImporter import IDEF, DataImporter, FileType -from mhubio.core import Meta, DirectoryChain, CT - - -class MhaImporter(DataImporter): - def task(self) -> None: - source_dir = self.c['source_dir'] - source_dc = DirectoryChain(path=source_dir, parent=self.config.data.dc) - # input tiff file directory - input_dir = source_dc.abspath - self.v(f"{input_dir}") - - # add input tiff files as WSI images... - self.setBasePath(input_dir) - for input_tiff_file in Path(input_dir).glob("*.mha"): - self.v(f"{input_tiff_file}") - self.addMhaCT(str(input_tiff_file), ref=input_tiff_file.stem) - - # let the base module take over from here - super().task() - - def addMhaCT(self, path: str, ref: Optional[str] = None) -> None: - _path = self._resolvePath(path, ref) - self.v("adding CT in mha format with resolved path: ", _path) - assert os.path.isfile(_path) and _path.endswith('.mha'), f"Expect existing mha file, '{_path}' was given instead." 
- self._import_paths.append(IDEF( - ref = ref, - path = path, - ftype = FileType.MHA, - meta = CT - )) diff --git a/models/gc_nnunet_pancreas/utils/PanImgConverters.py b/models/gc_nnunet_pancreas/utils/PanImgConverters.py new file mode 100644 index 00000000..824d20f4 --- /dev/null +++ b/models/gc_nnunet_pancreas/utils/PanImgConverters.py @@ -0,0 +1,122 @@ +""" +------------------------------------------------------------- +MHub - PanImg Conversion Modules Dicom2Mha and WSI-Dicom2Tiff +------------------------------------------------------------- + +------------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +------------------------------------------------------------- +""" + + +from typing import Optional + +from mhubio.modules.convert.DataConverter import DataConverter +from mhubio.core import Instance, InstanceData, DataType, FileType + +import os +from pathlib import Path +import shutil + +from panimg.exceptions import UnconsumedFilesException +from panimg.image_builders.dicom import image_builder_dicom +from panimg.image_builders.tiff import image_builder_tiff +from panimg.image_builders.metaio_nrrd import image_builder_nrrd + +import SimpleITK + + +class MhaPanImgConverter(DataConverter): + """ + Conversion module. + Convert instance data from dicom or nrrd to mha. + """ + + def convert(self, instance: Instance) -> Optional[InstanceData]: + + # create a converted instance + has_instance_dicom = instance.hasType(DataType(FileType.DICOM)) + has_instance_nrrd = instance.hasType(DataType(FileType.NRRD)) + + assert has_instance_dicom or has_instance_nrrd, f"CONVERT ERROR: required datatype (dicom or nrrd) not available in instance {str(instance)}." 
+ + # select input data, dicom has priority over nrrd + input_data = instance.data.filter(DataType(FileType.DICOM) if has_instance_dicom else DataType(FileType.NRRD)).first() + + # out data + mha_data = InstanceData("image.mha", DataType(FileType.MHA, input_data.type.meta)) + mha_data.instance = instance + + # paths + inp_data_dir = Path(input_data.abspath) + out_mha_file = Path(mha_data.abspath) + + # sanity check + assert(inp_data_dir.is_dir()) + + # DICOM CT to MHA conversion (if the file doesn't exist yet) + if out_mha_file.is_file(): + print("CONVERT ERROR: File already exists: ", out_mha_file) + return None + else: + # run conversion using panimg + input_files = {f for f in inp_data_dir.glob(["*.nrrd", "*.dcm"][has_instance_dicom]) if f.is_file()} + img_builder = image_builder_dicom if has_instance_dicom else image_builder_nrrd + try: + for result in img_builder(files=input_files): + sitk_image = result.image # SimpleITK image + SimpleITK.WriteImage(sitk_image, str(out_mha_file)) + except UnconsumedFilesException as e: + # e.file_errors is keyed with a Path to a file that could not be consumed, + # with a list of all the errors found with loading it, + # the user can then choose what to do with that information + print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) + return None + + return mha_data + + +class TiffPanImgConverter(DataConverter): + """ + Conversion module. + Convert instance data from WSI-dicom to tiff. + """ + + def convert(self, instance: Instance) -> Optional[InstanceData]: + + # create a converted instance + assert instance.hasType(DataType(FileType.DICOM)), f"CONVERT ERROR: required datatype (dicom) not available in instance {str(instance)}." 
+ dicom_data = instance.data.filter(DataType(FileType.DICOM)).first() + + # out data + tiff_data = InstanceData("image.tiff", DataType(FileType.TIFF, dicom_data.type.meta)) + tiff_data.instance = instance + + # paths + inp_dicom_dir = Path(dicom_data.abspath) + out_tiff_file = Path(tiff_data.abspath) + + # sanity check + assert(inp_dicom_dir.is_dir()) + + # WSI-DICOM to TIFF conversion (if the file doesn't exist yet) + if out_tiff_file.is_file(): + print("CONVERT ERROR: File already exists: ", out_tiff_file) + return None + else: + # run conversion using panimg + dcm_input_files = {f for f in inp_dicom_dir.glob("*.dcm") if f.is_file()} + + try: + for result in image_builder_tiff(files=dcm_input_files): + tiff_image = result.file # Path to the tiff file + shutil.move(str(tiff_image), str(out_tiff_file)) + except UnconsumedFilesException as e: + # e.file_errors is keyed with a Path to a file that could not be consumed, + # with a list of all the errors found with loading it, + # the user can then choose what to do with that information + print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) + return None + + return tiff_data diff --git a/models/gc_nnunet_pancreas/utils/__init__.py b/models/gc_nnunet_pancreas/utils/__init__.py index c35f6fef..7fc72114 100644 --- a/models/gc_nnunet_pancreas/utils/__init__.py +++ b/models/gc_nnunet_pancreas/utils/__init__.py @@ -1,2 +1,2 @@ -from .MhaImporter import * -from .GCnnUnetPancreasRunner import * \ No newline at end of file +from .GCnnUnetPancreasRunner import * +from .PanImgConverters import * From 12f30d85cf81d659ba412f1d156b8209a97277c1 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 1 Aug 2023 17:14:15 +0200 Subject: [PATCH 010/125] cleanup code runner and run, configure dsegconverter, dataorganizer --- models/gc_nnunet_pancreas/config/config.yml | 34 +++++++++-------- models/gc_nnunet_pancreas/config/dseg.json | 8 ++-- models/gc_nnunet_pancreas/scripts/run.py | 15 +++----- 
.../utils/GCnnUnetPancreasRunner.py | 38 +++---------------- 4 files changed, 33 insertions(+), 62 deletions(-) diff --git a/models/gc_nnunet_pancreas/config/config.yml b/models/gc_nnunet_pancreas/config/config.yml index 3b436dac..2a449c6b 100644 --- a/models/gc_nnunet_pancreas/config/config.yml +++ b/models/gc_nnunet_pancreas/config/config.yml @@ -1,10 +1,16 @@ general: version: 1.0 data_base_dir: /app/data - description: base configuration for nnunet pancreas PDAC model + description: base configuration for GC NNUnet Pancreas model (dicom to dicom) -modules: +execute: +- DicomImporter +- MHAConverter +- GCNNUnetPancreasRunner +- DsegConverter +- DataOrganizer +modules: DicomImporter: source_dir: input_data import_dir: sorted_data @@ -12,22 +18,20 @@ modules: meta: mod: ct - MhaImporter: - source_dir: input_data - import_dir: sorted_data + MHAConverter: + # TODO add panimg backend here... GCNNUnetPancreasRunner: -# TODO configure DsegConverter -# DsegConverter: -# #source_segs: [nifti:mod=seg] -# #json_config_path: /app/models/nnunet_pancreas/config/dseg.json -# source_segs: [mha:mod=seg:roi=*] -# model_name: GC NNUnet Pancreas -# skip_empty_slices: True + DsegConverter: + model_name: 'GC NNUnet Pancreas' + source_segs: ['mha:mod=seg'] + target_dicom: dicom:mod=ct + skip_empty_slices: True + json_config_path: /app/models/gc_nnunet_pancreas/config/dseg.json DataOrganizer: targets: - - mha:mod=heatmap-->/app/data/output_data/[i:sid]/heatmap.mha - - mha:mod=seg-->/app/data/output_data/[i:sid]/pancreas.seg.mha -# - dicomseg:mod=seg-->/app/data/output_data/[i:sid]/pancreas.seg.dcm + - mha:mod=heatmap-->[i:sid]/nnunet_pancreas_heatmap.mha + - mha:mod=seg-->[i:sid]/nnunet_pancreas.seg.mha + - dicomseg:mod=seg-->[i:sid]/nnunet_pancreas.seg.dcm diff --git a/models/gc_nnunet_pancreas/config/dseg.json b/models/gc_nnunet_pancreas/config/dseg.json index d5c84596..1e52a967 100644 --- a/models/gc_nnunet_pancreas/config/dseg.json +++ b/models/gc_nnunet_pancreas/config/dseg.json 
@@ -1,11 +1,11 @@ { - "ContentCreatorName": "Reader1", - "ClinicalTrialSeriesID": "Session1", + "ContentCreatorName": "IDC", + "ClinicalTrialSeriesID": "0", "ClinicalTrialTimePointID": "1", "SeriesDescription": "Segmentation", - "SeriesNumber": "300", + "SeriesNumber": "42", "InstanceNumber": "1", - "BodyPartExamined": "Pancreas", + "BodyPartExamined": "ABDOMEN", "segmentAttributes": [ [ { diff --git a/models/gc_nnunet_pancreas/scripts/run.py b/models/gc_nnunet_pancreas/scripts/run.py index 9d25b15f..2baeb9e7 100644 --- a/models/gc_nnunet_pancreas/scripts/run.py +++ b/models/gc_nnunet_pancreas/scripts/run.py @@ -13,12 +13,11 @@ import sys sys.path.append('.') -from mhubio.core import Config, DataType, FileType, CT, SEG, Meta -from mhubio.modules.importer.FileStructureImporter import FileStructureImporter +from mhubio.core import Config from mhubio.modules.importer.DicomImporter import DicomImporter from mhubio.modules.convert.DsegConverter import DsegConverter from mhubio.modules.organizer.DataOrganizer import DataOrganizer -from models.gc_nnunet_pancreas import GCNNUnetPancreasRunner, HEATMAP +from models.gc_nnunet_pancreas import GCNNUnetPancreasRunner from models.gc_nnunet_pancreas.utils import MhaPanImgConverter # clean-up @@ -39,12 +38,8 @@ # execute model (nnunet ct:mha -> (hm:mha, seg:mha)) GCNNUnetPancreasRunner(config).execute() -# convert (seg:nifti -> seg:dcm) -# DsegConverter(config).execute() +# convert (seg:mha -> seg:dcm) +DsegConverter(config).execute() # organize data into output folder -organizer = DataOrganizer(config, set_file_permissions=sys.platform.startswith('linux')) -organizer.setTarget(DataType(FileType.MHA, HEATMAP), "/app/data/output_data/[i:sid]/heatmap.mha") -organizer.setTarget(DataType(FileType.MHA, SEG), "/app/data/output_data/[i:sid]/pancreas.seg.mha") -#organizer.setTarget(DataType(FileType.DICOMSEG, SEG), "/app/data/output_data/[i:sid]/pancreas.seg.dcm") -organizer.execute() \ No newline at end of file +DataOrganizer(config, 
set_file_permissions=sys.platform.startswith('linux')).execute() diff --git a/models/gc_nnunet_pancreas/utils/GCnnUnetPancreasRunner.py b/models/gc_nnunet_pancreas/utils/GCnnUnetPancreasRunner.py index b63badbd..0b9a1d3f 100644 --- a/models/gc_nnunet_pancreas/utils/GCnnUnetPancreasRunner.py +++ b/models/gc_nnunet_pancreas/utils/GCnnUnetPancreasRunner.py @@ -9,8 +9,7 @@ ----------------------------------------------------------- """ -from mhubio.core import Module, Instance, InstanceData, DataType, FileType, CT, SEG, IO, Meta -import os, subprocess, shutil +from mhubio.core import Module, Instance, InstanceData, DataType, Meta, IO from pathlib import Path @@ -19,44 +18,17 @@ # TODO should move to MHubio/core/templates.py HEATMAP = Meta(mod="heatmap") -# @IO.Config('output_dir', str, "/app/tmp/gc_nnunet_pancreas/", the='directory to output the segmentation and the heatmap') class GCNNUnetPancreasRunner(Module): - - # output_dir: str - @IO.Instance() @IO.Input('in_data', 'mha:mod=ct', the="input data") @IO.Output('heatmap', 'heatmap.mha', 'mha:mod=heatmap:model=GCNNUnetPancreas', data="in_data", the="heatmap of the pancreatic tumor likelihood") @IO.Output('segmentation', 'segmentation.mha', 'mha:mod=seg:model=GCNNUnetPancreas', data="in_data", - the="segmentation of the pancreas, with the following classes: 1-veins, 2-arteries, 3-pancreas, 4-pancreatic duct, 5-bile duct, 6-cysts, 7-renal vein") - # @IO.Output('vei', 'Veins.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=HEART', bundle='gc_nnunet_pancreas', in_signature=False, - # the="segmentation of the veins") - # @IO.Output('art', 'Arteries.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=PULMONARY_ARTERY', bundle='gc_nnunet_pancreas', in_signature=False, - # the="segmentation of the arteries") - # @IO.Output('pan', 'Pancreas.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=AORTA', bundle='gc_nnunet_pancreas', in_signature=False, - # the="segmentation of the pancreas") - # @IO.Output('pdc', 'PDC.mha', 
'mha:mod=seg:model=GCNNUnetPancreas:roi=HEART', bundle='gc_nnunet_pancreas', in_signature=False, - # the="segmentation of the pancreatic duct") - # @IO.Output('bdt', 'BileDuct.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=PULMONARY_ARTERY', bundle='gc_nnunet_pancreas', in_signature=False, - # the="segmentation of the bile duct") - # @IO.Output('cys', 'Cysts.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=AORTA', bundle='gc_nnunet_pancreas', in_signature=False, - # the="segmentation of cysts") - # @IO.Output('rve', 'RenalVein.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=AORTA', bundle='gc_nnunet_pancreas', in_signature=False, - # the="segmentation of the renal vein") + the="segmentation of the pancreas, with the following classes: " + "1-veins, 2-arteries, 3-pancreas, 4-pancreatic duct, 5-bile duct, 6-cysts, 7-renal vein") def task(self, instance: Instance, in_data: InstanceData, heatmap: InstanceData, segmentation: InstanceData, **kwargs) -> None: algorithm = PDACDetectionContainer() - #algorithm.ct_ip_dir = Path("/input/images/") algorithm.ct_image = in_data.abspath # set as str not Path - #algorithm.output_dir = Path(self.output_dir) - #algorithm.output_dir_tlm = algorithm.output_dir / "pancreatic-tumor-likelihood-map" - #algorithm.output_dir_seg = algorithm.output_dir / "pancreas-anatomy-and-vessel-segmentation" - algorithm.heatmap = Path(heatmap.abspath) # algorithm.output_dir_tlm / "heatmap.mha" - algorithm.segmentation = Path(segmentation.abspath) #algorithm.output_dir_seg / "segmentation.mha" - #algorithm.output_dir.mkdir(exist_ok=True, parents=True) - #algorithm.output_dir_tlm.mkdir(exist_ok=True, parents=True) - #algorithm.output_dir_seg.mkdir(exist_ok=True, parents=True) - self.v(in_data.abspath) - self.v(heatmap.abspath) - self.v(segmentation.abspath) + algorithm.heatmap = Path(heatmap.abspath) + algorithm.segmentation = Path(segmentation.abspath) algorithm.process() From 0fe8183a70c2062656578c9f039809dd1c8bf18c Mon Sep 17 00:00:00 2001 From: 
silvandeleemput Date: Wed, 2 Aug 2023 18:02:02 +0200 Subject: [PATCH 011/125] update e-mailaddress and cleanup/simplify default.yml --- models/gc_lunglobes/config/default.yml | 6 +----- models/gc_lunglobes/dockerfiles/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/models/gc_lunglobes/config/default.yml b/models/gc_lunglobes/config/default.yml index b30cbf9c..1bfe2981 100644 --- a/models/gc_lunglobes/config/default.yml +++ b/models/gc_lunglobes/config/default.yml @@ -7,7 +7,6 @@ execute: - DicomImporter - MhaConverter - LobeSegmentationRunner -- NiftiConverter - DsegConverter - DataOrganizer @@ -19,11 +18,8 @@ modules: meta: mod: ct - NiftiConverter: - targets: - - mha:mod=seg - DsegConverter: + source_segs: [mha:mod=seg:roi=*] dicomseg_json_path: /app/models/xie2020_lobe_segmentation/config/dseg.json skip_empty_slices: True diff --git a/models/gc_lunglobes/dockerfiles/Dockerfile b/models/gc_lunglobes/dockerfiles/Dockerfile index fe182e44..92e1b4e5 100644 --- a/models/gc_lunglobes/dockerfiles/Dockerfile +++ b/models/gc_lunglobes/dockerfiles/Dockerfile @@ -1,7 +1,7 @@ FROM mhubai/base:latest # Update authors label -LABEL authors="s.vandeleemput@radboudumc.nl,dbontempi@bwh.harvard.edu,lnuernberg@bwh.harvard.edu" +LABEL authors="sil.vandeleemput@radboudumc.nl,dbontempi@bwh.harvard.edu,lnuernberg@bwh.harvard.edu" # Install system dependencies for OpenCV RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y From f081bd3065e536c4adc6ee0b9278845df5a7ea23 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Wed, 2 Aug 2023 23:07:45 +0200 Subject: [PATCH 012/125] removed redundant line source_segs from DsegConverter in default.yml --- models/gc_lunglobes/config/default.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/models/gc_lunglobes/config/default.yml b/models/gc_lunglobes/config/default.yml index 1bfe2981..e3e0b03a 100644 --- a/models/gc_lunglobes/config/default.yml +++ b/models/gc_lunglobes/config/default.yml @@ -19,7 
+19,6 @@ modules: mod: ct DsegConverter: - source_segs: [mha:mod=seg:roi=*] dicomseg_json_path: /app/models/xie2020_lobe_segmentation/config/dseg.json skip_empty_slices: True From 16d18932d7d043fd3e2650c5200ec03694433166 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Wed, 30 Aug 2023 17:47:10 +0200 Subject: [PATCH 013/125] add panimg backend for mhaconverter and cleanup --- .../config/{config.yml => default.yml} | 8 +- .../gc_nnunet_pancreas/dockerfiles/Dockerfile | 5 +- models/gc_nnunet_pancreas/scripts/run.py | 45 ------- ...easRunner.py => GCNNUnetPancreasRunner.py} | 0 .../utils/PanImgConverters.py | 122 ------------------ models/gc_nnunet_pancreas/utils/__init__.py | 3 +- 6 files changed, 7 insertions(+), 176 deletions(-) rename models/gc_nnunet_pancreas/config/{config.yml => default.yml} (88%) delete mode 100644 models/gc_nnunet_pancreas/scripts/run.py rename models/gc_nnunet_pancreas/utils/{GCnnUnetPancreasRunner.py => GCNNUnetPancreasRunner.py} (100%) delete mode 100644 models/gc_nnunet_pancreas/utils/PanImgConverters.py diff --git a/models/gc_nnunet_pancreas/config/config.yml b/models/gc_nnunet_pancreas/config/default.yml similarity index 88% rename from models/gc_nnunet_pancreas/config/config.yml rename to models/gc_nnunet_pancreas/config/default.yml index 2a449c6b..5ae2cae2 100644 --- a/models/gc_nnunet_pancreas/config/config.yml +++ b/models/gc_nnunet_pancreas/config/default.yml @@ -5,7 +5,7 @@ general: execute: - DicomImporter -- MHAConverter +- MhaConverter - GCNNUnetPancreasRunner - DsegConverter - DataOrganizer @@ -18,10 +18,8 @@ modules: meta: mod: ct - MHAConverter: - # TODO add panimg backend here... 
- - GCNNUnetPancreasRunner: + MhaConverter: + engine: panimg DsegConverter: model_name: 'GC NNUnet Pancreas' diff --git a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile index 5eeeb0d8..c19efbc6 100644 --- a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile +++ b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile @@ -42,5 +42,6 @@ RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \ # Add algorithm files to python path ENV PYTHONPATH=/opt/algorithm:/app -# Default run script -CMD ["python3", "/app/models/gc_nnunet_pancreas/scripts/run.py"] \ No newline at end of file +# Configure main entrypoint +ENTRYPOINT ["python3", "-m", "mhubio.run"] +CMD ["--config", "/app/models/gc_nnunet_pancreas/config/default.yml"] \ No newline at end of file diff --git a/models/gc_nnunet_pancreas/scripts/run.py b/models/gc_nnunet_pancreas/scripts/run.py deleted file mode 100644 index 2baeb9e7..00000000 --- a/models/gc_nnunet_pancreas/scripts/run.py +++ /dev/null @@ -1,45 +0,0 @@ -""" ---------------------------------------------------- -GC / MHub - run the NNUnet GC pancreas segmentation - pipeline ---------------------------------------------------- - ---------------------------------------------------- -Author: Sil van de Leemput -Email: sil.vandeleemput@radboudumc.nl ---------------------------------------------------- -""" - -import sys -sys.path.append('.') - -from mhubio.core import Config -from mhubio.modules.importer.DicomImporter import DicomImporter -from mhubio.modules.convert.DsegConverter import DsegConverter -from mhubio.modules.organizer.DataOrganizer import DataOrganizer -from models.gc_nnunet_pancreas import GCNNUnetPancreasRunner -from models.gc_nnunet_pancreas.utils import MhaPanImgConverter - -# clean-up -import shutil -shutil.rmtree("/app/data/sorted_data", ignore_errors=True) -shutil.rmtree("/app/tmp", ignore_errors=True) -shutil.rmtree("/app/data/output_data", ignore_errors=True) - -# config 
-config = Config('/app/models/gc_nnunet_pancreas/config/config.yml') - -# import (ct:dicom) -DicomImporter(config).execute() - -# convert (ct:dicom -> ct:mha) -MhaPanImgConverter(config).execute() - -# execute model (nnunet ct:mha -> (hm:mha, seg:mha)) -GCNNUnetPancreasRunner(config).execute() - -# convert (seg:mha -> seg:dcm) -DsegConverter(config).execute() - -# organize data into output folder -DataOrganizer(config, set_file_permissions=sys.platform.startswith('linux')).execute() diff --git a/models/gc_nnunet_pancreas/utils/GCnnUnetPancreasRunner.py b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py similarity index 100% rename from models/gc_nnunet_pancreas/utils/GCnnUnetPancreasRunner.py rename to models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py diff --git a/models/gc_nnunet_pancreas/utils/PanImgConverters.py b/models/gc_nnunet_pancreas/utils/PanImgConverters.py deleted file mode 100644 index 824d20f4..00000000 --- a/models/gc_nnunet_pancreas/utils/PanImgConverters.py +++ /dev/null @@ -1,122 +0,0 @@ -""" -------------------------------------------------------------- -MHub - PanImg Conversion Modules Dicom2Mha and WSI-Dicom2Tiff -------------------------------------------------------------- - -------------------------------------------------------------- -Author: Sil van de Leemput -Email: sil.vandeleemput@radboudumc.nl -------------------------------------------------------------- -""" - - -from typing import Optional - -from mhubio.modules.convert.DataConverter import DataConverter -from mhubio.core import Instance, InstanceData, DataType, FileType - -import os -from pathlib import Path -import shutil - -from panimg.exceptions import UnconsumedFilesException -from panimg.image_builders.dicom import image_builder_dicom -from panimg.image_builders.tiff import image_builder_tiff -from panimg.image_builders.metaio_nrrd import image_builder_nrrd - -import SimpleITK - - -class MhaPanImgConverter(DataConverter): - """ - Conversion module. 
- Convert instance data from dicom or nrrd to mha. - """ - - def convert(self, instance: Instance) -> Optional[InstanceData]: - - # create a converted instance - has_instance_dicom = instance.hasType(DataType(FileType.DICOM)) - has_instance_nrrd = instance.hasType(DataType(FileType.NRRD)) - - assert has_instance_dicom or has_instance_nrrd, f"CONVERT ERROR: required datatype (dicom or nrrd) not available in instance {str(instance)}." - - # select input data, dicom has priority over nrrd - input_data = instance.data.filter(DataType(FileType.DICOM) if has_instance_dicom else DataType(FileType.NRRD)).first() - - # out data - mha_data = InstanceData("image.mha", DataType(FileType.MHA, input_data.type.meta)) - mha_data.instance = instance - - # paths - inp_data_dir = Path(input_data.abspath) - out_mha_file = Path(mha_data.abspath) - - # sanity check - assert(inp_data_dir.is_dir()) - - # DICOM CT to MHA conversion (if the file doesn't exist yet) - if out_mha_file.is_file(): - print("CONVERT ERROR: File already exists: ", out_mha_file) - return None - else: - # run conversion using panimg - input_files = {f for f in inp_data_dir.glob(["*.nrrd", "*.dcm"][has_instance_dicom]) if f.is_file()} - img_builder = image_builder_dicom if has_instance_dicom else image_builder_nrrd - try: - for result in img_builder(files=input_files): - sitk_image = result.image # SimpleITK image - SimpleITK.WriteImage(sitk_image, str(out_mha_file)) - except UnconsumedFilesException as e: - # e.file_errors is keyed with a Path to a file that could not be consumed, - # with a list of all the errors found with loading it, - # the user can then choose what to do with that information - print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) - return None - - return mha_data - - -class TiffPanImgConverter(DataConverter): - """ - Conversion module. - Convert instance data from WSI-dicom to tiff. 
- """ - - def convert(self, instance: Instance) -> Optional[InstanceData]: - - # create a converted instance - assert instance.hasType(DataType(FileType.DICOM)), f"CONVERT ERROR: required datatype (dicom) not available in instance {str(instance)}." - dicom_data = instance.data.filter(DataType(FileType.DICOM)).first() - - # out data - tiff_data = InstanceData("image.tiff", DataType(FileType.TIFF, dicom_data.type.meta)) - tiff_data.instance = instance - - # paths - inp_dicom_dir = Path(dicom_data.abspath) - out_tiff_file = Path(tiff_data.abspath) - - # sanity check - assert(inp_dicom_dir.is_dir()) - - # WSI-DICOM to TIFF conversion (if the file doesn't exist yet) - if out_tiff_file.is_file(): - print("CONVERT ERROR: File already exists: ", out_tiff_file) - return None - else: - # run conversion using panimg - dcm_input_files = {f for f in inp_dicom_dir.glob("*.dcm") if f.is_file()} - - try: - for result in image_builder_tiff(files=dcm_input_files): - tiff_image = result.file # Path to the tiff file - shutil.move(str(tiff_image), str(out_tiff_file)) - except UnconsumedFilesException as e: - # e.file_errors is keyed with a Path to a file that could not be consumed, - # with a list of all the errors found with loading it, - # the user can then choose what to do with that information - print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) - return None - - return tiff_data diff --git a/models/gc_nnunet_pancreas/utils/__init__.py b/models/gc_nnunet_pancreas/utils/__init__.py index 7fc72114..683c17d1 100644 --- a/models/gc_nnunet_pancreas/utils/__init__.py +++ b/models/gc_nnunet_pancreas/utils/__init__.py @@ -1,2 +1 @@ -from .GCnnUnetPancreasRunner import * -from .PanImgConverters import * +from .GCNNUnetPancreasRunner import * From 0ad89310a31f51a6c82002d67e947ba7377b7227 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Wed, 13 Sep 2023 10:41:58 +0200 Subject: [PATCH 014/125] add specific commit hash for algorithm main branch at 
2023/09/13 --- models/gc_lunglobes/dockerfiles/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/models/gc_lunglobes/dockerfiles/Dockerfile b/models/gc_lunglobes/dockerfiles/Dockerfile index 92e1b4e5..fab5ca99 100644 --- a/models/gc_lunglobes/dockerfiles/Dockerfile +++ b/models/gc_lunglobes/dockerfiles/Dockerfile @@ -23,8 +23,9 @@ RUN git stash \ && git sparse-checkout set "models/gc_lunglobes" \ && git fetch https://github.com/MHubAI/models.git main -# Install Xie's pulmonary lobe segmentation algorithm and model weights +# Install Xie's pulmonary lobe segmentation algorithm and model weights (main branch commit at 2023/09/13) RUN git clone https://github.com/DIAGNijmegen/bodyct-pulmonary-lobe-segmentation.git src && \ + cd src && git reset --hard 5a64b70504d46c042c30851a69cec370f1202e67 && cd /app && \ sed -i 's/from models import CTSUNet/from src.models import CTSUNet/g' src/test.py # Default run script From 4f05036be8d2fca5f9ebc32e17ea7bb1eaa0875a Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Wed, 13 Sep 2023 12:32:22 +0200 Subject: [PATCH 015/125] Updated the grt123 mhub model files * default.yml * renamed from config.yml * added version and description * updated pipeline with panimg mhaconverter * Dockerfile * added fixed commit hash for grt123 repo git clone * updated entrypoint * LungCancerClassifierRunner.py * removed tmp_path config option * added requestTempDir for tmp_path * added more comments * Removed script files and custom PanImgConverter --- .../config/{config.yml => default.yml} | 6 +- .../dockerfiles/Dockerfile | 10 +- models/gc_grt123_lung_cancer/scripts/run.py | 40 ------ .../utils/LungCancerClassifierRunner.py | 10 +- .../utils/PanImgConverters.py | 122 ------------------ 5 files changed, 16 insertions(+), 172 deletions(-) rename models/gc_grt123_lung_cancer/config/{config.yml => default.yml} (77%) delete mode 100644 models/gc_grt123_lung_cancer/scripts/run.py delete mode 100644 
models/gc_grt123_lung_cancer/utils/PanImgConverters.py diff --git a/models/gc_grt123_lung_cancer/config/config.yml b/models/gc_grt123_lung_cancer/config/default.yml similarity index 77% rename from models/gc_grt123_lung_cancer/config/config.yml rename to models/gc_grt123_lung_cancer/config/default.yml index c6947ac6..a068585b 100644 --- a/models/gc_grt123_lung_cancer/config/config.yml +++ b/models/gc_grt123_lung_cancer/config/default.yml @@ -1,5 +1,7 @@ general: data_base_dir: /app/data + version: 1.0 + description: grt123 lung nodule and lung cancer classifier default (dicom to json) execute: - DicomImporter @@ -15,8 +17,10 @@ modules: meta: mod: ct + MhaConverter: + engine: panimg + LungCancerClassifierRunner: - tmp_path: /app/tmp n_preprocessing_workers: 8 DataOrganizer: diff --git a/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile b/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile index 03ed18e8..ea2416cc 100644 --- a/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile +++ b/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile @@ -9,10 +9,11 @@ RUN pip3 install --no-cache-dir \ torch===2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html # Install grt123 algorithm and model weights -# - We use a shallow git clone for reduced bandwidth usage +# - Git clone the algorithm repository for v2.0.0 (fixed to v2.0.0 tag commit on 2023/09/13) # - We remove unnecessary files for a compacter docker layer # - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file -RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /gc_grt123_lung_cancer && \ +RUN git clone --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /gc_grt123_lung_cancer && \ + cd /gc_grt123_lung_cancer && git reset --hard 9a4ca0415c7fc1d3023a16650bf1cdce86f8bb59 && \ rm -rf /gc_grt123_lung_cancer/tests && \ rm -rf /gc_grt123_lung_cancer/training && \ rm -rf 
/gc_grt123_lung_cancer/processor && \ @@ -32,5 +33,6 @@ RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-d # Add lobe segmentation code base to python path ENV PYTHONPATH="/gc_grt123_lung_cancer:/app" -# Default run script # TODO should be direct call to config.yml waiting for MhaConverter with panimg backend -CMD ["python3", "/app/models/gc_grt123_lung_cancer/scripts/run.py"] +# Default entrypoint +ENTRYPOINT ["python3", "-m", "mhubio.run"] +CMD ["--config", "/app/models/gc_grt123_lung_cancer/config/default.yml"] diff --git a/models/gc_grt123_lung_cancer/scripts/run.py b/models/gc_grt123_lung_cancer/scripts/run.py deleted file mode 100644 index e53880cd..00000000 --- a/models/gc_grt123_lung_cancer/scripts/run.py +++ /dev/null @@ -1,40 +0,0 @@ -""" ------------------------------------------------------- -MHub / GC - Run grt123 Lung Cancer Classifier locally ------------------------------------------------------ - ------------------------------------------------------ -Author: Sil van de Leemput -Email: sil.vandeleemput@radboudumc.nl ------------------------------------------------------ -""" - -import sys -sys.path.append('.') - -from mhubio.core import Config, DataType, FileType -from mhubio.modules.importer.DicomImporter import DicomImporter -from mhubio.modules.organizer.DataOrganizer import DataOrganizer -from models.gc_grt123_lung_cancer.utils.LungCancerClassifierRunner import LungCancerClassifierRunner -from models.gc_grt123_lung_cancer.utils.PanImgConverters import MhaPanImgConverter - -# clean-up -import shutil -shutil.rmtree("/app/data/sorted_data", ignore_errors=True) -shutil.rmtree("/app/tmp", ignore_errors=True) -shutil.rmtree("/app/data/output_data", ignore_errors=True) - -# config -config = Config('/app/models/gc_grt123_lung_cancer/config/config.yml') - -# import (ct:dicom) -DicomImporter(config).execute() - -# convert (ct:dicom -> ct:mha) -MhaPanImgConverter(config).execute() - -# execute model (nnunet) 
-LungCancerClassifierRunner(config).execute() - -# organize data into output folder -DataOrganizer(config, set_file_permissions=sys.platform.startswith('linux')).execute() diff --git a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py index d1b6643c..eb170d67 100644 --- a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py +++ b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -19,9 +19,10 @@ import torch +# Import the main module for the grt123 algorithm, which must be used for running the classification import main - +# This method cleans the raw results from the grt123 algorithm output and only keeps the relevant details def cleanup_json_report(data: Dict): for key in ["trainingset1", "trainingset2"]: del data["lungcad"][key] @@ -41,7 +42,6 @@ def cleanup_json_report(data: Dict): @IO.Config('n_preprocessing_workers', int, 6, the="number of preprocessing workers to use for the grt123 lung mask preprocessor") -@IO.Config('tmp_path', str, "/app/tmp", the="the path to write intermediate grt123 files to") class LungCancerClassifierRunner(Module): n_preprocessing_workers: int @@ -51,13 +51,14 @@ class LungCancerClassifierRunner(Module): @IO.Input('in_data', 'mha:mod=ct', the='input ct scan') @IO.Output('out_data', 'grt123_lung_cancer_findings.json', 'json:model=grt123LungCancerClassification', 'in_data', the='predicted nodules and lung cancer findings of the lung lobe') def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: - - tmp_path = Path(self.tmp_path) + # create temporary directories for the preprocessed data and the cropped bounding boxes + tmp_path = Path(self.config.data.requestTempDir('grt123')) tmp_output_bbox_dir = tmp_path / "bbox" tmp_output_prep_dir = tmp_path / "prep" tmp_output_bbox_dir.mkdir(exist_ok=True, parents=True) tmp_output_prep_dir.mkdir(exist_ok=True, parents=True) + # determine the number of 
GPUs we can use if torch.cuda.is_available(): self.v("Running with a GPU") n_gpu = 1 @@ -85,4 +86,3 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData cleanup_json_report(results_json) with open(out_data.abspath, "w") as f: json.dump(results_json, f, indent=4) - diff --git a/models/gc_grt123_lung_cancer/utils/PanImgConverters.py b/models/gc_grt123_lung_cancer/utils/PanImgConverters.py deleted file mode 100644 index 824d20f4..00000000 --- a/models/gc_grt123_lung_cancer/utils/PanImgConverters.py +++ /dev/null @@ -1,122 +0,0 @@ -""" -------------------------------------------------------------- -MHub - PanImg Conversion Modules Dicom2Mha and WSI-Dicom2Tiff -------------------------------------------------------------- - -------------------------------------------------------------- -Author: Sil van de Leemput -Email: sil.vandeleemput@radboudumc.nl -------------------------------------------------------------- -""" - - -from typing import Optional - -from mhubio.modules.convert.DataConverter import DataConverter -from mhubio.core import Instance, InstanceData, DataType, FileType - -import os -from pathlib import Path -import shutil - -from panimg.exceptions import UnconsumedFilesException -from panimg.image_builders.dicom import image_builder_dicom -from panimg.image_builders.tiff import image_builder_tiff -from panimg.image_builders.metaio_nrrd import image_builder_nrrd - -import SimpleITK - - -class MhaPanImgConverter(DataConverter): - """ - Conversion module. - Convert instance data from dicom or nrrd to mha. - """ - - def convert(self, instance: Instance) -> Optional[InstanceData]: - - # create a converted instance - has_instance_dicom = instance.hasType(DataType(FileType.DICOM)) - has_instance_nrrd = instance.hasType(DataType(FileType.NRRD)) - - assert has_instance_dicom or has_instance_nrrd, f"CONVERT ERROR: required datatype (dicom or nrrd) not available in instance {str(instance)}." 
- - # select input data, dicom has priority over nrrd - input_data = instance.data.filter(DataType(FileType.DICOM) if has_instance_dicom else DataType(FileType.NRRD)).first() - - # out data - mha_data = InstanceData("image.mha", DataType(FileType.MHA, input_data.type.meta)) - mha_data.instance = instance - - # paths - inp_data_dir = Path(input_data.abspath) - out_mha_file = Path(mha_data.abspath) - - # sanity check - assert(inp_data_dir.is_dir()) - - # DICOM CT to MHA conversion (if the file doesn't exist yet) - if out_mha_file.is_file(): - print("CONVERT ERROR: File already exists: ", out_mha_file) - return None - else: - # run conversion using panimg - input_files = {f for f in inp_data_dir.glob(["*.nrrd", "*.dcm"][has_instance_dicom]) if f.is_file()} - img_builder = image_builder_dicom if has_instance_dicom else image_builder_nrrd - try: - for result in img_builder(files=input_files): - sitk_image = result.image # SimpleITK image - SimpleITK.WriteImage(sitk_image, str(out_mha_file)) - except UnconsumedFilesException as e: - # e.file_errors is keyed with a Path to a file that could not be consumed, - # with a list of all the errors found with loading it, - # the user can then choose what to do with that information - print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) - return None - - return mha_data - - -class TiffPanImgConverter(DataConverter): - """ - Conversion module. - Convert instance data from WSI-dicom to tiff. - """ - - def convert(self, instance: Instance) -> Optional[InstanceData]: - - # create a converted instance - assert instance.hasType(DataType(FileType.DICOM)), f"CONVERT ERROR: required datatype (dicom) not available in instance {str(instance)}." 
- dicom_data = instance.data.filter(DataType(FileType.DICOM)).first() - - # out data - tiff_data = InstanceData("image.tiff", DataType(FileType.TIFF, dicom_data.type.meta)) - tiff_data.instance = instance - - # paths - inp_dicom_dir = Path(dicom_data.abspath) - out_tiff_file = Path(tiff_data.abspath) - - # sanity check - assert(inp_dicom_dir.is_dir()) - - # WSI-DICOM to TIFF conversion (if the file doesn't exist yet) - if out_tiff_file.is_file(): - print("CONVERT ERROR: File already exists: ", out_tiff_file) - return None - else: - # run conversion using panimg - dcm_input_files = {f for f in inp_dicom_dir.glob("*.dcm") if f.is_file()} - - try: - for result in image_builder_tiff(files=dcm_input_files): - tiff_image = result.file # Path to the tiff file - shutil.move(str(tiff_image), str(out_tiff_file)) - except UnconsumedFilesException as e: - # e.file_errors is keyed with a Path to a file that could not be consumed, - # with a list of all the errors found with loading it, - # the user can then choose what to do with that information - print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) - return None - - return tiff_data From 19d29d89e493011cb3e6f2f94d372bb9b9a47373 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 14 Sep 2023 13:00:49 +0200 Subject: [PATCH 016/125] Updated and cleaned Dockerfile and Runner and added some comments --- .../gc_nnunet_pancreas/dockerfiles/Dockerfile | 23 +++++++++---------- .../utils/GCNNUnetPancreasRunner.py | 11 +++++---- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile index c19efbc6..3fe9a951 100644 --- a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile +++ b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile @@ -4,22 +4,14 @@ FROM mhubai/base:latest # Specify/override authors label LABEL authors="sil.vandeleemput@radboudumc.nl" -# Clone MHub model (m-gc-nnunet-pancreas 
branch, fixed to commit 407f1f884f09898bef9a9173e6434d681a50d399) # TODO -#RUN git init \ -# && git sparse-checkout set "models/gc_nnunet_pancreas" \ -# && git fetch https://github.com/MHubAI/models.git m-gc-nnunet-pancreas \ -# && git merge TODO - # Install git-lfs (required for downloading the model weights) -RUN apt update && apt install -y --no-install-recommends \ - git-lfs \ - && rm -rf /var/lib/apt/lists/* +RUN apt update && apt install -y --no-install-recommends git-lfs && rm -rf /var/lib/apt/lists/* # Install the model weights and the algorithm files -# * Pull algorithm from repo into /opt/algorithm for commit e4f4008c6e18e60a79f693448562a340a9252aa8 +# * Pull algorithm from repo into /opt/algorithm (main branch, commit e4f4008c6e18e60a79f693448562a340a9252aa8) # * Remove .git folder to keep docker layer small # * Replace input images path in process.py with an existing folder to avoid errors -RUN git clone --depth 1 https://github.com/DIAGNijmegen/CE-CT_PDAC_AutomaticDetection_nnUnet.git /opt/algorithm && \ +RUN git clone https://github.com/DIAGNijmegen/CE-CT_PDAC_AutomaticDetection_nnUnet.git /opt/algorithm && \ cd /opt/algorithm && \ git reset --hard e4f4008c6e18e60a79f693448562a340a9252aa8 && \ rm -rf /opt/algorithm/.git && \ @@ -39,9 +31,16 @@ RUN pip3 install --no-cache-dir -r /opt/algorithm/requirements.txt RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \ mv /opt/algorithm/nnUNetTrainerV2_Loss_CE_checkpoints.py "$SITE_PKG/nnunet/training/network_training/nnUNetTrainerV2_Loss_CE_checkpoints.py" +# Clone the main branch of MHubAI/models TODO check if ok +RUN git stash \ + && git fetch https://github.com/MHubAI/models.git main \ + && git merge FETCH_HEAD \ + && git sparse-checkout set "models/gc_nnunet_pancreas" \ + && git fetch https://github.com/MHubAI/models.git main + # Add algorithm files to python path ENV PYTHONPATH=/opt/algorithm:/app # Configure main entrypoint ENTRYPOINT ["python3", "-m", "mhubio.run"] -CMD 
["--config", "/app/models/gc_nnunet_pancreas/config/default.yml"] \ No newline at end of file +CMD ["--config", "/app/models/gc_nnunet_pancreas/config/default.yml"] diff --git a/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py index 0b9a1d3f..e57fac33 100644 --- a/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py +++ b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py @@ -13,10 +13,12 @@ from pathlib import Path +# Import the algorithm pipeline class from the CE-CT_PDAC_AutomaticDetection_nnUnet repository from process import PDACDetectionContainer # TODO should move to MHubio/core/templates.py -HEATMAP = Meta(mod="heatmap") +HEATMAP = Meta(mod="heatmap") + class GCNNUnetPancreasRunner(Module): @IO.Instance() @@ -27,8 +29,9 @@ class GCNNUnetPancreasRunner(Module): the="segmentation of the pancreas, with the following classes: " "1-veins, 2-arteries, 3-pancreas, 4-pancreatic duct, 5-bile duct, 6-cysts, 7-renal vein") def task(self, instance: Instance, in_data: InstanceData, heatmap: InstanceData, segmentation: InstanceData, **kwargs) -> None: + # Configure the algorithm pipeline class and run it algorithm = PDACDetectionContainer() - algorithm.ct_image = in_data.abspath # set as str not Path - algorithm.heatmap = Path(heatmap.abspath) - algorithm.segmentation = Path(segmentation.abspath) + algorithm.ct_image = in_data.abspath # set as str not Path + algorithm.heatmap = Path(heatmap.abspath) + algorithm.segmentation = Path(segmentation.abspath) algorithm.process() From 364831a2e525bba94d1157399d5702dfc1304d02 Mon Sep 17 00:00:00 2001 From: Sil van de Leemput Date: Tue, 3 Oct 2023 12:55:18 +0200 Subject: [PATCH 017/125] Create meta.json --- models/gc_grt123_lung_cancer/meta.json | 121 +++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 models/gc_grt123_lung_cancer/meta.json diff --git a/models/gc_grt123_lung_cancer/meta.json 
b/models/gc_grt123_lung_cancer/meta.json new file mode 100644 index 00000000..000cd2ce --- /dev/null +++ b/models/gc_grt123_lung_cancer/meta.json @@ -0,0 +1,121 @@ +{ + "id": "2e67a3cc-4680-4058-bf4e-f965cf50f06f", + "name": "lung_cancer_risk_estimation", + "title": "Lung cancer risk estimation on thorax CT scans", + "summary": { + "description": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. ", + "inputs": [ + { + "label": "CT", + "description": "Chest CT", + "format": "DICOM", + "modality": "CT", + "bodypartexamined": "Chest", + "slicethickness": "2.5mm", + "non-contrast": true, + "contrast": false + } + ], + "outputs": [ + { + "type": "Prediction", + "valueType": "number", + "label": "Cancer probability score", + "description": "Probability that the scan contains cancer nodules", + "classes": [] + } + ], + "model": { + "architecture": "3D convolutional neural network", + "training": "supervised", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 2285 + }, + "evaluation": { + "vol_samples": 506 + }, + "public": true, + "external": false + } + }, + "details": { + "name": " bodyct-dsb2017-grt123", + "version": "", + "devteam": "DIAGNijmegen (Diagnostic Image Analysis Group, Radboud UMC, The Netherlands)", + "type": "3D Deep Leaky Noisy-or Network", + "date": { + "weights": "", + "code": "2023-07-04", + "pub": "2017-11-22" + }, + "cite": "Liao F, Liang M, Li Z, Hu X, Song S. Evaluate the Malignancy of Pulmonary Nodules Using the 3-D Deep Leaky Noisy-OR Network. IEEE Trans Neural Netw Learning Syst. 2019;30(11):3484-3495.", + "license": { + "code": "MIT", + "weights": "" + }, + "publications": [ + { + "title": "JOURNAL OF LATEX CLASS FILES, VOL. 14, NO. 
8, AUGUST 2015 1 Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network", + "uri": "https://arxiv.org/pdf/1711.08324.pdf" + } + ], + "github": "https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123", + "zenodo": "", + "colab": "", + "slicer": false + }, + "info": { + "use": { + "title": "Intended use", + "text": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. ", + "references": [], + "tables": [] + }, + "analyses": { + "title": "", + "text": "", + "references": [], + "tables": [] + }, + "evaluation": { + "title": "", + "text": "", + "references": [], + "tables": [] + }, + "training": { + "title": "Training data", + "text": "The Luna dataset includes the images from the LIDC/IDRI dataset in a different format, together with additional annotations. The LIDC/IDRI dataset is available at https://wiki.cancerimagingarchive.net/display/Public/LIDC-IDRI under a Creative Commons Attribution 3.0 Unported License.", + "references": [ + { + "label": "LUng Nodule Analysis 2016 dataset part 1", + "uri": "https://zenodo.org/record/3723295" + }, + { + "label": "LUng Nodule Analysis 2016 dataset part 2", + "uri": "https://zenodo.org/record/4121926" + }, + { + "label": "Data Science Bowl 2017 dataset", + "uri": "https://www.kaggle.com/competitions/data-science-bowl-2017/data" + } + ], + "tables": [] + }, + "ethics": { + "title": "", + "text": "", + "references": [], + "tables": [] + }, + "limitations": { + "title": "", + "text": "", + "references": [], + "tables": [] + } + } +} From 737a5d2c210138876b8f59391d0492d4856bbc1c Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 10 Oct 2023 15:34:37 +0200 Subject: [PATCH 018/125] add meta.json --- models/gc_nnunet_pancreas/meta.json | 129 ++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 models/gc_nnunet_pancreas/meta.json diff --git a/models/gc_nnunet_pancreas/meta.json b/models/gc_nnunet_pancreas/meta.json new file mode 
100644 index 00000000..bce7005b --- /dev/null +++ b/models/gc_nnunet_pancreas/meta.json @@ -0,0 +1,129 @@ +{ + "id": "bf7ae4bb-c6f5-4b1e-89aa-a8de246def57", + "name": "pdac_detection_in_ct", + "title": "Pancreatic Ductal Adenocarcinoma Detection in CT", + "summary": { + "description": "This algorithm produces a tumor likelihood heatmap for the presence of pancreatic ductal adenocarcinoma (PDAC) in an input venous-phase contrast-enhanced computed tomography scan (CECT). Additionally, the algorithm provides the segmentation of multiple surrounding anatomical structures such as the pancreatic duct, common bile duct, veins and arteries. The heatmap and segmentations are resampled to the same spatial resolution and physical dimensions as the input CECT image for easier visualisation.", + "inputs": [ + { + "label": "Venous phase CT scan", + "description": "A contrast-enhanced CT scan in the venous phase and axial reconstruction", + "format": "DICOM", + "modality": "CT", + "bodypartexamined": "Abdomen", + "slicethickness": "2.5mm", + "non-contrast": false, + "contrast": false + } + ], + "outputs": [ + { + "type": "Segmentation", + "classes": [ + "veins", + "arteries", + "pancreas", + "pancreatic duct", + "bile duct", + "cysts", + "renal vein" + ] + }, + { + "type": "Prediction", + "valueType": "number", + "label": "Pancreatic tumor likelihood", + "description": "Pancreatic tumor likelihood map with values between 0 and 1", + "classes": [] + } + ], + "model": { + "architecture": "nnUnet ", + "training": "supervised", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 242 + }, + "evaluation": { + "vol_samples": 361 + }, + "public": true, + "external": false + } + }, + "details": { + "name": "Fully Automatic Deep Learning Framework for Pancreatic Ductal Adenocarcinoma Detection on Computed Tomography", + "version": "", + "devteam": "DIAGNijmegen (Diagnostic Image Analysis Group, Radboud UMC, The Netherlands)", + "type": "The models were developed using 
nnUnet. All models employed a 3D U-Net as the base architecture and were trained for 250.000 training steps with five-fold cross-validation.", + "date": { + "weights": "2023-06-28", + "code": "2022-07-19", + "pub": "2022-01-13" + }, + "cite": "Alves N, Schuurmans M, Litjens G, Bosma JS, Hermans J, Huisman H. Fully Automatic Deep Learning Framework for Pancreatic Ductal Adenocarcinoma Detection on Computed Tomography. Cancers (Basel). 2022 Jan 13;14(2):376. doi: 10.3390/cancers14020376. PMID: 35053538; PMCID: PMC8774174.", + "license": { + "code": "Apache 2.0", + "weights": "Apache 2.0" + }, + "publications": [ + { + "title": "Fully Automatic Deep Learning Framework for Pancreatic Ductal Adenocarcinoma Detection on Computed Tomography ", + "uri": "https://www.mdpi.com/2072-6694/14/2/376" + } + ], + "github": "https://github.com/DIAGNijmegen/CE-CT_PDAC_AutomaticDetection_nnUnet", + "zenodo": "", + "colab": "", + "slicer": false + }, + "info": { + "use": { + "title": "Intended Use", + "text": "This algorithm is intended to be used only on venous-phase CECT examinations of patients with clinical suspicion of PDAC. 
This algorithm should not be used in different patient demographics.", + "references": [], + "tables": [] + }, + "analyses": { + "title": "Analysis", + "text": "The study evaluated a medical model's performance for tumor detection by analyzing receiver operating characteristic (ROC) and free-response receiver operating characteristic (FROC) curves, assessing both tumor presence and lesion localization, and compared three configurations using statistical tests and ensemble modeling.", + "references": [], + "tables": [] + }, + "evaluation": { + "title": "Evaluation Data", + "text": "This framework was tested in an independent, external cohort consisting of two publicly available datasets.", + "references": [ + { + "label": "The Medical Segmentation Decathlon pancreas dataset (training portion) consisting of 281 patients with pancreatic malignancies (including lesions in the head, neck, body, and tail of the pancreas) and voxel-level annotations for the pancreas and lesion.", + "uri": "http://medicaldecathlon.com/" + }, + { + "label": "The Cancer Imaging Archive dataset from the US National Institutes of Health Clinical Center, containing 80 patients with normal pancreas and respective voxel-level annotations.", + "uri": "https://wiki.cancerimagingarchive.net/display/Public/Pancreas-CT" + } + ], + "tables": [] + }, + "training": { + "title": "Training data", + "text": "CE-CT scans in the portal venous phase from 119 patients with pathology-proven PDAC in the pancreatic head (PDAC cohort) and 123 patients with normal pancreas (non-PDAC cohort), acquired between 1 January 2013 and 1 June 2020, were selected for model development.", + "references": [], + "tables": [] + }, + "ethics": { + "title": "", + "text": "", + "references": [], + "tables": [] + }, + "limitations": { + "title": "Before using this model", + "text": "Test the model retrospectively and prospectively on a diagnostic cohort that reflects the target population that the model will be used upon to confirm 
the validity of the model within a local setting.", + "references": [], + "tables": [] + } + } +} \ No newline at end of file From 1fa6d724374fc0f13b479a5cfc1f2d6676242129 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 12 Oct 2023 13:42:57 +0200 Subject: [PATCH 019/125] initial functional implementation of the PICAI baseline algorithm model --- models/gc_picai_baseline/config/default.yml | 35 ++++++++++ .../gc_picai_baseline/config/mha-pipeline.yml | 31 +++++++++ .../gc_picai_baseline/dockerfiles/Dockerfile | 51 ++++++++++++++ .../utils/PicaiBaselineRunner.py | 68 +++++++++++++++++++ models/gc_picai_baseline/utils/__init__.py | 1 + 5 files changed, 186 insertions(+) create mode 100644 models/gc_picai_baseline/config/default.yml create mode 100644 models/gc_picai_baseline/config/mha-pipeline.yml create mode 100644 models/gc_picai_baseline/dockerfiles/Dockerfile create mode 100644 models/gc_picai_baseline/utils/PicaiBaselineRunner.py create mode 100644 models/gc_picai_baseline/utils/__init__.py diff --git a/models/gc_picai_baseline/config/default.yml b/models/gc_picai_baseline/config/default.yml new file mode 100644 index 00000000..c86566be --- /dev/null +++ b/models/gc_picai_baseline/config/default.yml @@ -0,0 +1,35 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: Prostate MRI classification default (dicom to json) + +execute: +- FileStructureImporter +- MhaConverter +- PicaiBaselineRunner +- ReportExporter +- DataOrganizer + +modules: + FileStructureImporter: + input_dir: input_data + structures: + - $sid@instance/images/transverse-adc-prostate-mri@dicom:mod=mradc + - $sid/images/transverse-t2-prostate-mri@dicom:mod=mrt2 + - $sid/images/transverse-hbv-prostate-mri@dicom:mod=mrhbv + import_id: sid + + MhaConverter: + engine: panimg + + ReportExporter: + format: compact + includes: + - data: prostate_cancer_probability + label: prostate_cancer_probability + value: value + + DataOrganizer: + targets: + - 
json:mod=report-->[i:sid]/cspca-case-level-likelihood.json + - mha:mod=hm-->[i:sid]/cspca-detection-map.mha diff --git a/models/gc_picai_baseline/config/mha-pipeline.yml b/models/gc_picai_baseline/config/mha-pipeline.yml new file mode 100644 index 00000000..f70fac4b --- /dev/null +++ b/models/gc_picai_baseline/config/mha-pipeline.yml @@ -0,0 +1,31 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: Prostate MRI classification MHA pipeline (mha to json) + +execute: +- FileStructureImporter +- PicaiBaselineRunner +- ReportExporter +- DataOrganizer + +modules: + FileStructureImporter: + input_dir: input_data + structures: + - $sid@instance/images/transverse-adc-prostate-mri/adc.mha@mha:mod=mradc + - $sid/images/transverse-t2-prostate-mri/t2w.mha@mha:mod=mrt2 + - $sid/images/transverse-hbv-prostate-mri/hbv.mha@mha:mod=mrhbv + import_id: sid + + ReportExporter: + format: compact + includes: + - data: prostate_cancer_probability + label: prostate_cancer_probability + value: value + + DataOrganizer: + targets: + - json:mod=report-->[i:sid]/cspca-case-level-likelihood.json + - mha:mod=hm-->[i:sid]/cspca-detection-map.mha diff --git a/models/gc_picai_baseline/dockerfiles/Dockerfile b/models/gc_picai_baseline/dockerfiles/Dockerfile new file mode 100644 index 00000000..a1bff797 --- /dev/null +++ b/models/gc_picai_baseline/dockerfiles/Dockerfile @@ -0,0 +1,51 @@ +# Specify the base image for the environment +FROM mhubai/base:latest + +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# Install PyTorch 2.0.1 (CUDA enabled) +RUN pip3 install --no-cache-dir torch==2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html + +# Install git-lfs (required for unpacking model weights) +RUN apt update && apt install -y --no-install-recommends git-lfs && rm -rf /var/lib/apt/lists/* + +# Install PICAI baseline algorithm and model weights +# - Git clone the algorithm repository for v2.1.1 (fixed to v2.1.1 tag) +# - We remove 
unnecessary files for a more compact docker layer
+# - Subsequently we remove the .git directory to produce a more compact docker layer
+RUN git clone --depth 1 --branch v2.1.1 https://github.com/DIAGNijmegen/picai_nnunet_semi_supervised_gc_algorithm.git /opt/algorithm && \
+    rm -rf /opt/algorithm/test && \
+    rm -rf /opt/algorithm/.git
+
+# Install additional PICAI requirements
+RUN pip3 install --no-cache-dir -r /opt/algorithm/requirements.txt
+
+# Extend the nnUNet installation with custom trainers
+RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \
+    mv /opt/algorithm/nnUNetTrainerV2_focalLoss.py "$SITE_PKG/nnunet/training/network_training/nnUNet_variants/loss_function/nnUNetTrainerV2_focalLoss.py"
+RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \
+    mv /opt/algorithm/nnUNetTrainerV2_Loss_CE_checkpoints.py "$SITE_PKG/nnunet/training/network_training/nnUNetTrainerV2_Loss_CE_checkpoints.py"
+RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \
+    mv /opt/algorithm/nnUNetTrainerV2_Loss_FL_and_CE.py "$SITE_PKG/nnunet/training/network_training/nnUNetTrainerV2_Loss_FL_and_CE.py"
+
+# Two code edits to the __init__ method of the algorithm class in process.py to prevent some of its default behavior
+# 1. Skip forced error caused by using different input locations than expected (we don't use the GC dirs)
+# 2. 
Prevent unnecessary folder creation before input directories have been set (we will set the correct directory later) +RUN sed -i "s|file_paths = list(Path(folder).glob(scan_glob_format))|return|g" /opt/algorithm/process.py && \ + sed -i "s|self.cspca_detection_map_path.parent.mkdir(exist_ok=True, parents=True)||g" /opt/algorithm/process.py + +# FIXME: temporary fix waiting for the latest base image update +# Clone the main branch of MHubAI/models +#RUN git stash \ +# && git fetch https://github.com/MHubAI/models.git main \ +# && git merge FETCH_HEAD \ +# && git sparse-checkout set "models/gc_picai_baseline" \ +# && git fetch https://github.com/MHubAI/models.git main + +# Add lobe segmentation code base to python path +ENV PYTHONPATH="/app:/opt/algorithm" + +# Default entrypoint +ENTRYPOINT ["python3", "-m", "mhubio.run"] +CMD ["--config", "/app/models/gc_picai_baseline/config/default.yml"] diff --git a/models/gc_picai_baseline/utils/PicaiBaselineRunner.py b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py new file mode 100644 index 00000000..62b88e30 --- /dev/null +++ b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py @@ -0,0 +1,68 @@ +""" +--------------------------------------------------------- +Mhub / DIAG - Run Module for the PICAI baseline Algorithm +--------------------------------------------------------- + +--------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +--------------------------------------------------------- +""" + +import json +from pathlib import Path + +from mhubio.core import Instance, InstanceData, IO, Module, ValueOutput, ClassOutput, Meta + +# Import the PICAI Classifier algorithm class from /opt/algorithm +from process import csPCaAlgorithm as PicaiClassifier + + +@ValueOutput.Name('prostate_cancer_probability') +@ValueOutput.Meta(Meta(key="value")) +@ValueOutput.Label('ProstateCancerProbability') +@ValueOutput.Type(float) 
+@ValueOutput.Description('Probability of case-level prostate cancer.') +class ProstateCancerProbability(ValueOutput): + pass + + +class PicaiBaselineRunner(Module): + + @IO.Instance() + @IO.Input('in_data_t2', 'mha:mod=mrt2', the='input T2 weighted prostate MR image') + @IO.Input('in_data_adc', 'mha:mod=mradc', the='input ADC prostate MR image') + @IO.Input('in_data_hbv', 'mha:mod=mrhbv', the='input HBV prostate MR image') + @IO.Output('cancer_probability_json', 'cspca-case-level-likelihood.json', "json", bundle='model', the='output JSON file with PICAI baseline prostate cancer probability') + @IO.Output('cancer_detection_heatmap', 'cspca_detection_map.mha', "mha:mod=hm", bundle='model', the='output heatmap indicating prostate cancer probability') + @IO.OutputData('cancer_probability', ProstateCancerProbability, the='PICAI baseline prostate cancer probability') + def task(self, instance: Instance, in_data_t2: InstanceData, in_data_adc: InstanceData, in_data_hbv: InstanceData, cancer_probability_json: InstanceData, cancer_detection_heatmap: InstanceData, cancer_probability: ProstateCancerProbability) -> None: + # Initialize classifier object + classifier = PicaiClassifier() + + # Specify input files (the order is important!) 
+ classifier.scan_paths = [ + Path(in_data_t2.abspath), + Path(in_data_adc.abspath), + Path(in_data_hbv.abspath), + ] + + # Specify output files + classifier.cspca_detection_map_path = Path(cancer_detection_heatmap.abspath) + classifier.case_confidence_path = Path(cancer_probability_json.abspath) + + # Run the classifier on the input images + classifier.process() + + # Extract cancer probability value from cancer_probability_file + if not Path(cancer_probability_json.abspath).is_file(): + raise FileNotFoundError(f"Output file {cancer_probability_json.abspath} could not be found!") + + with open(cancer_probability_json.abspath, "r") as f: + cancer_prob = float(json.load(f)) + + if not (isinstance(cancer_prob, (float, int)) and (0.0 <= cancer_prob <= 1.0)): + raise ValueError(f"Cancer probability value should be a probability value, found: {cancer_prob}") + + # Output the predicted values + cancer_probability.value = cancer_prob diff --git a/models/gc_picai_baseline/utils/__init__.py b/models/gc_picai_baseline/utils/__init__.py new file mode 100644 index 00000000..a0ec22bc --- /dev/null +++ b/models/gc_picai_baseline/utils/__init__.py @@ -0,0 +1 @@ +from .PicaiBaselineRunner import * From 954b4a2c0ced7a903a5de29a599164fbdc5ceacc Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 24 Oct 2023 15:13:10 +0200 Subject: [PATCH 020/125] add meta.json --- models/gc_picai_baseline/meta.json | 133 +++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 models/gc_picai_baseline/meta.json diff --git a/models/gc_picai_baseline/meta.json b/models/gc_picai_baseline/meta.json new file mode 100644 index 00000000..312743c5 --- /dev/null +++ b/models/gc_picai_baseline/meta.json @@ -0,0 +1,133 @@ +{ + "id": "c5f886fb-9f54-4555-a954-da02b22d6d3f", + "name": "picai_baseline", + "title": "PI-CAI challenge baseline", + "summary": { + "description": "The PI-CAI challenge is to validate modern AI algorithms at clinically significant prostate cancer (csPCa) 
detection and diagnosis. This model algorithm provides the baseline for the challenge.", + "inputs": [ + { + "label": "Prostate biparametric MRI", + "description": "Prostate biparametric MRI exam", + "format": "DICOM", + "modality": "MR", + "bodypartexamined": "Prostate", + "slicethickness": "", + "non-contrast": false, + "contrast": false + } + ], + "outputs": [ + { + "type": "Prediction", + "valueType": "Probability", + "label": "Prostate cancer probability", + "description": "Case-level likelihood of harboring clinically significant prostate cancer, in range [0,1]", + "classes": [] + }, + { + "type": "Prediction", + "valueType": "Probability map", + "label": "Transverse cancer detection map", + "description": "Detection map of clinically significant prostate cancer lesions in 3D, where each voxel represents a floating point in range [0,1]", + "classes": [] + } + ], + "model": { + "architecture": "3d fullres nnUNet", + "training": "supervised", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 1200 + }, + "evaluation": { + "vol_samples": 300 + }, + "public": false, + "external": false + } + }, + "details": { + "name": "PI-CAI challenge baseline", + "version": "v2.1.1", + "devteam": "Diagnostic Image Analysis Group, Radboud University Medical Center, Nijmegen, The Netherlands", + "type": "Prediction", + "date": { + "weights": "2022-06-22", + "code": "2022-09-05", + "pub": "" + }, + "cite": "", + "license": { + "code": "Apache 2.0", + "weights": "Apache 2.0" + }, + "publications": [], + "github": "https://github.com/DIAGNijmegen/picai_nnunet_semi_supervised_gc_algorithm", + "zenodo": "", + "colab": "", + "slicer": false + }, + "info": { + "use": { + "title": "Intended use", + "text": "Prediction of the likelihood of harboring clinically significant prostate cancer (csPCa) in prostate biparametric MRI exams.", + "references": [], + "tables": [] + }, + "analyses": { + "title": "Evaluation", + "text": "Patient-level diagnosis performance is 
evaluated using the Area Under Receiver Operating Characteristic (AUROC) metric. Lesion-level detection performance is evaluated using the Average Precision (AP) metric. Overall score used to rank each AI algorithm is the average of both task-specific metrics: Overall Ranking Score = (AP + AUROC) / 2", + "references": [ + { + "label": "PI-CAI AI challenge details", + "uri": "https://pi-cai.grand-challenge.org/AI/" + } + ], + "tables": [] + }, + "evaluation": { + "title": "Evaluation data", + "text": "The test sets are two private cohorts of 100 and 1000 biparametric MRI exams respectively. The first was used to tune the algorithms in a public leaderboard, the second was used to determine the top 5 AI algorithms.", + "references": [ + { + "label": "PI-CAI data section", + "uri": "https://pi-cai.grand-challenge.org/DATA/" + } + ], + "tables": [] + }, + "training": { + "title": "Training data", + "text": "For the PI-CAI a publicly available training datasets of 1500 biparametric MRI exams including 328 cases from the ProstateX challenge were made available.", + "references": [ + { + "label": "PI-CAI publicly available training data", + "uri": "https://zenodo.org/record/6624726" + }, + { + "label": "PI-CAI publicly available training data annotations", + "uri": "https://github.com/DIAGNijmegen/picai_labels" + }, + { + "label": "ProstateX challenge", + "uri": "https://prostatex.grand-challenge.org/" + } + ], + "tables": [] + }, + "ethics": { + "title": "", + "text": "", + "references": [], + "tables": [] + }, + "limitations": { + "title": "Limitations", + "text": "This algorithm was developed for research purposes only.", + "references": [], + "tables": [] + } + } +} \ No newline at end of file From 47ce41819b87bf6bb0833defbc60505a3d2e243a Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Mon, 6 Nov 2023 13:51:42 +0100 Subject: [PATCH 021/125] changed input structure and added allow_multi_input on MHAConverter --- models/gc_picai_baseline/config/default.yml | 5 ++--- 
models/gc_picai_baseline/utils/PicaiBaselineRunner.py | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/models/gc_picai_baseline/config/default.yml b/models/gc_picai_baseline/config/default.yml index c86566be..fe257f9d 100644 --- a/models/gc_picai_baseline/config/default.yml +++ b/models/gc_picai_baseline/config/default.yml @@ -14,13 +14,12 @@ modules: FileStructureImporter: input_dir: input_data structures: - - $sid@instance/images/transverse-adc-prostate-mri@dicom:mod=mradc - - $sid/images/transverse-t2-prostate-mri@dicom:mod=mrt2 - - $sid/images/transverse-hbv-prostate-mri@dicom:mod=mrhbv + - $sid@instance/$type@dicom:mod=mr import_id: sid MhaConverter: engine: panimg + allow_multi_input: true ReportExporter: format: compact diff --git a/models/gc_picai_baseline/utils/PicaiBaselineRunner.py b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py index 62b88e30..d4a122f2 100644 --- a/models/gc_picai_baseline/utils/PicaiBaselineRunner.py +++ b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py @@ -30,9 +30,9 @@ class ProstateCancerProbability(ValueOutput): class PicaiBaselineRunner(Module): @IO.Instance() - @IO.Input('in_data_t2', 'mha:mod=mrt2', the='input T2 weighted prostate MR image') - @IO.Input('in_data_adc', 'mha:mod=mradc', the='input ADC prostate MR image') - @IO.Input('in_data_hbv', 'mha:mod=mrhbv', the='input HBV prostate MR image') + @IO.Input('in_data_t2', 'mha:mod=mr:type=t2w', the='input T2 weighted prostate MR image') + @IO.Input('in_data_adc', 'mha:mod=mr:type=adc', the='input ADC prostate MR image') + @IO.Input('in_data_hbv', 'mha:mod=mr:type=hbv', the='input HBV prostate MR image') @IO.Output('cancer_probability_json', 'cspca-case-level-likelihood.json', "json", bundle='model', the='output JSON file with PICAI baseline prostate cancer probability') @IO.Output('cancer_detection_heatmap', 'cspca_detection_map.mha', "mha:mod=hm", bundle='model', the='output heatmap indicating prostate cancer probability') 
@IO.OutputData('cancer_probability', ProstateCancerProbability, the='PICAI baseline prostate cancer probability') From 0d19c7c5b907b2be6a1ef935fe9e2c01fff8fda8 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Wed, 15 Nov 2023 13:06:34 +0100 Subject: [PATCH 022/125] add link to baseline algorithm on grand-challenge in meta.json --- models/gc_picai_baseline/meta.json | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/models/gc_picai_baseline/meta.json b/models/gc_picai_baseline/meta.json index 312743c5..59aab4d2 100644 --- a/models/gc_picai_baseline/meta.json +++ b/models/gc_picai_baseline/meta.json @@ -73,7 +73,12 @@ "use": { "title": "Intended use", "text": "Prediction of the likelihood of harboring clinically significant prostate cancer (csPCa) in prostate biparametric MRI exams.", - "references": [], + "references": [ + { + "label": "PI-CAI baseline algorithm on grand-challenge", + "uri": "https://grand-challenge.org/algorithms/pi-cai-baseline-nnu-net-semi-supervised/" + } + ], "tables": [] }, "analyses": { From f675fc07445a37ef993bef631411ea4154963663 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Mon, 20 Nov 2023 11:40:52 +0100 Subject: [PATCH 023/125] updated dockerfile, utils and config for latest mhubio --- models/nnunet_prostate/config/default.yml | 30 ++++++++++++++ models/nnunet_prostate/dockerfiles/Dockerfile | 39 ++++++++++++++++++ .../utils/ProstateResampler.py | 36 +++++++++++++++++ .../nnunet_prostate/utils/ProstateRunner.py | 40 +++++++++++++++++++ 4 files changed, 145 insertions(+) create mode 100644 models/nnunet_prostate/config/default.yml create mode 100644 models/nnunet_prostate/dockerfiles/Dockerfile create mode 100644 models/nnunet_prostate/utils/ProstateResampler.py create mode 100644 models/nnunet_prostate/utils/ProstateRunner.py diff --git a/models/nnunet_prostate/config/default.yml b/models/nnunet_prostate/config/default.yml new file mode 100644 index 00000000..3feb4fe9 --- /dev/null +++ 
b/models/nnunet_prostate/config/default.yml @@ -0,0 +1,30 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: Prostate MR ADC-T2 segmentation (dicom2dicom) + +execute: +- FileStructureImporter +- NiftiConverter +- ProstateResampler +- ProstateRunner +- DataOrganizer + +modules: + FileStructureImporter: + outsource_instances: True + import_id: patientID/studyID + structures: + - $patientID/$studyID@instance/$part@/dicom@dicom + - $patientID@instance:studyID=none/ADC$part@/dicom@dicom + - $patientID@instance:studyID=none/T2$part@/dicom@dicom + + NiftiConverter: + in_datas: dicom:part=ADC|T2 + allow_multi_input: true + overwrite_existing_file: true + + DataOrganizer: + targets: + - NIFTI:mod=seg-->[i:patientID]/[i:studyID]/results.nii.gz + - LOG-->[i:patientID]/[i:studyID]/logs/[d:part]/[d:log-task]_[path] \ No newline at end of file diff --git a/models/nnunet_prostate/dockerfiles/Dockerfile b/models/nnunet_prostate/dockerfiles/Dockerfile new file mode 100644 index 00000000..b697d2c1 --- /dev/null +++ b/models/nnunet_prostate/dockerfiles/Dockerfile @@ -0,0 +1,39 @@ +# Specify the base image for the environment +FROM mhubai/base:latest + +# Authors of the image +LABEL authors="lnuernberg@bwh.harvard.edu" + +# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build +# by pulling sklearn instead of scikit-learn +# N.B. 
this is a known issue: +# https://github.com/MIC-DKFZ/nnUNet/issues/1281 +# https://github.com/MIC-DKFZ/nnUNet/pull/1209 +ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True + +# isntall additional system dependencies +RUN apt update && apt install -y dcm2niix + +# install additional python dependencies +RUN pip3 install --no-cache-dir \ + nnunet \ + nibabel + +# pull weights for platipy's nnU-Net so that the user doesn't need to every time a container is run +ENV WEIGHTS_DIR="/root/.nnunet/nnUNet_models/nnUNet/" +ENV WEIGHTS_URL="https://www.dropbox.com/s/igpwt45v6hlquxp/Task005_Prostate.zip" +ENV WEIGHTS_FN="Task005_Prostate.zip" + +RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL} +RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR} +RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN} + +# Import the MHub model definiton +#ARG MHUB_MODELS_REPO +#RUN buildutils/import_mhub_model.sh casust ${MHUB_MODELS_REPO} + +# specify nnunet specific environment variables +ENV WEIGHTS_FOLDER=$WEIGHTS_DIR + +# Default run script +CMD ["python3", "/app/models/nnunet_prostate/scripts/run.py"] \ No newline at end of file diff --git a/models/nnunet_prostate/utils/ProstateResampler.py b/models/nnunet_prostate/utils/ProstateResampler.py new file mode 100644 index 00000000..35eba6a4 --- /dev/null +++ b/models/nnunet_prostate/utils/ProstateResampler.py @@ -0,0 +1,36 @@ +import os +import pyplastimatch as pypla + +from mhubio.core import Module, Instance, DataType, InstanceData, FileType, IO + +# for specific use case, resample ADC to match T2 (T2 is his 'sesired_grid' property value) +# TODO: add reference to colab notebook? 
+class ProstateResampler(Module): + + @IO.Instance() + @IO.Input('in_data', 'nifti:part=ADC', the="ADC image") + @IO.Input('fixed_data', 'nifti:part=T2', the="T2 image") + @IO.Output('out_data', 'resampled.nii.gz', 'nifti:part=ADC:resampled_to=T2', data='in_data', the="ADC image resampled to T2") + def task(self, instance: Instance, in_data: InstanceData, fixed_data: InstanceData, out_data: InstanceData): + + # log data + log_data = InstanceData('_pypla.log', DataType(FileType.LOG, in_data.type.meta + { + "log-origin": "plastimatch", + "log-task": "resampling", + "log-caller": "Resampler", + "log-instance": str(instance) + }), data=in_data, auto_increment=True) + + # process + resample_args = { + 'input': in_data.abspath, + 'output': out_data.abspath, + 'fixed': fixed_data.abspath, + } + + # TODO add log file + pypla.resample( + verbose=self.config.verbose, + path_to_log_file=log_data.abspath, + **resample_args # type: ignore + ) \ No newline at end of file diff --git a/models/nnunet_prostate/utils/ProstateRunner.py b/models/nnunet_prostate/utils/ProstateRunner.py new file mode 100644 index 00000000..5757425b --- /dev/null +++ b/models/nnunet_prostate/utils/ProstateRunner.py @@ -0,0 +1,40 @@ +import os, shutil +from mhubio.core import Module, Instance, InstanceData, IO + +class ProstateRunner(Module): + + @IO.Instance() + @IO.Input('T2', 'nifti:part=T2', the="T2 image") + @IO.Input('ADC', 'nifti:part=ADC:resampled_to=T2', the="ADC image resampled to T2") + @IO.Output('P', 'VOLUME_001.nii.gz', 'nifti:mod=seg:model=nnunet_t005_prostate', bundle='nnunet-out', the="Prostate segmentation") + def task(self, instance: Instance, T2: InstanceData, ADC: InstanceData, P: InstanceData) -> None: + + # copy input files to align with the nnunet input folder and file name format + # T2: 0000 + # ADC: 0001 + inp_dir = self.config.data.requestTempDir(label="nnunet-model-inp") + inp_file_T2 = f'VOLUME_001_0000.nii.gz' + inp_file_ADC = f'VOLUME_001_0001.nii.gz' + 
shutil.copyfile(T2.abspath, os.path.join(inp_dir, inp_file_T2)) + shutil.copyfile(ADC.abspath, os.path.join(inp_dir, inp_file_ADC)) + + # define output folder (temp dir) and also override environment variable for nnunet + assert P.bundle is not None, f"Output bundle is required: {str(P)}" + os.environ['RESULTS_FOLDER'] = P.bundle.abspath + + # symlink nnunet input folder to the input data with python + # create symlink in python + # NOTE: this is a workaround for the nnunet bash script that expects the input data to be in a specific folder + # structure. This is not the case for the mhub data structure. So we create a symlink to the input data + # in the nnunet input folder structure. + os.symlink(os.environ['WEIGHTS_FOLDER'], os.path.join(P.bundle.abspath, 'nnUNet')) + + # construct nnunet inference command + bash_command = ["nnUNet_predict"] + bash_command += ["--input_folder", str(inp_dir)] + bash_command += ["--output_folder", str(P.bundle.abspath)] + bash_command += ["--task_name", 'Task005_Prostate'] + bash_command += ["--model", '3d_fullres'] + + # run command + self.subprocess(bash_command, text=True) \ No newline at end of file From 34d22dc6c611a7d345f1ff621bbe751ad8142ffd Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Mon, 20 Nov 2023 11:43:22 +0100 Subject: [PATCH 024/125] updating Dockerfile with model import --- models/nnunet_prostate/dockerfiles/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/nnunet_prostate/dockerfiles/Dockerfile b/models/nnunet_prostate/dockerfiles/Dockerfile index b697d2c1..87547c1c 100644 --- a/models/nnunet_prostate/dockerfiles/Dockerfile +++ b/models/nnunet_prostate/dockerfiles/Dockerfile @@ -29,8 +29,8 @@ RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR} RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN} # Import the MHub model definiton -#ARG MHUB_MODELS_REPO -#RUN buildutils/import_mhub_model.sh casust ${MHUB_MODELS_REPO} +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh nnunet_prostate 
${MHUB_MODELS_REPO} # specify nnunet specific environment variables ENV WEIGHTS_FOLDER=$WEIGHTS_DIR From ebe8bd9461a3cb41b8af1bb59a7122306aa4e798 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Wed, 22 Nov 2023 13:29:18 +0100 Subject: [PATCH 025/125] rename model --- .../config/default.yml | 0 .../dockerfiles/Dockerfile | 5 +++-- .../utils/ProstateResampler.py | 0 .../utils/ProstateRunner.py | 0 4 files changed, 3 insertions(+), 2 deletions(-) rename models/{nnunet_prostate => nnunet_prostate_zonal_task05}/config/default.yml (100%) rename models/{nnunet_prostate => nnunet_prostate_zonal_task05}/dockerfiles/Dockerfile (86%) rename models/{nnunet_prostate => nnunet_prostate_zonal_task05}/utils/ProstateResampler.py (100%) rename models/{nnunet_prostate => nnunet_prostate_zonal_task05}/utils/ProstateRunner.py (100%) diff --git a/models/nnunet_prostate/config/default.yml b/models/nnunet_prostate_zonal_task05/config/default.yml similarity index 100% rename from models/nnunet_prostate/config/default.yml rename to models/nnunet_prostate_zonal_task05/config/default.yml diff --git a/models/nnunet_prostate/dockerfiles/Dockerfile b/models/nnunet_prostate_zonal_task05/dockerfiles/Dockerfile similarity index 86% rename from models/nnunet_prostate/dockerfiles/Dockerfile rename to models/nnunet_prostate_zonal_task05/dockerfiles/Dockerfile index 87547c1c..e2900427 100644 --- a/models/nnunet_prostate/dockerfiles/Dockerfile +++ b/models/nnunet_prostate_zonal_task05/dockerfiles/Dockerfile @@ -30,10 +30,11 @@ RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN} # Import the MHub model definiton ARG MHUB_MODELS_REPO -RUN buildutils/import_mhub_model.sh nnunet_prostate ${MHUB_MODELS_REPO} +RUN buildutils/import_mhub_model.sh nnunet_prostate_zonal_task05 ${MHUB_MODELS_REPO} # specify nnunet specific environment variables ENV WEIGHTS_FOLDER=$WEIGHTS_DIR # Default run script -CMD ["python3", "/app/models/nnunet_prostate/scripts/run.py"] \ No newline at end of file +ENTRYPOINT ["mhub.run"] +CMD ["--config", 
"/app/models/nnunet_prostate_zonal_task05/config/default.yml"] \ No newline at end of file diff --git a/models/nnunet_prostate/utils/ProstateResampler.py b/models/nnunet_prostate_zonal_task05/utils/ProstateResampler.py similarity index 100% rename from models/nnunet_prostate/utils/ProstateResampler.py rename to models/nnunet_prostate_zonal_task05/utils/ProstateResampler.py diff --git a/models/nnunet_prostate/utils/ProstateRunner.py b/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py similarity index 100% rename from models/nnunet_prostate/utils/ProstateRunner.py rename to models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py From b1a472ab41d2b29f8a47474ec6adb2d5b6378ba9 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Wed, 22 Nov 2023 14:02:01 +0100 Subject: [PATCH 026/125] implement dicomseg conversion step --- models/nnunet_prostate_zonal_task05/config/default.yml | 10 ++++++++++ .../utils/ProstateRunner.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/models/nnunet_prostate_zonal_task05/config/default.yml b/models/nnunet_prostate_zonal_task05/config/default.yml index 3feb4fe9..8c82102c 100644 --- a/models/nnunet_prostate_zonal_task05/config/default.yml +++ b/models/nnunet_prostate_zonal_task05/config/default.yml @@ -8,6 +8,7 @@ execute: - NiftiConverter - ProstateResampler - ProstateRunner +- DsegConverter - DataOrganizer modules: @@ -24,7 +25,16 @@ modules: allow_multi_input: true overwrite_existing_file: true + DsegConverter: + model_name: nnUNet Zonal Prostate (Task05) + target_dicom: dicom:part=T2 + source_segs: nifti:mod=seg:roi=* + body_part_examined: PROSTATE + skip_empty_slices: True + segment_id_meta_key: roi + DataOrganizer: targets: + - DICOMSEG:mod=seg-->[i:patientID]/[i:studyID]/nnunet_prostate_zonal_task05.seg.dcm - NIFTI:mod=seg-->[i:patientID]/[i:studyID]/results.nii.gz - LOG-->[i:patientID]/[i:studyID]/logs/[d:part]/[d:log-task]_[path] \ No newline at end of file diff --git 
a/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py b/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py index 5757425b..f8e65c3a 100644 --- a/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py +++ b/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py @@ -6,7 +6,7 @@ class ProstateRunner(Module): @IO.Instance() @IO.Input('T2', 'nifti:part=T2', the="T2 image") @IO.Input('ADC', 'nifti:part=ADC:resampled_to=T2', the="ADC image resampled to T2") - @IO.Output('P', 'VOLUME_001.nii.gz', 'nifti:mod=seg:model=nnunet_t005_prostate', bundle='nnunet-out', the="Prostate segmentation") + @IO.Output('P', 'VOLUME_001.nii.gz', 'nifti:mod=seg:model=nnunet_t005_prostate:roi=PROSTATE_PERIPHERAL_ZONE,PROSTATE_TRANSITION_ZONE', bundle='nnunet-out', the="Prostate segmentation") def task(self, instance: Instance, T2: InstanceData, ADC: InstanceData, P: InstanceData) -> None: # copy input files to align with the nnunet input folder and file name format From 194b7cd6906d023212f463b3ca9d2f8119e090a6 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Wed, 22 Nov 2023 18:00:44 +0100 Subject: [PATCH 027/125] Adding model meta data Co-authored-by: Cosmin Ciausu --- models/nnunet_prostate_zonal_task05/meta.json | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 models/nnunet_prostate_zonal_task05/meta.json diff --git a/models/nnunet_prostate_zonal_task05/meta.json b/models/nnunet_prostate_zonal_task05/meta.json new file mode 100644 index 00000000..efce5301 --- /dev/null +++ b/models/nnunet_prostate_zonal_task05/meta.json @@ -0,0 +1,136 @@ +{ + "id": "f2eb536b-448a-4e9a-8981-3efc51301f62", + "name": "nnunet_prostate_zonal_task05", + "title": "nnU-Net (Prostate transitional zone and peripheral zone segmentation)", + "summary": { + "description": "nnU-Net's zonal prostate segmentation model is a multi-modality input AI-based pipeline for the automated segmentation of the peripheral and transition zone of the prostate on MRI scans.", + 
"inputs": [ + { + "label": "T2 input image", + "description": "The T2 axial sequence being one of the two input image", + "format": "DICOM", + "modality": "MR", + "bodypartexamined": "Prostate", + "slicethickness": "3.6 mm", + "non-contrast": true, + "contrast": false + }, + { + "label": "ADC Input Image", + "description": "The ADC axial sequence being one of the two input image", + "format": "DICOM", + "modality": "MR", + "bodypartexamined": "Prostate", + "slicethickness": "3.6 mm", + "non-contrast": true, + "contrast": false + } + ], + "outputs": [ + { + "type": "Segmentation", + "classes": [ + "PROSTATE_PERIPHERAL_ZONE", + "PROSTATE_TRANSITION_ZONE" + ] + } + ], + "model": { + "architecture": "U-net", + "training": "Supervised", + "cmpapproach": "2D, 3D, ensemble" + }, + "data": { + "training": { + "vol_samples": 32 + }, + "evaluation": { + "vol_samples": 16 + }, + "public": "Yes", + "external": false + } + }, + "details": { + "name": "nnU-Net Zonal prostate regions Segmentation Model", + "version": "1.0.0", + "devteam": "MIC-DKFZ (Helmholtz Imaging Applied Computer Vision Lab)", + "type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)", + "date": { + "weights": "2020", + "code": "2020", + "pub": "2020" + }, + "cite": "Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2020). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature Methods, 1-9.", + "license": { + "code": "Apache 2.0", + "weights": "CC BY-NC 4.0" + }, + "publications": [ + { + "title": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation", + "uri": "https://www.nature.com/articles/s41592-020-01008-z" + } + ], + "github": "https://github.com/MIC-DKFZ/nnUNet/tree/nnunetv1", + "zenodo": "https://zenodo.org/record/4485926" + }, + "info": { + "use": { + "title": "Intended Use", + "text": "This model is intended to perform prostate regions anatomy segmentation in MR ADC and T2 scans. 
The slice thickness of the training data is 3.6mm. ADC and T2 input modalities are co-registered during training. To assure optimal results during inference, co-registration of ADC and T2 input sequences is recommended. No endorectal coil was present during training." + }, + "analyses": { + "title": "Quantitative Analyses", + "text": "The model's performance was assessed using the Dice Coefficient, in the context of the Medical Segmentation Decathlon challenge. The complete breakdown of the metrics can be consulted on GrandChallenge [1] and is reported in the supplementary material to the publication [2].", + "references": [ + { + "label": "Medical Segmentation Decathlon on GrandChallenge", + "uri": "https://decathlon-10.grand-challenge.org/evaluation/challenge/leaderboard" + }, + { + "label": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation", + "uri": "https://www.nature.com/articles/s41592-020-01008-z" + } + ] + }, + "evaluation": { + "title": "Evaluation Data", + "text": "The evaluation dataset consists of 16 validation samples coming from the same training collection.", + "tables": [], + "references": [ + { + "label": "Medical Segmentation Decathlon", + "uri": "https://www.nature.com/articles/s41467-022-30695-9" + }, + { + "label": "Medical Decathlon Prostate dataset (direct download)", + "uri": "https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2" + } + ] + }, + "training": { + "title": "Training Data", + "text": "The training dataset consists of 32 MRI cases containing the prostate, from the Medical Segmentation Decathlon. 
The authors report the following characteristics for the portal venous phase CT scans of the training dataset:", + "tables": [ + { + "label": "Medical Image Decathlon dataset (training)", + "entries": { + "Slice Thickness": "3.6 mm", + "In-Plane Resolution": "0.62 mm" + } + } + ], + "references": [ + { + "label": "Medical Segmentation Decathlon", + "uri": "https://www.nature.com/articles/s41467-022-30695-9" + }, + { + "label": "Medical Decathlon Prostate dataset (direct download)", + "uri": "https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2" + } + ] + } + } +} \ No newline at end of file From 16ceeb629a2e248f6b1ead6e0414b2717a061083 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Wed, 22 Nov 2023 18:05:02 +0100 Subject: [PATCH 028/125] minor fix on metadata --- models/nnunet_prostate_zonal_task05/meta.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/models/nnunet_prostate_zonal_task05/meta.json b/models/nnunet_prostate_zonal_task05/meta.json index efce5301..08c73346 100644 --- a/models/nnunet_prostate_zonal_task05/meta.json +++ b/models/nnunet_prostate_zonal_task05/meta.json @@ -37,8 +37,8 @@ ], "model": { "architecture": "U-net", - "training": "Supervised", - "cmpapproach": "2D, 3D, ensemble" + "training": "supervised", + "cmpapproach": "3D" }, "data": { "training": { @@ -47,7 +47,7 @@ "evaluation": { "vol_samples": 16 }, - "public": "Yes", + "public": true, "external": false } }, From 810f68a5ca666241bb72466445ffcd2f82676cf1 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Wed, 22 Nov 2023 18:06:10 +0100 Subject: [PATCH 029/125] fix Dockerfile (compliance) --- models/nnunet_prostate_zonal_task05/dockerfiles/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/models/nnunet_prostate_zonal_task05/dockerfiles/Dockerfile b/models/nnunet_prostate_zonal_task05/dockerfiles/Dockerfile index e2900427..416f17b0 100644 --- a/models/nnunet_prostate_zonal_task05/dockerfiles/Dockerfile +++ 
b/models/nnunet_prostate_zonal_task05/dockerfiles/Dockerfile @@ -1,4 +1,3 @@ -# Specify the base image for the environment FROM mhubai/base:latest # Authors of the image From 483bf910ad845974d79520b2bf69dd37448f2583 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 23 Nov 2023 17:03:05 +0100 Subject: [PATCH 030/125] removed first line comment and added mhub model definition import lines --- models/gc_picai_baseline/dockerfiles/Dockerfile | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/models/gc_picai_baseline/dockerfiles/Dockerfile b/models/gc_picai_baseline/dockerfiles/Dockerfile index a1bff797..84a1c607 100644 --- a/models/gc_picai_baseline/dockerfiles/Dockerfile +++ b/models/gc_picai_baseline/dockerfiles/Dockerfile @@ -1,4 +1,3 @@ -# Specify the base image for the environment FROM mhubai/base:latest # Specify/override authors label @@ -35,13 +34,9 @@ RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \ RUN sed -i "s|file_paths = list(Path(folder).glob(scan_glob_format))|return|g" /opt/algorithm/process.py && \ sed -i "s|self.cspca_detection_map_path.parent.mkdir(exist_ok=True, parents=True)||g" /opt/algorithm/process.py -# FIXME: temporary fix waiting for the latest base image update -# Clone the main branch of MHubAI/models -#RUN git stash \ -# && git fetch https://github.com/MHubAI/models.git main \ -# && git merge FETCH_HEAD \ -# && git sparse-checkout set "models/gc_picai_baseline" \ -# && git fetch https://github.com/MHubAI/models.git main +# Import the MHub model definiton +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh gc_picai_baseline ${MHUB_MODELS_REPO} # Add lobe segmentation code base to python path ENV PYTHONPATH="/app:/opt/algorithm" From 334d5738575023698898c3e8cdffa8eee0ca656d Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 23 Nov 2023 21:42:08 +0100 Subject: [PATCH 031/125] update mhub model definition import Dockerfile --- 
models/gc_nnunet_pancreas/dockerfiles/Dockerfile | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile index 3fe9a951..9f6b56a5 100644 --- a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile +++ b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile @@ -31,12 +31,9 @@ RUN pip3 install --no-cache-dir -r /opt/algorithm/requirements.txt RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \ mv /opt/algorithm/nnUNetTrainerV2_Loss_CE_checkpoints.py "$SITE_PKG/nnunet/training/network_training/nnUNetTrainerV2_Loss_CE_checkpoints.py" -# Clone the main branch of MHubAI/models TODO check if ok -RUN git stash \ - && git fetch https://github.com/MHubAI/models.git main \ - && git merge FETCH_HEAD \ - && git sparse-checkout set "models/gc_nnunet_pancreas" \ - && git fetch https://github.com/MHubAI/models.git main +# Import the MHub model definiton +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh gc_nnunet_pancreas ${MHUB_MODELS_REPO} # Add algorithm files to python path ENV PYTHONPATH=/opt/algorithm:/app From 4466e56ec79da9a7dc581227ecccea4d684174f5 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 23 Nov 2023 21:45:15 +0100 Subject: [PATCH 032/125] removed first comment line in Dockerfile --- models/gc_nnunet_pancreas/dockerfiles/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile index 9f6b56a5..68577762 100644 --- a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile +++ b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile @@ -1,4 +1,3 @@ -# Specify the base image for the environment FROM mhubai/base:latest # Specify/override authors label From ecdd16601a6f21ecfb6999daa973c0e91e7db65f Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Fri, 24 Nov 2023 22:37:34 +0100 Subject: [PATCH 033/125] added mhub model definition and 
removed first comment line Dockerfile --- models/gc_grt123_lung_cancer/dockerfiles/Dockerfile | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile b/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile index ea2416cc..d70f0ab3 100644 --- a/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile +++ b/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile @@ -1,4 +1,3 @@ -# Specify the base image for the environment FROM mhubai/base:latest # Specify/override authors label @@ -24,11 +23,9 @@ RUN git clone --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt rm -rf /gc_grt123_lung_cancer/.git/* && \ mv /gc_grt123_lung_cancer/HEAD /gc_grt123_lung_cancer/.git -# Clone MHub model (m-gc-grt123-lung-cancer branch, fixed to commit TODO) -#RUN git init \ -# && git sparse-checkout set "models/grt123_lung_cancer" \ -# && git fetch https://github.com/MHubAI/models.git m-gc-grt123-lung-cancer \ -# && git merge TODO +# Import the MHub model definition +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh gc_grt123_lung_cancer ${MHUB_MODELS_REPO} # Add lobe segmentation code base to python path ENV PYTHONPATH="/gc_grt123_lung_cancer:/app" From 5ab7f20c6ba894fcd8a5e30cdd47c70c54edc102 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Fri, 24 Nov 2023 22:50:19 +0100 Subject: [PATCH 034/125] cleanup runner imports, add new style logging --- .../utils/LungCancerClassifierRunner.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py index eb170d67..1ae1e129 100644 --- a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py +++ b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -14,8 +14,6 @@ from typing import Dict import json from pathlib import Path -import numpy as np -import SimpleITK as sitk import torch @@ 
-45,7 +43,6 @@ def cleanup_json_report(data: Dict): class LungCancerClassifierRunner(Module): n_preprocessing_workers: int - tmp_path: str @IO.Instance() @IO.Input('in_data', 'mha:mod=ct', the='input ct scan') @@ -60,10 +57,10 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData # determine the number of GPUs we can use if torch.cuda.is_available(): - self.v("Running with a GPU") + self.log("Running with a GPU", "NOTICE") n_gpu = 1 else: - self.v("Running on the CPU, might be slow...") + self.log("Running on the CPU, might be slow...", "NOTICE") n_gpu = 0 # apply grt123 algorithm @@ -79,8 +76,8 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData data_filter=r".*.mha" ) - # store classification results - self.v(f"Writing classification results to {out_data.abspath}") + # store classification results (original json file) + self.log(f"Writing classification results to {out_data.abspath}", "NOTICE") assert len(results) > 0, "LungCancerClassifierRunner - Always expects at least one output report" results_json = results[0].to_json() cleanup_json_report(results_json) From 2d4365ad91a33be3bf86f666a1a4768d8e9dbc81 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Mon, 27 Nov 2023 10:17:52 +0100 Subject: [PATCH 035/125] added value output for overall score and added dynamic value output for all findings --- .../utils/LungCancerClassifierRunner.py | 50 ++++++++++++++++--- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py index 1ae1e129..56181b72 100644 --- a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py +++ b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -9,7 +9,7 @@ -------------------------------------------------------- """ import torch.cuda -from mhubio.core import Instance, InstanceData, IO, Module +from mhubio.core import 
Instance, InstanceData, IO, Module, ValueOutput, Meta from typing import Dict import json @@ -20,6 +20,25 @@ # Import the main module for the grt123 algorithm, which must be used for running the classification import main + +@ValueOutput.Name('lncancerprob') +@ValueOutput.Meta(Meta(min=0.0, max=1.0, type="probability")) +@ValueOutput.Label('Lung Nodule cancer probability score.') +@ValueOutput.Type(float) +@ValueOutput.Description('The predicted cancer probability score for a single lung nodule detected by the algorithm') +class LNCancerProb(ValueOutput): + pass + + +@ValueOutput.Name('clcancerprob') +@ValueOutput.Meta(Meta(min=0.0, max=1.0, type="probability")) +@ValueOutput.Label('Case level cancer probability score.') +@ValueOutput.Type(float) +@ValueOutput.Description('The predicted cancer probability score for the whole case') +class CLCancerProb(ValueOutput): + pass + + # This method cleans the raw results from the grt123 algorithm output and only keeps the relevant details def cleanup_json_report(data: Dict): for key in ["trainingset1", "trainingset2"]: @@ -46,8 +65,10 @@ class LungCancerClassifierRunner(Module): @IO.Instance() @IO.Input('in_data', 'mha:mod=ct', the='input ct scan') - @IO.Output('out_data', 'grt123_lung_cancer_findings.json', 'json:model=grt123LungCancerClassification', 'in_data', the='predicted nodules and lung cancer findings of the lung lobe') - def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: + @IO.Output('out_data', 'grt123_lung_cancer_findings.json', 'json:model=grt123LungCancerClassification', data='in_data', the='predicted nodules and lung cancer findings of the lung lobe') + @IO.OutputData('clcancerprob', CLCancerProb, the='Case level probability score') + @IO.OutputDatas('lncancerprobs', LNCancerProb, the='Individual lung nodule probability scores') + def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData, clcancerprob: CLCancerProb, lncancerprobs: LNCancerProb) -> 
None: # create temporary directories for the preprocessed data and the cropped bounding boxes tmp_path = Path(self.config.data.requestTempDir('grt123')) tmp_output_bbox_dir = tmp_path / "bbox" @@ -76,10 +97,23 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData data_filter=r".*.mha" ) - # store classification results (original json file) - self.log(f"Writing classification results to {out_data.abspath}", "NOTICE") + # retrieve classification results assert len(results) > 0, "LungCancerClassifierRunner - Always expects at least one output report" - results_json = results[0].to_json() - cleanup_json_report(results_json) + results_dict = results[0].to_json() + cleanup_json_report(results_dict) + + # export to JSON (original json file) + self.log(f"Writing classification results to {out_data.abspath}", "NOTICE") with open(out_data.abspath, "w") as f: - json.dump(results_json, f, indent=4) + json.dump(results_dict, f, indent=4) + + # set output value for case level cancer probability + clcancerprob.value = results_dict["cancerinfo"]["casecancerprobability"] + + # set output values for nodule level cancer probabilities + for finding in results_dict["findings"]: + nodule_cancer_prob = LNCancerProb() + nodule_cancer_prob.meta = Meta(id=finding['id'], x=finding['x'], y=finding['y'], z=finding['z'], ) + nodule_cancer_prob.description += f" (for nodule {finding['id']} at location ({finding['x']}, {finding['y']}, {finding['z']}))" + nodule_cancer_prob.value = finding["cancerprobability"] + lncancerprobs.add(nodule_cancer_prob) From 0e449b814a7cc14d03d44fa0504438d2411dd11e Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 28 Nov 2023 11:24:37 +0100 Subject: [PATCH 036/125] update algorithm version to release gclobe165 with fix for #69 --- models/gc_lunglobes/dockerfiles/Dockerfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/models/gc_lunglobes/dockerfiles/Dockerfile b/models/gc_lunglobes/dockerfiles/Dockerfile 
index 1343e6bb..0e87d31a 100644 --- a/models/gc_lunglobes/dockerfiles/Dockerfile +++ b/models/gc_lunglobes/dockerfiles/Dockerfile @@ -19,9 +19,8 @@ RUN pip3 install --no-cache-dir --force-reinstall SimpleITK==1.2.4 ARG MHUB_MODELS_REPO RUN buildutils/import_mhub_model.sh gc_lunglobes ${MHUB_MODELS_REPO} -# Install Xie's pulmonary lobe segmentation algorithm and model weights (main branch commit at 2023/09/13) -RUN git clone https://github.com/DIAGNijmegen/bodyct-pulmonary-lobe-segmentation.git src && \ - cd src && git reset --hard 5a64b70504d46c042c30851a69cec370f1202e67 && cd /app && \ +# Install Xie's pulmonary lobe segmentation algorithm and model weights (release gclobe165 v1.6.5) +RUN git clone --depth 1 --branch v1.6.5 https://github.com/DIAGNijmegen/bodyct-pulmonary-lobe-segmentation.git src && \ sed -i 's/from models import CTSUNet/from src.models import CTSUNet/g' src/test.py # Default run script From d329d29a2d181b9c33a7740fe17c501b2074a998 Mon Sep 17 00:00:00 2001 From: Rakkalos <72577931+Rakkalos@users.noreply.github.com> Date: Wed, 29 Nov 2023 16:07:49 -0500 Subject: [PATCH 037/125] added task024 promise nnunet model --- .../nnunet_prostate_task24/config/default.yml | 41 ++++++ .../dockerfiles/Dockerfile | 32 +++++ models/nnunet_prostate_task24/meta.json | 125 ++++++++++++++++++ 3 files changed, 198 insertions(+) create mode 100644 models/nnunet_prostate_task24/config/default.yml create mode 100644 models/nnunet_prostate_task24/dockerfiles/Dockerfile create mode 100644 models/nnunet_prostate_task24/meta.json diff --git a/models/nnunet_prostate_task24/config/default.yml b/models/nnunet_prostate_task24/config/default.yml new file mode 100644 index 00000000..f12c1f44 --- /dev/null +++ b/models/nnunet_prostate_task24/config/default.yml @@ -0,0 +1,41 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: default configuration for nnUNet MR Prostate segmentation (dicom to dicom) + +execute: +- DicomImporter +- NiftiConverter +- NNUnetRunner +- 
DsegConverter +- DataOrganizer + +modules: + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: true + meta: + mod: 'mr' + + NiftiConverter: + in_datas: dicom:mod=mr + engine: dcm2niix + + NNUnetRunner: + in_data: nifti:mod=mr + nnunet_task: Task024_Promise + nnunet_model: 3d_fullres + roi: PROSTATE + + BamfProcessorRunner: + + DsegConverter: + source_segs: nifti:mod=seg + target_dicom: dicom:mod=mr + model_name: 'nnUNet MR Prostate' + skip_empty_slices: True + + DataOrganizer: + targets: + - dicomseg-->[i:sid]/nnunet_mr_prostate.seg.dcm \ No newline at end of file diff --git a/models/nnunet_prostate_task24/dockerfiles/Dockerfile b/models/nnunet_prostate_task24/dockerfiles/Dockerfile new file mode 100644 index 00000000..5ad3ab81 --- /dev/null +++ b/models/nnunet_prostate_task24/dockerfiles/Dockerfile @@ -0,0 +1,32 @@ +FROM mhubai/base:latest + +# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build +# by pulling sklearn instead of scikit-learn +# N.B. 
this is a known issue: +# https://github.com/MIC-DKFZ/nnUNet/issues/1281 +# https://github.com/MIC-DKFZ/nnUNet/pull/1209 +ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True + +# Install nnunet and platipy +RUN pip3 install --no-cache-dir \ + nnunet + +# Clone the main branch of MHubAI/models +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh nnunet_prostate_task24 ${MHUB_MODELS_REPO} + +# Pull weights into the container +ENV WEIGHTS_DIR=/root/.nnunet/nnUNet_models/nnUNet/ +RUN mkdir -p $WEIGHTS_DIR +ENV WEIGHTS_FN=Task024_Promise.zip +ENV WEIGHTS_URL=https://zenodo.org/records/4003545/files/$WEIGHTS_FN +RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL} +RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR} +RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN} + +# specify nnunet specific environment variables +ENV WEIGHTS_FOLDER=$WEIGHTS_DIR + +# Default run script +ENTRYPOINT ["mhub.run"] +CMD ["--config", "/app/models/nnunet_prostate_task24/config/default.yml"] diff --git a/models/nnunet_prostate_task24/meta.json b/models/nnunet_prostate_task24/meta.json new file mode 100644 index 00000000..ee24e409 --- /dev/null +++ b/models/nnunet_prostate_task24/meta.json @@ -0,0 +1,125 @@ +{ + "id": "...", + "name": "nnunet_prostate_task24_promise", + "title": "nnU-Net (Whole prostate segmentation)", + "summary": { + "description": "nnU-Net's whole prostate segmentation model is a single-modality (i.e. 
T2) input AI-based pipeline for the automated segmentation of the whole prostate on MRI scans.", + "inputs": [ + { + "label": "T2 input image", + "description": "The T2 axial-acquired sequence being the input image", + "format": "DICOM", + "modality": "MR", + "bodypartexamined": "Prostate", + "slicethickness": "3.6 mm", + "non-contrast": true, + "contrast": false + } + ], + "outputs": [ + { + "type": "Segmentation", + "classes": [ + "PROSTATE" + ] + } + ], + "model": { + "architecture": "U-net", + "training": "supervised", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 50 + }, + "test": { + "vol_samples": 30 + }, + "public": true, + "external": false + } + }, + "details": { + "name": "nnU-Net whole prostate segmentation model", + "version": "1.0.0", + "devteam": "MIC-DKFZ (Helmholtz Imaging Applied Computer Vision Lab)", + "type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)", + "date": { + "weights": "2020", + "code": "2020", + "pub": "2020" + }, + "cite": "Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2020). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature Methods, 1-9.", + "license": { + "code": "Apache 2.0", + "weights": "CC BY-NC 4.0" + }, + "publications": [ + { + "title": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation", + "uri": "https://www.nature.com/articles/s41592-020-01008-z" + } + ], + "github": "https://github.com/MIC-DKFZ/nnUNet/tree/nnunetv1", + "zenodo": "https://zenodo.org/record/4485926" + }, + "info": { + "use": { + "title": "Intended Use", + "text": "This model is intended to perform prostate anatomy segmentation in MR T2 scans. The slice thickness of the training data is 2.2~4mm. Endorectal coil was present during training." 
+ }, + "analyses": { + "title": "Quantitative Analyses", + "text": "The model's performance was assessed using the Dice Coefficient, in the context of the Promise12 challenge. The complete breakdown of the metrics can be consulted on GrandChallenge [1] and is reported in the supplementary material to the publication [2].", + "references": [ + { + "label": "Evaluation of prostate segmentation algorithms for MRI: The PROMISE12 challenge", + "uri": "https://doi.org/10.1016/j.media.2013.12.002" + }, + { + "label": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation", + "uri": "https://www.nature.com/articles/s41592-020-01008-z" + } + ] + }, + "evaluation": { + "title": "Testing Data", + "text": "The test dataset consists of 30 test samples coming from the same training collection.", + "tables": [], + "references": [ + { + "label": "Evaluation of prostate segmentation algorithms for MRI: The PROMISE12 challenge", + "uri": "https://doi.org/10.1016/j.media.2013.12.002" + }, + { + "label": "PROMISE12 dataset (direct download)", + "uri": "https://zenodo.org/records/8026660" + } + ] + }, + "training": { + "title": "Training Data", + "text": "The training dataset consists of 50 MRI cases containing the prostate, from the Promise12 challenge. 
The authors report the following characteristics for the training dataset:", + "tables": [ + { + "label": "Medical Image Decathlon dataset (training)", + "entries": { + "Slice Thickness": "2.2~4 mm", + "In-Plane Resolution": "0.27 mm" + } + } + ], + "references": [ + { + "label": "Evaluation of prostate segmentation algorithms for MRI: The PROMISE12 challenge", + "uri": "https://doi.org/10.1016/j.media.2013.12.002" + }, + { + "label": "PROMISE12 dataset (direct download)", + "uri": "https://zenodo.org/records/8026660" + } + ] + } + } +} \ No newline at end of file From f2221028d26645584a4d662a112762e41d6d5c89 Mon Sep 17 00:00:00 2001 From: Rakkalos <72577931+Rakkalos@users.noreply.github.com> Date: Wed, 29 Nov 2023 16:22:47 -0500 Subject: [PATCH 038/125] changed test to evaluation in meta.json --- models/nnunet_prostate_task24/meta.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/models/nnunet_prostate_task24/meta.json b/models/nnunet_prostate_task24/meta.json index ee24e409..50c4e568 100644 --- a/models/nnunet_prostate_task24/meta.json +++ b/models/nnunet_prostate_task24/meta.json @@ -33,7 +33,7 @@ "training": { "vol_samples": 50 }, - "test": { + "evaluation": { "vol_samples": 30 }, "public": true, @@ -84,8 +84,8 @@ ] }, "evaluation": { - "title": "Testing Data", - "text": "The test dataset consists of 30 test samples coming from the same training collection.", + "title": "Evaluation Data", + "text": "The evaluation dataset consists of 30 test samples coming from the Promise12 challenge.", "tables": [], "references": [ { From 4abafd57479aa8d5b562355e2af83f8da8c9f081 Mon Sep 17 00:00:00 2001 From: Miriam Groeneveld Date: Thu, 30 Nov 2023 14:54:05 +0100 Subject: [PATCH 039/125] PR comments on mata.json --- models/gc_picai_baseline/config/default.yml | 4 +- .../gc_picai_baseline/config/mha-pipeline.yml | 4 +- models/gc_picai_baseline/meta.json | 83 ++++++++++++++----- .../utils/PicaiBaselineRunner.py | 35 ++++---- 4 files changed, 83 
insertions(+), 43 deletions(-) diff --git a/models/gc_picai_baseline/config/default.yml b/models/gc_picai_baseline/config/default.yml index fe257f9d..c86cac07 100644 --- a/models/gc_picai_baseline/config/default.yml +++ b/models/gc_picai_baseline/config/default.yml @@ -24,8 +24,8 @@ modules: ReportExporter: format: compact includes: - - data: prostate_cancer_probability - label: prostate_cancer_probability + - data: prostate_cancer_likelihood + label: prostate_cancer_likelihood value: value DataOrganizer: diff --git a/models/gc_picai_baseline/config/mha-pipeline.yml b/models/gc_picai_baseline/config/mha-pipeline.yml index f70fac4b..f20d5abc 100644 --- a/models/gc_picai_baseline/config/mha-pipeline.yml +++ b/models/gc_picai_baseline/config/mha-pipeline.yml @@ -21,8 +21,8 @@ modules: ReportExporter: format: compact includes: - - data: prostate_cancer_probability - label: prostate_cancer_probability + - data: prostate_cancer_likelihood + label: prostate_cancer_likelihood value: value DataOrganizer: diff --git a/models/gc_picai_baseline/meta.json b/models/gc_picai_baseline/meta.json index 59aab4d2..c83ce46d 100644 --- a/models/gc_picai_baseline/meta.json +++ b/models/gc_picai_baseline/meta.json @@ -3,11 +3,31 @@ "name": "picai_baseline", "title": "PI-CAI challenge baseline", "summary": { - "description": "The PI-CAI challenge is to validate modern AI algorithms at clinically significant prostate cancer (csPCa) detection and diagnosis. This model algorithm provides the baseline for the challenge.", + "description": "This algorithm predicts a detection map for the likelihood of clinically significant prostate cancer (csPCa) using biparametric MRI (bpMRI). The algorithm ensembles 5-fold cross-validation models that were trained on the PI-CAI: Public Training and Development Dataset v2.0. 
The detection map is at the same spatial resolution and physical dimensions as the input axial T2-weighted image.", "inputs": [ { - "label": "Prostate biparametric MRI", - "description": "Prostate biparametric MRI exam", + "label": "Transverse T2-weighted Prostate biparametric MRI", + "description": " Transverse T2-weighted Prostate biparametric MRI exam", + "format": "DICOM", + "modality": "MR", + "bodypartexamined": "Prostate", + "slicethickness": "", + "non-contrast": false, + "contrast": false + }, + { + "label": "High b-value diffusion-weighted maps", + "description": "High b-value diffusion-weighted maps, with b-value of 1400 or 2000, either acquired or vendor-calculated.", + "format": "DICOM", + "modality": "MR", + "bodypartexamined": "Prostate", + "slicethickness": "", + "non-contrast": false, + "contrast": false + }, + { + "label": "ADC map", + "description": "ADC map", "format": "DICOM", "modality": "MR", "bodypartexamined": "Prostate", @@ -19,14 +39,14 @@ "outputs": [ { "type": "Prediction", - "valueType": "Probability", - "label": "Prostate cancer probability", + "valueType": "Likelihood", + "label": "Prostate cancer likelihood", "description": "Case-level likelihood of harboring clinically significant prostate cancer, in range [0,1]", "classes": [] }, { "type": "Prediction", - "valueType": "Probability map", + "valueType": "Likelihood map", "label": "Transverse cancer detection map", "description": "Detection map of clinically significant prostate cancer lesions in 3D, where each voxel represents a floating point in range [0,1]", "classes": [] @@ -34,15 +54,15 @@ ], "model": { "architecture": "3d fullres nnUNet", - "training": "supervised", + "training": "semi-supervised", "cmpapproach": "3D" }, "data": { "training": { - "vol_samples": 1200 + "vol_samples": 1500 }, "evaluation": { - "vol_samples": 300 + "vol_samples": 1000 }, "public": false, "external": false @@ -61,9 +81,18 @@ "cite": "", "license": { "code": "Apache 2.0", - "weights": "Apache 2.0" + 
"weights": "CC-BY-NC-4.0" }, - "publications": [], + "publications": [ + { + "uri": "https://doi.org/10.5281/zenodo.6667655", + "title": "Artificial Intelligence and Radiologists at Prostate Cancer Detection in MRI: The PI-CAI Challenge (Study Protocol)" + }, + { + "uri": "https://pubs.rsna.org/doi/10.1148/ryai.230031", + "title": "Semisupervised Learning with Report-guided Pseudo Labels for Deep Learning–based Prostate Cancer Detection Using Biparametric MRI" + } + ], "github": "https://github.com/DIAGNijmegen/picai_nnunet_semi_supervised_gc_algorithm", "zenodo": "", "colab": "", @@ -72,7 +101,7 @@ "info": { "use": { "title": "Intended use", - "text": "Prediction of the likelihood of harboring clinically significant prostate cancer (csPCa) in prostate biparametric MRI exams.", + "text": "This algorithm is a deep learning-based detection/diagnosis model, which ensembles 5 independent nnU-Net models (5-fold cross-validation). To predict the likelihood of harboring clinically significant prostate cancer (csPCa), the transversal T2-weighted, apparent diffusion coefficient (ADC) and high b-value diffusion maps are required. The input sequences should be co-registered or aligned reasonably well and the prostate gland should be localized within a volume of 460 cm³ from the centre coordinate. To train these models, a total of 1500 prostate biparametric MRI (bpMRI) scans paired with human-annotated or AI-derived ISUP ≥ 2 delineations were used.", "references": [ { "label": "PI-CAI baseline algorithm on grand-challenge", @@ -83,18 +112,30 @@ }, "analyses": { "title": "Evaluation", - "text": "Patient-level diagnosis performance is evaluated using the Area Under Receiver Operating Characteristic (AUROC) metric. Lesion-level detection performance is evaluated using the Average Precision (AP) metric. 
Overall score used to rank each AI algorithm is the average of both task-specific metrics: Overall Ranking Score = (AP + AUROC) / 2", + "text": "Patient-level diagnosis performance is evaluated using the Area Under Receiver Operating Characteristic (AUROC) metric. Lesion-level detection performance is evaluated using the Average Precision (AP) metric.", "references": [ { "label": "PI-CAI AI challenge details", "uri": "https://pi-cai.grand-challenge.org/AI/" + }, + { + "label": "PI-CAI baseline algorithm evaluation results on grand-challenge.", + "uri": "https://pi-cai.grand-challenge.org/evaluation/fe187cdb-cb61-4cbb-ab63-2de483a52d60/" } ], - "tables": [] + "tables": [ + { + "label": "Evaluation results on the PI-CAI testing cohort of 1000 cases.", + "entries": { + "AUROC": "0.865", + "AP": "0.576" + } + } + ] }, "evaluation": { "title": "Evaluation data", - "text": "The test sets are two private cohorts of 100 and 1000 biparametric MRI exams respectively. The first was used to tune the algorithms in a public leaderboard, the second was used to determine the top 5 AI algorithms.", + "text": "The PI-CAI Hidden Testing Cohort (1000 cases) includes internal testing data (unseen cases from seen centers) and external testing data (unseen cases from an unseen center).", "references": [ { "label": "PI-CAI data section", @@ -105,19 +146,19 @@ }, "training": { "title": "Training data", - "text": "For the PI-CAI a publicly available training datasets of 1500 biparametric MRI exams including 328 cases from the ProstateX challenge were made available.", + "text": "The publicly available PI-CAI training and development dataset of 1500 biparametric MRI exams was used for training [1]. AI-derived annotations were created for cases without manual annotations [2]. This model was trained using a custom preprocessing step followed by the standard nnU-Net pipeline. The default nnU-Net loss-function was changed to Cross-Entropy + Focal loss. 
[3]", "references": [ { - "label": "PI-CAI publicly available training data", + "label": "PI-CAI publicly available training and development dataset", "uri": "https://zenodo.org/record/6624726" }, { - "label": "PI-CAI publicly available training data annotations", - "uri": "https://github.com/DIAGNijmegen/picai_labels" + "label": "Method to obtain AI-derived annotations", + "uri": "https://fastmri.eu/research/bosma22a.html" }, { - "label": "ProstateX challenge", - "uri": "https://prostatex.grand-challenge.org/" + "label": "Detailed description of training method", + "uri": "https://github.com/DIAGNijmegen/picai_baseline/blob/main/nnunet_baseline.md" } ], "tables": [] diff --git a/models/gc_picai_baseline/utils/PicaiBaselineRunner.py b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py index d4a122f2..4f1d05ca 100644 --- a/models/gc_picai_baseline/utils/PicaiBaselineRunner.py +++ b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py @@ -18,12 +18,11 @@ from process import csPCaAlgorithm as PicaiClassifier -@ValueOutput.Name('prostate_cancer_probability') -@ValueOutput.Meta(Meta(key="value")) -@ValueOutput.Label('ProstateCancerProbability') +@ValueOutput.Name('prostate_cancer_likelihood') +@ValueOutput.Label('ProstateCancerLikelihood') @ValueOutput.Type(float) -@ValueOutput.Description('Probability of case-level prostate cancer.') -class ProstateCancerProbability(ValueOutput): +@ValueOutput.Description('Likelihood of case-level prostate cancer.') +class ProstateCancerLikelihood(ValueOutput): pass @@ -33,10 +32,10 @@ class PicaiBaselineRunner(Module): @IO.Input('in_data_t2', 'mha:mod=mr:type=t2w', the='input T2 weighted prostate MR image') @IO.Input('in_data_adc', 'mha:mod=mr:type=adc', the='input ADC prostate MR image') @IO.Input('in_data_hbv', 'mha:mod=mr:type=hbv', the='input HBV prostate MR image') - @IO.Output('cancer_probability_json', 'cspca-case-level-likelihood.json', "json", bundle='model', the='output JSON file with PICAI baseline prostate cancer 
probability') - @IO.Output('cancer_detection_heatmap', 'cspca_detection_map.mha', "mha:mod=hm", bundle='model', the='output heatmap indicating prostate cancer probability') - @IO.OutputData('cancer_probability', ProstateCancerProbability, the='PICAI baseline prostate cancer probability') - def task(self, instance: Instance, in_data_t2: InstanceData, in_data_adc: InstanceData, in_data_hbv: InstanceData, cancer_probability_json: InstanceData, cancer_detection_heatmap: InstanceData, cancer_probability: ProstateCancerProbability) -> None: + @IO.Output('cancer_likelihood_json', 'cspca-case-level-likelihood.json', "json", bundle='model', the='output JSON file with PICAI baseline prostate cancer likelihood') + @IO.Output('cancer_detection_heatmap', 'cspca_detection_map.mha', "mha:mod=hm", bundle='model', the='output heatmap indicating prostate cancer likelihood') + @IO.OutputData('cancer_likelihood', ProstateCancerLikelihood, the='PICAI baseline prostate cancer likelihood') + def task(self, instance: Instance, in_data_t2: InstanceData, in_data_adc: InstanceData, in_data_hbv: InstanceData, cancer_likelihood_json: InstanceData, cancer_detection_heatmap: InstanceData, cancer_likelihood: ProstateCancerLikelihood) -> None: # Initialize classifier object classifier = PicaiClassifier() @@ -49,20 +48,20 @@ def task(self, instance: Instance, in_data_t2: InstanceData, in_data_adc: Instan # Specify output files classifier.cspca_detection_map_path = Path(cancer_detection_heatmap.abspath) - classifier.case_confidence_path = Path(cancer_probability_json.abspath) + classifier.case_confidence_path = Path(cancer_likelihood_json.abspath) # Run the classifier on the input images classifier.process() - # Extract cancer probability value from cancer_probability_file - if not Path(cancer_probability_json.abspath).is_file(): - raise FileNotFoundError(f"Output file {cancer_probability_json.abspath} could not be found!") + # Extract cancer likelihood value from cancer_likelihood_file + if not 
Path(cancer_likelihood_json.abspath).is_file(): + raise FileNotFoundError(f"Output file {cancer_likelihood_json.abspath} could not be found!") - with open(cancer_probability_json.abspath, "r") as f: - cancer_prob = float(json.load(f)) + with open(cancer_likelihood_json.abspath, "r") as f: + cancer_lh = float(json.load(f)) - if not (isinstance(cancer_prob, (float, int)) and (0.0 <= cancer_prob <= 1.0)): - raise ValueError(f"Cancer probability value should be a probability value, found: {cancer_prob}") + if not (isinstance(cancer_lh, (float, int)) and (0.0 <= cancer_lh <= 1.0)): + raise ValueError(f"Cancer likelihood value should be between 0 and 1, found: {cancer_lh}") # Output the predicted values - cancer_probability.value = cancer_prob + cancer_likelihood.value = cancer_lh From 8d7a6aa516cf22498aacaa0e55f0792e797f7ebe Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 30 Nov 2023 16:08:13 +0100 Subject: [PATCH 040/125] Dockerfile - minor cleanup and add sklearn deprecation environment flag for nnunet==1.7.0 --- models/gc_picai_baseline/dockerfiles/Dockerfile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/models/gc_picai_baseline/dockerfiles/Dockerfile b/models/gc_picai_baseline/dockerfiles/Dockerfile index 84a1c607..a91146aa 100644 --- a/models/gc_picai_baseline/dockerfiles/Dockerfile +++ b/models/gc_picai_baseline/dockerfiles/Dockerfile @@ -7,7 +7,9 @@ LABEL authors="sil.vandeleemput@radboudumc.nl" RUN pip3 install --no-cache-dir torch==2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html # Install git-lfs (required for unpacking model weights) -RUN apt update && apt install -y --no-install-recommends git-lfs && rm -rf /var/lib/apt/lists/* +RUN apt update && \ + apt install -y --no-install-recommends git-lfs && \ + rm -rf /var/lib/apt/lists/* # Install PICAI baseline algorithm and model weights # - Git clone the algorithm repository for v2.1.1 (fixed to v2.1.1 tag) @@ -17,6 +19,13 @@ RUN git clone --depth 1 
--branch v2.1.1 https://github.com/DIAGNijmegen/picai_nn rm -rf /opt/algorithm/test && \ rm -rf /opt/algorithm/.git +# Set this environment variable as a shortcut to avoid nnunet==1.7.0 crashing the build +# by pulling sklearn instead of scikit-learn +# N.B. this is a known issue: +# https://github.com/MIC-DKFZ/nnUNet/issues/1281 +# https://github.com/MIC-DKFZ/nnUNet/pull/1209 +ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True + # Install additional PICAI requirements RUN pip3 install --no-cache-dir -r /opt/algorithm/requirements.txt From 3b72e0e31002762da35f6f944debab1b787c80ca Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 30 Nov 2023 16:12:34 +0100 Subject: [PATCH 041/125] meta.json - add citation to cite field --- models/gc_picai_baseline/meta.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/gc_picai_baseline/meta.json b/models/gc_picai_baseline/meta.json index c83ce46d..b15c65ac 100644 --- a/models/gc_picai_baseline/meta.json +++ b/models/gc_picai_baseline/meta.json @@ -78,7 +78,7 @@ "code": "2022-09-05", "pub": "" }, - "cite": "", + "cite": "J. S. Bosma, A. Saha, M. Hosseinzadeh, I. Slootweg, M. de Rooij, and H. Huisman, \"Semisupervised Learning with Report-guided Pseudo Labels for Deep Learning–based Prostate Cancer Detection Using Biparametric MRI\", Radiology: Artificial Intelligence, 230031, 2023. 
DOI: 10.1148/ryai.230031", "license": { "code": "Apache 2.0", "weights": "CC-BY-NC-4.0" From 66e88c37647a3b59d5bb9cc489ad04d85dd876e0 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 30 Nov 2023 16:21:22 +0100 Subject: [PATCH 042/125] meta.json - describe relation to the PI-CAI challenge in description --- models/gc_picai_baseline/meta.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/gc_picai_baseline/meta.json b/models/gc_picai_baseline/meta.json index b15c65ac..cfd8697d 100644 --- a/models/gc_picai_baseline/meta.json +++ b/models/gc_picai_baseline/meta.json @@ -3,7 +3,7 @@ "name": "picai_baseline", "title": "PI-CAI challenge baseline", "summary": { - "description": "This algorithm predicts a detection map for the likelihood of clinically significant prostate cancer (csPCa) using biparametric MRI (bpMRI). The algorithm ensembles 5-fold cross-validation models that were trained on the PI-CAI: Public Training and Development Dataset v2.0. The detection map is at the same spatial resolution and physical dimensions as the input axial T2-weighted image.", + "description": "This algorithm predicts a detection map for the likelihood of clinically significant prostate cancer (csPCa) using biparametric MRI (bpMRI). The algorithm ensembles 5-fold cross-validation models that were trained on the PI-CAI: Public Training and Development Dataset v2.0. The detection map is at the same spatial resolution and physical dimensions as the input axial T2-weighted image. 
This model algorithm was used as a baseline for the PI-CAI challenge hosted on Grand Challenge.", "inputs": [ { "label": "Transverse T2-weighted Prostate biparametric MRI", From addda619be20bff46c9e0dbd2248681f913b3653 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Fri, 1 Dec 2023 10:54:57 +0100 Subject: [PATCH 043/125] add customizable parameter to optionally disable tta --- .../nnunet_prostate_zonal_task05/utils/ProstateRunner.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py b/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py index f8e65c3a..d8e5483f 100644 --- a/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py +++ b/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py @@ -1,8 +1,11 @@ import os, shutil from mhubio.core import Module, Instance, InstanceData, IO +@IO.Config("disable_tta", bool, default=False, the="Disable test time augmentation for nnUNet.") class ProstateRunner(Module): + disable_tta: bool + @IO.Instance() @IO.Input('T2', 'nifti:part=T2', the="T2 image") @IO.Input('ADC', 'nifti:part=ADC:resampled_to=T2', the="ADC image resampled to T2") @@ -35,6 +38,10 @@ def task(self, instance: Instance, T2: InstanceData, ADC: InstanceData, P: Insta bash_command += ["--output_folder", str(P.bundle.abspath)] bash_command += ["--task_name", 'Task005_Prostate'] bash_command += ["--model", '3d_fullres'] + + # optional / customizable arguments + if self.disable_tta: + bash_command += ["--disable_tta"] # run command self.subprocess(bash_command, text=True) \ No newline at end of file From 3f6739da6a78b1c7f98167362f17d9cf88322a08 Mon Sep 17 00:00:00 2001 From: cciausu Date: Mon, 4 Dec 2023 16:32:22 -0500 Subject: [PATCH 044/125] Update default.yml --- models/nnunet_prostate_task24/config/default.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/models/nnunet_prostate_task24/config/default.yml b/models/nnunet_prostate_task24/config/default.yml 
index f12c1f44..0df38e60 100644 --- a/models/nnunet_prostate_task24/config/default.yml +++ b/models/nnunet_prostate_task24/config/default.yml @@ -27,9 +27,7 @@ modules: nnunet_task: Task024_Promise nnunet_model: 3d_fullres roi: PROSTATE - - BamfProcessorRunner: - + DsegConverter: source_segs: nifti:mod=seg target_dicom: dicom:mod=mr @@ -38,4 +36,4 @@ modules: DataOrganizer: targets: - - dicomseg-->[i:sid]/nnunet_mr_prostate.seg.dcm \ No newline at end of file + - dicomseg-->[i:sid]/nnunet_mr_prostate.seg.dcm From 2dfcd554b1219db663479f474482cb336e7e4a6d Mon Sep 17 00:00:00 2001 From: ccosmin97 Date: Mon, 4 Dec 2023 21:37:30 +0000 Subject: [PATCH 045/125] init commit for prostate158 model --- models/monai_prostate158/config/default.yml | 39 ++++++ .../monai_prostate158/dockerfiles/dockerfile | 28 ++++ models/monai_prostate158/meta.json | 125 ++++++++++++++++++ .../utils/Prostate158Runner.py | 1 + 4 files changed, 193 insertions(+) create mode 100644 models/monai_prostate158/config/default.yml create mode 100644 models/monai_prostate158/dockerfiles/dockerfile create mode 100644 models/monai_prostate158/meta.json create mode 100644 models/monai_prostate158/utils/Prostate158Runner.py diff --git a/models/monai_prostate158/config/default.yml b/models/monai_prostate158/config/default.yml new file mode 100644 index 00000000..20ca9eb1 --- /dev/null +++ b/models/monai_prostate158/config/default.yml @@ -0,0 +1,39 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: default configuration for MONAI Prostate158 MR Prostate zonal regions segmentation (dicom to dicom) + +execute: +- DicomImporter +- NiftiConverter +- Prostate158Runner +- DsegConverter +- DataOrganizer + +modules: + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: true + meta: + mod: 'mr' + + NiftiConverter: + in_datas: dicom:mod=mr + engine: dcm2niix + + Prostate158Runner: + in_data: nifti:mod=mr + nnunet_task: prostate_mri_anatomy + # nnunet_model: 3d_fullres + roi: 
PROSTATE + + DsegConverter: + source_segs: nifti:mod=seg + target_dicom: dicom:mod=mr + model_name: 'Prostate158' + skip_empty_slices: True + + DataOrganizer: + targets: + - dicomseg-->[i:sid]/nnunet_mr_prostate.seg.dcm \ No newline at end of file diff --git a/models/monai_prostate158/dockerfiles/dockerfile b/models/monai_prostate158/dockerfiles/dockerfile new file mode 100644 index 00000000..c5debeee --- /dev/null +++ b/models/monai_prostate158/dockerfiles/dockerfile @@ -0,0 +1,28 @@ +FROM mhubai/base:latest + +# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build +# by pulling sklearn instead of scikit-learn +# N.B. this is a known issue: +# https://github.com/MIC-DKFZ/nnUNet/issues/1281 +# https://github.com/MIC-DKFZ/nnUNet/pull/1209 +ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True + +# Install nnunet and platipy +RUN pip3 install --no-cache-dir \ + "monai[fire]" + +# Clone the main branch of MHubAI/models +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh monai_prostate158 ${MHUB_MODELS_REPO} + +# Pull weights into the container +ENV WEIGHTS_DIR=/root/.monai/bundles/ +RUN mkdir -p $WEIGHTS_DIR +RUN python -m monai.bundle download "prostate_mri_anatomy" --bundle_dir ${WEIGHTS_DIR} + +# specify nnunet specific environment variables +# ENV WEIGHTS_FOLDER=$WEIGHTS_DIR + +# Default run script +ENTRYPOINT ["mhub.run"] +CMD ["--config", "/app/models/monai_prostate158/config/default.yml"] \ No newline at end of file diff --git a/models/monai_prostate158/meta.json b/models/monai_prostate158/meta.json new file mode 100644 index 00000000..50c4e568 --- /dev/null +++ b/models/monai_prostate158/meta.json @@ -0,0 +1,125 @@ +{ + "id": "...", + "name": "nnunet_prostate_task24_promise", + "title": "nnU-Net (Whole prostate segmentation)", + "summary": { + "description": "nnU-Net's whole prostate segmentation model is a single-modality (i.e. 
T2) input AI-based pipeline for the automated segmentation of the whole prostate on MRI scans.", + "inputs": [ + { + "label": "T2 input image", + "description": "The T2 axial-acquired sequence being the input image", + "format": "DICOM", + "modality": "MR", + "bodypartexamined": "Prostate", + "slicethickness": "3.6 mm", + "non-contrast": true, + "contrast": false + } + ], + "outputs": [ + { + "type": "Segmentation", + "classes": [ + "PROSTATE" + ] + } + ], + "model": { + "architecture": "U-net", + "training": "supervised", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 50 + }, + "evaluation": { + "vol_samples": 30 + }, + "public": true, + "external": false + } + }, + "details": { + "name": "nnU-Net whole prostate segmentation model", + "version": "1.0.0", + "devteam": "MIC-DKFZ (Helmholtz Imaging Applied Computer Vision Lab)", + "type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)", + "date": { + "weights": "2020", + "code": "2020", + "pub": "2020" + }, + "cite": "Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2020). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature Methods, 1-9.", + "license": { + "code": "Apache 2.0", + "weights": "CC BY-NC 4.0" + }, + "publications": [ + { + "title": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation", + "uri": "https://www.nature.com/articles/s41592-020-01008-z" + } + ], + "github": "https://github.com/MIC-DKFZ/nnUNet/tree/nnunetv1", + "zenodo": "https://zenodo.org/record/4485926" + }, + "info": { + "use": { + "title": "Intended Use", + "text": "This model is intended to perform prostate anatomy segmentation in MR T2 scans. The slice thickness of the training data is 2.2~4mm. Endorectal coil was present during training." 
+ }, + "analyses": { + "title": "Quantitative Analyses", + "text": "The model's performance was assessed using the Dice Coefficient, in the context of the Promise12 challenge. The complete breakdown of the metrics can be consulted on GrandChallenge [1] and is reported in the supplementary material to the publication [2].", + "references": [ + { + "label": "Evaluation of prostate segmentation algorithms for MRI: The PROMISE12 challenge", + "uri": "https://doi.org/10.1016/j.media.2013.12.002" + }, + { + "label": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation", + "uri": "https://www.nature.com/articles/s41592-020-01008-z" + } + ] + }, + "evaluation": { + "title": "Evaluation Data", + "text": "The evaluation dataset consists of 30 test samples coming from the Promise12 challenge.", + "tables": [], + "references": [ + { + "label": "Evaluation of prostate segmentation algorithms for MRI: The PROMISE12 challenge", + "uri": "https://doi.org/10.1016/j.media.2013.12.002" + }, + { + "label": "PROMISE12 dataset (direct download)", + "uri": "https://zenodo.org/records/8026660" + } + ] + }, + "training": { + "title": "Training Data", + "text": "The training dataset consists of 50 MRI cases containing the prostate, from the Promise12 challenge. 
The authors report the following characteristics for the training dataset:", + "tables": [ + { + "label": "PROMISE12 dataset (training)", + "entries": { + "Slice Thickness": "2.2~4 mm", + "In-Plane Resolution": "0.27 mm" + } + } + ], + "references": [ + { + "label": "Evaluation of prostate segmentation algorithms for MRI: The PROMISE12 challenge", + "uri": "https://doi.org/10.1016/j.media.2013.12.002" + }, + { + "label": "PROMISE12 dataset (direct download)", + "uri": "https://zenodo.org/records/8026660" + } + ] + } + } +} \ No newline at end of file diff --git a/models/monai_prostate158/utils/Prostate158Runner.py b/models/monai_prostate158/utils/Prostate158Runner.py new file mode 100644 index 00000000..dbd265c6 --- /dev/null +++ b/models/monai_prostate158/utils/Prostate158Runner.py @@ -0,0 +1 @@ +#.... \ No newline at end of file From 39734f9a7151f80d63f836adadb7b56cf3e5f7ab Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 5 Dec 2023 13:54:52 +0100 Subject: [PATCH 046/125] meta.json - update input/output descriptions, extended intended use section --- models/gc_picai_baseline/meta.json | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/models/gc_picai_baseline/meta.json b/models/gc_picai_baseline/meta.json index cfd8697d..e172b4e1 100644 --- a/models/gc_picai_baseline/meta.json +++ b/models/gc_picai_baseline/meta.json @@ -6,32 +6,32 @@ "description": "This algorithm predicts a detection map for the likelihood of clinically significant prostate cancer (csPCa) using biparametric MRI (bpMRI). The algorithm ensembles 5-fold cross-validation models that were trained on the PI-CAI: Public Training and Development Dataset v2.0. The detection map is at the same spatial resolution and physical dimensions as the input axial T2-weighted image.
This model algorithm was used as a baseline for the PI-CAI challenge hosted on Grand Challenge.", "inputs": [ { - "label": "Transverse T2-weighted Prostate biparametric MRI", - "description": " Transverse T2-weighted Prostate biparametric MRI exam", + "label": "Transverse T2-weighted prostate biparametric MRI", + "description": "Transverse T2-weighted prostate biparametric MRI exam.", "format": "DICOM", "modality": "MR", "bodypartexamined": "Prostate", - "slicethickness": "", + "slicethickness": "0.5 x 0.5 x 3.0 mm", "non-contrast": false, "contrast": false }, { - "label": "High b-value diffusion-weighted maps", - "description": "High b-value diffusion-weighted maps, with b-value of 1400 or 2000, either acquired or vendor-calculated.", + "label": "Transverse high b-value diffusion-weighted maps of the prostate", + "description": "Transverse high b-value diffusion-weighted (DWI) maps, with b-value of 1400 or 2000, either acquired or vendor-calculated.", "format": "DICOM", "modality": "MR", "bodypartexamined": "Prostate", - "slicethickness": "", + "slicethickness": "0.5 x 0.5 x 3.0 mm", "non-contrast": false, "contrast": false }, { - "label": "ADC map", - "description": "ADC map", + "label": "Transverse apparent diffusion coefficient map of the prostate", + "description": "Transverse apparent diffusion coefficient (ADC) prostate MRI map.", "format": "DICOM", "modality": "MR", "bodypartexamined": "Prostate", - "slicethickness": "", + "slicethickness": "0.5 x 0.5 x 3.0 mm", "non-contrast": false, "contrast": false } @@ -41,14 +41,14 @@ "type": "Prediction", "valueType": "Likelihood", "label": "Prostate cancer likelihood", - "description": "Case-level likelihood of harboring clinically significant prostate cancer, in range [0,1]", + "description": "Case-level likelihood of harboring clinically significant prostate cancer, in range [0,1].", "classes": [] }, { "type": "Prediction", "valueType": "Likelihood map", "label": "Transverse cancer detection map", - "description": 
"Detection map of clinically significant prostate cancer lesions in 3D, where each voxel represents a floating point in range [0,1]", + "description": "Detection map of clinically significant prostate cancer lesions in 3D, where each voxel represents a floating point in range [0,1]. This map is at the same spatial resolution and physical dimensions as the input transversal T2-weighted image.", "classes": [] } ], @@ -101,7 +101,7 @@ "info": { "use": { "title": "Intended use", - "text": "This algorithm is a deep learning-based detection/diagnosis model, which ensembles 5 independent nnU-Net models (5-fold cross-validation). To predict the likelihood of harboring clinically significant prostate cancer (csPCa), the transversal T2-weighted, apparent diffusion coefficient (ADC) and high b-value diffusion maps are required. The input sequences should be co-registered or aligned reasonably well and the prostate gland should be localized within a volume of 460 cm³ from the centre coordinate. To train these models, a total of 1500 prostate biparametric MRI (bpMRI) scans paired with human-annotated or AI-derived ISUP ≥ 2 delineations were used.", + "text": "This algorithm is a deep learning-based detection/diagnosis model, which ensembles 5 independent nnU-Net models (5-fold cross-validation). To predict the likelihood of harboring clinically significant prostate cancer (csPCa), the transversal T2-weighted, apparent diffusion coefficient (ADC) and high b-value diffusion weighted maps are required. The input sequences should be co-registered or aligned reasonably well and the prostate gland should be localized within a volume of 460 cm³ from the centre coordinate. The nnU-Net framework will internally resample all input scans to 0.5 x 0.5 x 3.0 mm. 
Per case the input data should be put into the following folder structure: `case1/adc`, `case1/hbv`, `case1/t2w`, corresponding respectively with the ADC, high b-value DWI, and the T2 weighted MR inputs for a case called `case1`.", "references": [ { "label": "PI-CAI baseline algorithm on grand-challenge", @@ -146,7 +146,7 @@ }, "training": { "title": "Training data", - "text": "The publicly available PI-CAI training and development dataset of 1500 biparametric MRI exams was used for training [1]. AI-derived annotations were created for cases without manual annotations [2]. This model was trained using a custom preprocessing step followed by the standard nnU-Net pipeline. The default nnU-Net loss-function was changed to Cross-Entropy + Focal loss. [3]", + "text": "The publicly available PI-CAI training and development dataset of 1500 biparametric MRI exams was used for training [1]. AI-derived annotations were created for cases without manual annotations [2]. This model was trained using a custom preprocessing step followed by the standard nnU-Net pipeline. 
The default nnU-Net loss-function was changed to Cross-Entropy + Focal loss [3].", "references": [ { "label": "PI-CAI publicly available training and development dataset", From 2b035fee5fa473c4d2f173638101df3808ebf43f Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 7 Dec 2023 12:29:55 +0100 Subject: [PATCH 047/125] Added segdb export, removed dseg.json, added remapped output to runner, updated Dockerfile and config --- models/gc_nnunet_pancreas/config/default.yml | 8 +- models/gc_nnunet_pancreas/config/dseg.json | 168 ------------------ .../gc_nnunet_pancreas/dockerfiles/Dockerfile | 21 ++- .../utils/GCNNUnetPancreasRunner.py | 33 +++- 4 files changed, 46 insertions(+), 184 deletions(-) delete mode 100644 models/gc_nnunet_pancreas/config/dseg.json diff --git a/models/gc_nnunet_pancreas/config/default.yml b/models/gc_nnunet_pancreas/config/default.yml index 5ae2cae2..b13691ac 100644 --- a/models/gc_nnunet_pancreas/config/default.yml +++ b/models/gc_nnunet_pancreas/config/default.yml @@ -16,20 +16,20 @@ modules: import_dir: sorted_data sort_data: true meta: - mod: ct + mod: '%Modality' MhaConverter: engine: panimg + targets: [dicom:mod=ct] DsegConverter: model_name: 'GC NNUnet Pancreas' - source_segs: ['mha:mod=seg'] + source_segs: ['mha:mod=seg:type=remapped'] target_dicom: dicom:mod=ct skip_empty_slices: True - json_config_path: /app/models/gc_nnunet_pancreas/config/dseg.json DataOrganizer: targets: - mha:mod=heatmap-->[i:sid]/nnunet_pancreas_heatmap.mha - - mha:mod=seg-->[i:sid]/nnunet_pancreas.seg.mha + - mha:mod=seg:type=original-->[i:sid]/nnunet_pancreas.seg.mha - dicomseg:mod=seg-->[i:sid]/nnunet_pancreas.seg.dcm diff --git a/models/gc_nnunet_pancreas/config/dseg.json b/models/gc_nnunet_pancreas/config/dseg.json deleted file mode 100644 index 1e52a967..00000000 --- a/models/gc_nnunet_pancreas/config/dseg.json +++ /dev/null @@ -1,168 +0,0 @@ -{ - "ContentCreatorName": "IDC", - "ClinicalTrialSeriesID": "0", - "ClinicalTrialTimePointID": "1", - 
"SeriesDescription": "Segmentation", - "SeriesNumber": "42", - "InstanceNumber": "1", - "BodyPartExamined": "ABDOMEN", - "segmentAttributes": [ - [ - { - "labelID": 1, - "SegmentDescription": "Veins", - "SegmentAlgorithmType": "AUTOMATIC", - "SegmentAlgorithmName": "GC nnUNet Pancreas", - "SegmentedPropertyCategoryCodeSequence": { - "CodeValue": "123037004", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Anatomical Structure" - }, - "SegmentedPropertyTypeCodeSequence": { - "CodeValue": "29092000", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Vein" - }, - "SegmentedPropertyTypeModifierCodeSequence": { - "CodeValue": "51440002", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Right and left" - }, - "recommendedDisplayRGBValue": [ - 0, - 151, - 206 - ] - }, - { - "labelID": 2, - "SegmentDescription": "Artery", - "SegmentAlgorithmType": "AUTOMATIC", - "SegmentAlgorithmName": "GC nnUNet Pancreas", - "SegmentedPropertyCategoryCodeSequence": { - "CodeValue": "123037004", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Anatomical Structure" - }, - "SegmentedPropertyTypeCodeSequence": { - "CodeValue": "51114001", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Artery" - }, - "SegmentedPropertyTypeModifierCodeSequence": { - "CodeValue": "51440002", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Right and left" - }, - "recommendedDisplayRGBValue": [ - 216, - 101, - 79 - ] - }, - { - "labelID": 3, - "SegmentDescription": "Pancreas", - "SegmentAlgorithmType": "AUTOMATIC", - "SegmentAlgorithmName": "GC nnUNet Pancreas", - "SegmentedPropertyCategoryCodeSequence": { - "CodeValue": "123037004", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Anatomical Structure" - }, - "SegmentedPropertyTypeCodeSequence": { - "CodeValue": "15776009", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Pancreas" - }, - "recommendedDisplayRGBValue": [ - 249, - 180, - 111 - ] - }, - { - "labelID": 4, - "SegmentDescription": "Pancreatic duct", - 
"SegmentAlgorithmType": "AUTOMATIC", - "SegmentAlgorithmName": "GC nnUNet Pancreas", - "SegmentedPropertyCategoryCodeSequence": { - "CodeValue": "123037004", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Anatomical Structure" - }, - "SegmentedPropertyTypeCodeSequence": { - "CodeValue": "69930009", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Pancreatic duct" - } - }, - { - "labelID": 5, - "SegmentDescription": "Bile duct", - "SegmentAlgorithmType": "AUTOMATIC", - "SegmentAlgorithmName": "GC nnUNet Pancreas", - "SegmentedPropertyCategoryCodeSequence": { - "CodeValue": "123037004", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Anatomical Structure" - }, - "SegmentedPropertyTypeCodeSequence": { - "CodeValue": "28273000", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Bile duct" - }, - "SegmentedPropertyTypeModifierCodeSequence": { - "CodeValue": "51440002", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Right and left" - }, - "recommendedDisplayRGBValue": [ - 0, - 145, - 30 - ] - }, - { - "labelID": 6, - "SegmentDescription": "Cysts", - "SegmentAlgorithmType": "AUTOMATIC", - "SegmentAlgorithmName": "GC nnUNet Pancreas", - "SegmentedPropertyCategoryCodeSequence": { - "CodeValue": "49755003", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Morphologically Altered Structure" - }, - "SegmentedPropertyTypeCodeSequence": { - "CodeValue": "367643001", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Cyst" - }, - "recommendedDisplayRGBValue": [ - 205, - 205, - 100 - ] - }, - { - "labelID": 7, - "SegmentDescription": "Renal vein", - "SegmentAlgorithmType": "AUTOMATIC", - "SegmentAlgorithmName": "GC nnUNet Pancreas", - "SegmentedPropertyCategoryCodeSequence": { - "CodeValue": "123037004", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Anatomical Structure" - }, - "SegmentedPropertyTypeCodeSequence": { - "CodeValue": "56400007", - "CodingSchemeDesignator": "SCT", - "CodeMeaning": "Renal vein" - } - } - ] - ], - "ContentLabel": 
"SEGMENTATION", - "ContentDescription": "Image segmentation", - "ClinicalTrialCoordinatingCenterName": "dcmqi" -} \ No newline at end of file diff --git a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile index 68577762..74cd3946 100644 --- a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile +++ b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile @@ -3,27 +3,38 @@ FROM mhubai/base:latest # Specify/override authors label LABEL authors="sil.vandeleemput@radboudumc.nl" +# Install PyTorch 2.0.1 (CUDA enabled) +RUN pip3 install --no-cache-dir torch==2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html + # Install git-lfs (required for downloading the model weights) -RUN apt update && apt install -y --no-install-recommends git-lfs && rm -rf /var/lib/apt/lists/* +RUN apt update && \ + apt install -y --no-install-recommends git-lfs && \ + rm -rf /var/lib/apt/lists/* # Install the model weights and the algorithm files # * Pull algorithm from repo into /opt/algorithm (main branch, commit e4f4008c6e18e60a79f693448562a340a9252aa8) # * Remove .git folder to keep docker layer small # * Replace input images path in process.py with an existing folder to avoid errors +# * Add specific data types and compression options to output data structures in process.py to reduce generated output footprint RUN git clone https://github.com/DIAGNijmegen/CE-CT_PDAC_AutomaticDetection_nnUnet.git /opt/algorithm && \ cd /opt/algorithm && \ git reset --hard e4f4008c6e18e60a79f693448562a340a9252aa8 && \ rm -rf /opt/algorithm/.git && \ - sed -i 's/Path("\/input\/images\/")/Path("\/app")/g' /opt/algorithm/process.py - -# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build + sed -i 's/Path("\/input\/images\/")/Path("\/app")/g' /opt/algorithm/process.py && \ + sed -i 's/pred_2_np = sitk\.GetArrayFromImage(pred_2_nii)/pred_2_np = sitk\.GetArrayFromImage(pred_2_nii)\.astype(np\.uint8)/g' /opt/algorithm/process.py 
&& \ + sed -i 's/pm_image = np\.zeros(image_np\.shape)/pm_image = np\.zeros(image_np\.shape, dtype=np\.float32)/g' /opt/algorithm/process.py && \ + sed -i 's/segmentation_np = np\.zeros(image_np\.shape)/segmentation_np = np\.zeros(image_np\.shape, dtype=np\.uint8)/g' /opt/algorithm/process.py && \ + sed -i 's/sitk\.WriteImage(segmentation_image, str(self\.segmentation))/sitk\.WriteImage(segmentation_image, str(self\.segmentation), True)/g' /opt/algorithm/process.py && \ + sed -i 's/sitk\.WriteImage(pred_itk_resampled, str(self\.heatmap))/sitk\.WriteImage(pred_itk_resampled, str(self\.heatmap), True)/g' /opt/algorithm/process.py + +# Set this environment variable as a shortcut to avoid nnunet 1.7.0 crashing the build # by pulling sklearn instead of scikit-learn # N.B. this is a known issue: # https://github.com/MIC-DKFZ/nnUNet/issues/1281 # https://github.com/MIC-DKFZ/nnUNet/pull/1209 ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True -# Install nnUNet and other requirements (should install PyTorch as well...) 
+# Install nnUNet 1.7.0 and other requirements RUN pip3 install --no-cache-dir -r /opt/algorithm/requirements.txt # Extend the nnUNet installation with custom trainers diff --git a/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py index e57fac33..65d9ff29 100644 --- a/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py +++ b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py @@ -12,26 +12,45 @@ from mhubio.core import Module, Instance, InstanceData, DataType, Meta, IO from pathlib import Path +import SimpleITK +import numpy as np # Import the algorithm pipeline class from the CE-CT_PDAC_AutomaticDetection_nnUnet repository from process import PDACDetectionContainer -# TODO should move to MHubio/core/templates.py -HEATMAP = Meta(mod="heatmap") - class GCNNUnetPancreasRunner(Module): @IO.Instance() @IO.Input('in_data', 'mha:mod=ct', the="input data") @IO.Output('heatmap', 'heatmap.mha', 'mha:mod=heatmap:model=GCNNUnetPancreas', data="in_data", the="heatmap of the pancreatic tumor likelihood") - @IO.Output('segmentation', 'segmentation.mha', 'mha:mod=seg:model=GCNNUnetPancreas', data="in_data", - the="segmentation of the pancreas, with the following classes: " - "1-veins, 2-arteries, 3-pancreas, 4-pancreatic duct, 5-bile duct, 6-cysts, 7-renal vein") - def task(self, instance: Instance, in_data: InstanceData, heatmap: InstanceData, segmentation: InstanceData, **kwargs) -> None: + @IO.Output('segmentation', 'segmentation.mha', 'mha:mod=seg:type=original:model=GCNNUnetPancreas', data="in_data", + the="original segmentation of the pancreas, with the following classes: " + "0-background, 1-veins, 2-arteries, 3-pancreas, 4-pancreatic duct, 5-bile duct, 6-cysts, 7-renal vein") + @IO.Output('segmentation_remapped', 'segmentation_remapped.mha', 'mha:mod=seg:type=remapped:model=GCNNUnetPancreas:roi=PANCREAS,PANCREATIC_DUCT,BILE_DUCT,PANCREAS+CYST,RENAL_VEIN', data="in_data", + the="remapped 
segmentation of the pancreas (without the veins and arteries), with the following classes: " + "0-background, 1-pancreas, 2-pancreatic duct, 3-bile duct, 4-cysts, 5-renal vein") + def task(self, instance: Instance, in_data: InstanceData, heatmap: InstanceData, segmentation: InstanceData, segmentation_remapped: InstanceData, **kwargs) -> None: # Configure the algorithm pipeline class and run it algorithm = PDACDetectionContainer() algorithm.ct_image = in_data.abspath # set as str not Path algorithm.heatmap = Path(heatmap.abspath) algorithm.segmentation = Path(segmentation.abspath) algorithm.process() + + # Generate remapped segmentation + self.remap_segementation( + segmentation=segmentation, + segmentation_remapped=segmentation_remapped + ) + + def remap_segementation(self, segmentation: InstanceData, segmentation_remapped: InstanceData): + mapping = {0:0, 1:0, 2:0, 3:1, 4:2, 5:3, 6:4, 7:5} + mapping_numpy = np.array(list(mapping.values()), dtype=np.uint8) + self.log("Creating remapped segmentation", level="NOTICE") + seg_sitk = SimpleITK.ReadImage(segmentation.abspath) + seg_numpy = SimpleITK.GetArrayFromImage(seg_sitk) + remapped_numpy = mapping_numpy[seg_numpy] + remapped_sitk = SimpleITK.GetImageFromArray(remapped_numpy) + remapped_sitk.CopyInformation(seg_sitk) + SimpleITK.WriteImage(remapped_sitk, segmentation_remapped.abspath, True) From 1f9dd941c0aa9413b342a801257d261ba61d326c Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 7 Dec 2023 12:56:51 +0100 Subject: [PATCH 048/125] update model/algorithm tag to 2.1.2 for nnunet pipe fix --- models/gc_picai_baseline/dockerfiles/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/gc_picai_baseline/dockerfiles/Dockerfile b/models/gc_picai_baseline/dockerfiles/Dockerfile index a91146aa..7c2af162 100644 --- a/models/gc_picai_baseline/dockerfiles/Dockerfile +++ b/models/gc_picai_baseline/dockerfiles/Dockerfile @@ -12,10 +12,10 @@ RUN apt update && \ rm -rf 
/var/lib/apt/lists/* # Install PICAI baseline algorithm and model weights -# - Git clone the algorithm repository for v2.1.1 (fixed to v2.1.1 tag) +# - Git clone the algorithm repository for v2.1.2 (fixed to v2.1.2 tag) # - We remove unnecessary files for a compacter docker layer # - Subsequently we remove the .git directory to procuce a compacter docker layer -RUN git clone --depth 1 --branch v2.1.1 https://github.com/DIAGNijmegen/picai_nnunet_semi_supervised_gc_algorithm.git /opt/algorithm && \ +RUN git clone --depth 1 --branch v2.1.2 https://github.com/DIAGNijmegen/picai_nnunet_semi_supervised_gc_algorithm.git /opt/algorithm && \ rm -rf /opt/algorithm/test && \ rm -rf /opt/algorithm/.git From a56e86002f68b9ff50a5c2a08936957211d842db Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 7 Dec 2023 17:34:06 +0100 Subject: [PATCH 049/125] add cli for running the pdac_detection model --- .../utils/GCNNUnetPancreasRunner.py | 20 ++++--- models/gc_nnunet_pancreas/utils/cli.py | 57 +++++++++++++++++++ 2 files changed, 69 insertions(+), 8 deletions(-) create mode 100644 models/gc_nnunet_pancreas/utils/cli.py diff --git a/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py index 65d9ff29..1142d8b2 100644 --- a/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py +++ b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py @@ -14,9 +14,10 @@ from pathlib import Path import SimpleITK import numpy as np +import sys -# Import the algorithm pipeline class from the CE-CT_PDAC_AutomaticDetection_nnUnet repository -from process import PDACDetectionContainer + +CLI_PATH = Path(__file__).parent / "cli.py" class GCNNUnetPancreasRunner(Module): @@ -31,12 +32,15 @@ class GCNNUnetPancreasRunner(Module): the="remapped segmentation of the pancreas (without the veins and arteries), with the following classes: " "0-background, 1-pancreas, 2-pancreatic duct, 3-bile duct, 4-cysts, 5-renal vein") def task(self, 
instance: Instance, in_data: InstanceData, heatmap: InstanceData, segmentation: InstanceData, segmentation_remapped: InstanceData, **kwargs) -> None: - # Configure the algorithm pipeline class and run it - algorithm = PDACDetectionContainer() - algorithm.ct_image = in_data.abspath # set as str not Path - algorithm.heatmap = Path(heatmap.abspath) - algorithm.segmentation = Path(segmentation.abspath) - algorithm.process() + # Call the PDAC CLI + cmd = [ + sys.executable, + str(CLI_PATH), + in_data.abspath, + heatmap.abspath, + segmentation.abspath + ] + self.subprocess(cmd, text=True) # Generate remapped segmentation self.remap_segementation( diff --git a/models/gc_nnunet_pancreas/utils/cli.py b/models/gc_nnunet_pancreas/utils/cli.py new file mode 100644 index 00000000..460b5a64 --- /dev/null +++ b/models/gc_nnunet_pancreas/utils/cli.py @@ -0,0 +1,57 @@ +""" +---------------------------------------------------- +GC / MHub - CLI for the GC nnUnet Pancreas Algorithm +---------------------------------------------------- + +---------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +---------------------------------------------------- +""" +import argparse +from pathlib import Path + +# Import the algorithm pipeline class from the CE-CT_PDAC_AutomaticDetection_nnUnet repository +from process import PDACDetectionContainer + + +def run_pdac_detection( + input_ct_image: Path, output_heatmap: Path, output_segmentation: Path +): + # Configure the algorithm pipeline class and run it + algorithm = PDACDetectionContainer() + algorithm.ct_image = str(input_ct_image) # set as str not Path + algorithm.heatmap = output_heatmap + algorithm.segmentation = output_segmentation + algorithm.process() + + +def run_pdac_detection_cli(): + parser = argparse.ArgumentParser("CLI for the GC nnUNet Pancreas Algorithm") + parser.add_argument( + "input_ct_image", + type=str, + help="input CT scan (MHA)" + ) + parser.add_argument( + 
"output_heatmap", + type=str, + help="heatmap of the pancreatic tumor likelihood (MHA)", + ) + parser.add_argument( + "output_segmentation", + type=str, + help="segmentation map of the pancreas (MHA), with the following classes: " + "0-background, 1-veins, 2-arteries, 3-pancreas, 4-pancreatic duct, 5-bile duct, " + "6-cysts, 7-renal vein", + ) + args = parser.parse_args() + run_pdac_detection( + input_ct_image=Path(args.input_ct_image), + output_heatmap=Path(args.output_heatmap), + output_segmentation=Path(args.output_segmentation), + ) + + +if __name__ == "__main__": + run_pdac_detection_cli() From 46f6357d35d044671b5beaa9ec702630dccda4c3 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Thu, 7 Dec 2023 19:04:38 +0100 Subject: [PATCH 050/125] add model configurable parameter and invert tta to be in line with our NNUnetRunner module --- .../utils/ProstateRunner.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py b/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py index d8e5483f..e450d490 100644 --- a/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py +++ b/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py @@ -1,17 +1,19 @@ import os, shutil from mhubio.core import Module, Instance, InstanceData, IO -@IO.Config("disbale_tta", bool, default=False, the="Disable test time augmentation for nnUNet.") +@IO.Config('use_tta', bool, False, the='flag to enable test time augmentation') +@IO.Config('nnunet_model', str, None, the='nnunet model name (2d, 3d_lowres, 3d_fullres, 3d_cascade_fullres)') class ProstateRunner(Module): - disable_tta: bool + use_tta: bool + nnunet_model: str @IO.Instance() @IO.Input('T2', 'nifti:part=T2', the="T2 image") @IO.Input('ADC', 'nifti:part=ADC:resampled_to=T2', the="ADC image resampled to T2") @IO.Output('P', 'VOLUME_001.nii.gz', 'nifti:mod=seg:model=nnunet_t005_prostate:roi=PROSTATE_PERIPHERAL_ZONE,PROSTATE_TRANSITION_ZONE', 
bundle='nnunet-out', the="Prostate segmentation") def task(self, instance: Instance, T2: InstanceData, ADC: InstanceData, P: InstanceData) -> None: - + # copy input files to align with the nnunet input folder and file name format # T2: 0000 # ADC: 0001 @@ -37,10 +39,10 @@ def task(self, instance: Instance, T2: InstanceData, ADC: InstanceData, P: Insta bash_command += ["--input_folder", str(inp_dir)] bash_command += ["--output_folder", str(P.bundle.abspath)] bash_command += ["--task_name", 'Task005_Prostate'] - bash_command += ["--model", '3d_fullres'] + bash_command += ["--model", self.nnunet_model] # optional / customizable arguments - if self.disable_tta: + if not self.use_tta: bash_command += ["--disable_tta"] # run command From f3c73f3282b29c576bd0346c2c9e71c3713e94a8 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 12 Dec 2023 10:51:39 +0100 Subject: [PATCH 051/125] add cli and configure runner to use cli --- .../utils/PicaiBaselineRunner.py | 30 +++++------ models/gc_picai_baseline/utils/cli.py | 54 +++++++++++++++++++ 2 files changed, 68 insertions(+), 16 deletions(-) create mode 100644 models/gc_picai_baseline/utils/cli.py diff --git a/models/gc_picai_baseline/utils/PicaiBaselineRunner.py b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py index 4f1d05ca..f3958222 100644 --- a/models/gc_picai_baseline/utils/PicaiBaselineRunner.py +++ b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py @@ -10,12 +10,13 @@ """ import json +import sys from pathlib import Path from mhubio.core import Instance, InstanceData, IO, Module, ValueOutput, ClassOutput, Meta -# Import the PICAI Classifier algorithm class from /opt/algorithm -from process import csPCaAlgorithm as PicaiClassifier + +CLI_PATH = Path(__file__).parent / "cli.py" @ValueOutput.Name('prostate_cancer_likelihood') @@ -36,22 +37,19 @@ class PicaiBaselineRunner(Module): @IO.Output('cancer_detection_heatmap', 'cspca_detection_map.mha', "mha:mod=hm", bundle='model', the='output heatmap indicating 
prostate cancer likelihood') @IO.OutputData('cancer_likelihood', ProstateCancerLikelihood, the='PICAI baseline prostate cancer likelihood') def task(self, instance: Instance, in_data_t2: InstanceData, in_data_adc: InstanceData, in_data_hbv: InstanceData, cancer_likelihood_json: InstanceData, cancer_detection_heatmap: InstanceData, cancer_likelihood: ProstateCancerLikelihood) -> None: - # Initialize classifier object - classifier = PicaiClassifier() - - # Specify input files (the order is important!) - classifier.scan_paths = [ - Path(in_data_t2.abspath), - Path(in_data_adc.abspath), - Path(in_data_hbv.abspath), + # build command (order matters!) + cmd = [ + sys.executable, + str(CLI_PATH), + in_data_t2.abspath, + in_data_adc.abspath, + in_data_hbv.abspath, + cancer_likelihood_json.abspath, + cancer_detection_heatmap.abspath, ] - # Specify output files - classifier.cspca_detection_map_path = Path(cancer_detection_heatmap.abspath) - classifier.case_confidence_path = Path(cancer_likelihood_json.abspath) - - # Run the classifier on the input images - classifier.process() + # run the command as subprocess + self.subprocess(cmd, text=True) # Extract cancer likelihood value from cancer_likelihood_file if not Path(cancer_likelihood_json.abspath).is_file(): diff --git a/models/gc_picai_baseline/utils/cli.py b/models/gc_picai_baseline/utils/cli.py new file mode 100644 index 00000000..64d51a73 --- /dev/null +++ b/models/gc_picai_baseline/utils/cli.py @@ -0,0 +1,54 @@ +""" +-------------------------------------------------- +Mhub / DIAG - CLI for the PICAI baseline Algorithm +-------------------------------------------------- + +-------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +-------------------------------------------------- +""" + +import argparse +from pathlib import Path +from process import csPCaAlgorithm as PicaiClassifier + + +def run_classifier(t2: Path, adc: Path, hbv: Path, cancer_likelihood_json: 
Path, cancer_detection_heatmap: Path): + # Initialize classifier object + classifier = PicaiClassifier() + + # Specify input files (the order is important!) + classifier.scan_paths = [ + t2, + adc, + hbv, + ] + + # Specify output files + classifier.cspca_detection_map_path = cancer_detection_heatmap + classifier.case_confidence_path = cancer_likelihood_json + + # Run the classifier on the input images + classifier.process() + + +def run_classifier_cli(): + parser = argparse.ArgumentParser("CLI to run the PICAI baseline classifier") + parser.add_argument("input_t2", type=str, help="input T2 weighted prostate MR image (MHA)") + parser.add_argument("input_adc", type=str, help="input ADC prostate MR image (MHA)") + parser.add_argument("input_hbv", type=str, help="input HBV prostate MR image (MHA)") + parser.add_argument("output_cancer_likelihood_json", type=str, help="output JSON file with PICAI baseline prostate cancer likelihood (JSON)") + parser.add_argument("output_cancer_detection_heatmap", type=str, help="output heatmap indicating prostate cancer likelihood (MHA)") + args = parser.parse_args() + run_classifier( + t2=Path(args.input_t2), + adc=Path(args.input_adc), + hbv=Path(args.input_hbv), + cancer_likelihood_json=Path(args.output_cancer_likelihood_json), + cancer_detection_heatmap=Path(args.output_cancer_detection_heatmap), + ) + + +if __name__ == "__main__": + run_classifier_cli() From 713b0f3ded2126348fb2e7a15506b28966f5e3fe Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 12 Dec 2023 11:19:13 +0100 Subject: [PATCH 052/125] added VEIN,ARTERY rois to output segmentation, cleaned config and runner code --- models/gc_nnunet_pancreas/config/default.yml | 4 ++-- .../utils/GCNNUnetPancreasRunner.py | 24 ++----------------- 2 files changed, 4 insertions(+), 24 deletions(-) diff --git a/models/gc_nnunet_pancreas/config/default.yml b/models/gc_nnunet_pancreas/config/default.yml index b13691ac..526099e1 100644 --- a/models/gc_nnunet_pancreas/config/default.yml
+++ b/models/gc_nnunet_pancreas/config/default.yml @@ -24,12 +24,12 @@ modules: DsegConverter: model_name: 'GC NNUnet Pancreas' - source_segs: ['mha:mod=seg:type=remapped'] + source_segs: ['mha:mod=seg'] target_dicom: dicom:mod=ct skip_empty_slices: True DataOrganizer: targets: - mha:mod=heatmap-->[i:sid]/nnunet_pancreas_heatmap.mha - - mha:mod=seg:type=original-->[i:sid]/nnunet_pancreas.seg.mha + - mha:mod=seg-->[i:sid]/nnunet_pancreas.seg.mha - dicomseg:mod=seg-->[i:sid]/nnunet_pancreas.seg.dcm diff --git a/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py index 1142d8b2..6ffa6844 100644 --- a/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py +++ b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py @@ -25,13 +25,10 @@ class GCNNUnetPancreasRunner(Module): @IO.Input('in_data', 'mha:mod=ct', the="input data") @IO.Output('heatmap', 'heatmap.mha', 'mha:mod=heatmap:model=GCNNUnetPancreas', data="in_data", the="heatmap of the pancreatic tumor likelihood") - @IO.Output('segmentation', 'segmentation.mha', 'mha:mod=seg:type=original:model=GCNNUnetPancreas', data="in_data", + @IO.Output('segmentation', 'segmentation.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=VEIN,ARTERY,PANCREAS,PANCREATIC_DUCT,BILE_DUCT,PANCREAS+CYST,RENAL_VEIN', data="in_data", the="original segmentation of the pancreas, with the following classes: " "0-background, 1-veins, 2-arteries, 3-pancreas, 4-pancreatic duct, 5-bile duct, 6-cysts, 7-renal vein") - @IO.Output('segmentation_remapped', 'segmentation_remapped.mha', 'mha:mod=seg:type=remapped:model=GCNNUnetPancreas:roi=PANCREAS,PANCREATIC_DUCT,BILE_DUCT,PANCREAS+CYST,RENAL_VEIN', data="in_data", - the="remapped segmentation of the pancreas (without the veins and arteries), with the following classes: " - "0-background, 1-pancreas, 2-pancreatic duct, 3-bile duct, 4-cysts, 5-renal vein") - def task(self, instance: Instance, in_data: InstanceData, heatmap: InstanceData, 
segmentation: InstanceData, segmentation_remapped: InstanceData, **kwargs) -> None: + def task(self, instance: Instance, in_data: InstanceData, heatmap: InstanceData, segmentation: InstanceData, **kwargs) -> None: # Call the PDAC CLI cmd = [ sys.executable, @@ -41,20 +38,3 @@ def task(self, instance: Instance, in_data: InstanceData, heatmap: InstanceData, segmentation.abspath ] self.subprocess(cmd, text=True) - - # Generate remapped segmentation - self.remap_segementation( - segmentation=segmentation, - segmentation_remapped=segmentation_remapped - ) - - def remap_segementation(self, segmentation: InstanceData, segmentation_remapped: InstanceData): - mapping = {0:0, 1:0, 2:0, 3:1, 4:2, 5:3, 6:4, 7:5} - mapping_numpy = np.array(list(mapping.values()), dtype=np.uint8) - self.log("Creating remapped segmentation", level="NOTICE") - seg_sitk = SimpleITK.ReadImage(segmentation.abspath) - seg_numpy = SimpleITK.GetArrayFromImage(seg_sitk) - remapped_numpy = mapping_numpy[seg_numpy] - remapped_sitk = SimpleITK.GetImageFromArray(remapped_numpy) - remapped_sitk.CopyInformation(seg_sitk) - SimpleITK.WriteImage(remapped_sitk, segmentation_remapped.abspath, True) From 2a8ef5963df1220fd0e35983a7a07d4b2fe654b0 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Wed, 13 Dec 2023 11:02:10 +0100 Subject: [PATCH 053/125] meta.json - change name into gc_picai_baseline --- models/gc_picai_baseline/meta.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/gc_picai_baseline/meta.json b/models/gc_picai_baseline/meta.json index e172b4e1..7cd3fec4 100644 --- a/models/gc_picai_baseline/meta.json +++ b/models/gc_picai_baseline/meta.json @@ -1,6 +1,6 @@ { "id": "c5f886fb-9f54-4555-a954-da02b22d6d3f", - "name": "picai_baseline", + "name": "gc_picai_baseline", "title": "PI-CAI challenge baseline", "summary": { "description": "This algorithm predicts a detection map for the likelihood of clinically significant prostate cancer (csPCa) using biparametric MRI (bpMRI). 
The algorithm ensembles 5-fold cross-validation models that were trained on the PI-CAI: Public Training and Development Dataset v2.0. The detection map is at the same spatial resolution and physical dimensions as the input axial T2-weighted image. This model algorithm was used as a baseline for the PI-CAI challenge hosted on Grand Challenge.", From 7055664293f44dab5b8916d69a1faa911b6d09fe Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Wed, 13 Dec 2023 15:04:11 +0100 Subject: [PATCH 054/125] meta.json - correction for license weights --- models/gc_picai_baseline/meta.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/gc_picai_baseline/meta.json b/models/gc_picai_baseline/meta.json index 7cd3fec4..1dbab230 100644 --- a/models/gc_picai_baseline/meta.json +++ b/models/gc_picai_baseline/meta.json @@ -81,7 +81,7 @@ "cite": "J. S. Bosma, A. Saha, M. Hosseinzadeh, I. Slootweg, M. de Rooij, and H. Huisman, \"Semisupervised Learning with Report-guided Pseudo Labels for Deep Learning–based Prostate Cancer Detection Using Biparametric MRI\", Radiology: Artificial Intelligence, 230031, 2023. 
DOI: 10.1148/ryai.230031", "license": { "code": "Apache 2.0", - "weights": "CC-BY-NC-4.0" + "weights": "CC-BY-NC-SA-4.0" }, "publications": [ { From ed79ebd134801862eb6e5e2c2810a4a45cb9f6c9 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 14 Dec 2023 13:17:38 +0100 Subject: [PATCH 055/125] runner & cli - changed heatmap output descriptions to detection map and updated descriptions --- models/gc_picai_baseline/config/default.yml | 2 +- models/gc_picai_baseline/utils/PicaiBaselineRunner.py | 6 +++--- models/gc_picai_baseline/utils/cli.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/models/gc_picai_baseline/config/default.yml b/models/gc_picai_baseline/config/default.yml index c86cac07..bb5d5deb 100644 --- a/models/gc_picai_baseline/config/default.yml +++ b/models/gc_picai_baseline/config/default.yml @@ -31,4 +31,4 @@ modules: DataOrganizer: targets: - json:mod=report-->[i:sid]/cspca-case-level-likelihood.json - - mha:mod=hm-->[i:sid]/cspca-detection-map.mha + - mha:mod=dm-->[i:sid]/cspca-detection-map.mha diff --git a/models/gc_picai_baseline/utils/PicaiBaselineRunner.py b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py index f3958222..84dc1474 100644 --- a/models/gc_picai_baseline/utils/PicaiBaselineRunner.py +++ b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py @@ -34,9 +34,9 @@ class PicaiBaselineRunner(Module): @IO.Input('in_data_adc', 'mha:mod=mr:type=adc', the='input ADC prostate MR image') @IO.Input('in_data_hbv', 'mha:mod=mr:type=hbv', the='input HBV prostate MR image') @IO.Output('cancer_likelihood_json', 'cspca-case-level-likelihood.json', "json", bundle='model', the='output JSON file with PICAI baseline prostate cancer likelihood') - @IO.Output('cancer_detection_heatmap', 'cspca_detection_map.mha', "mha:mod=hm", bundle='model', the='output heatmap indicating prostate cancer likelihood') + @IO.Output('cancer_lesion_detection_map', 'cspca-detection-map.mha', "mha:mod=dm", bundle='model', the='output 
detection map of clinically significant prostate cancer lesions in 3D, where each voxel represents a floating point in range [0,1]') @IO.OutputData('cancer_likelihood', ProstateCancerLikelihood, the='PICAI baseline prostate cancer likelihood') - def task(self, instance: Instance, in_data_t2: InstanceData, in_data_adc: InstanceData, in_data_hbv: InstanceData, cancer_likelihood_json: InstanceData, cancer_detection_heatmap: InstanceData, cancer_likelihood: ProstateCancerLikelihood) -> None: + def task(self, instance: Instance, in_data_t2: InstanceData, in_data_adc: InstanceData, in_data_hbv: InstanceData, cancer_likelihood_json: InstanceData, cancer_lesion_detection_map: InstanceData, cancer_likelihood: ProstateCancerLikelihood) -> None: # build command (order matters!) cmd = [ sys.executable, @@ -45,7 +45,7 @@ def task(self, instance: Instance, in_data_t2: InstanceData, in_data_adc: Instan in_data_adc.abspath, in_data_hbv.abspath, cancer_likelihood_json.abspath, - cancer_detection_heatmap.abspath, + cancer_lesion_detection_map.abspath, ] # run the command as subprocess diff --git a/models/gc_picai_baseline/utils/cli.py b/models/gc_picai_baseline/utils/cli.py index 64d51a73..deaf9ecf 100644 --- a/models/gc_picai_baseline/utils/cli.py +++ b/models/gc_picai_baseline/utils/cli.py @@ -14,7 +14,7 @@ from process import csPCaAlgorithm as PicaiClassifier -def run_classifier(t2: Path, adc: Path, hbv: Path, cancer_likelihood_json: Path, cancer_detection_heatmap: Path): +def run_classifier(t2: Path, adc: Path, hbv: Path, cancer_likelihood_json: Path, cancer_lesion_detection_map: Path): # Initialize classifier object classifier = PicaiClassifier() @@ -26,7 +26,7 @@ def run_classifier(t2: Path, adc: Path, hbv: Path, cancer_likelihood_json: Path, ] # Specify output files - classifier.cspca_detection_map_path = cancer_detection_heatmap + classifier.cspca_detection_map_path = cancer_lesion_detection_map classifier.case_confidence_path = cancer_likelihood_json # Run the classifier on 
the input images @@ -39,14 +39,14 @@ def run_classifier_cli(): parser.add_argument("input_adc", type=str, help="input ADC prostate MR image (MHA") parser.add_argument("input_hbv", type=str, help="input HBV prostate MR image (MHA)") parser.add_argument("output_cancer_likelihood_json", type=str, help="output JSON file with PICAI baseline prostate cancer likelihood (JSON)") - parser.add_argument("output_cancer_detection_heatmap", type=str, help="output heatmap indicating prostate cancer likelihood (MHA)") + parser.add_argument("output_cancer_lesion_detection_map", type=str, help="output detection map of clinically significant prostate cancer lesions in 3D (MHA)") args = parser.parse_args() run_classifier( t2=Path(args.input_t2), adc=Path(args.input_adc), hbv=Path(args.input_hbv), cancer_likelihood_json=Path(args.output_cancer_likelihood_json), - cancer_detection_heatmap=Path(args.output_cancer_detection_heatmap), + cancer_lesion_detection_map=Path(args.output_cancer_lesion_detection_map), ) From c7fb52b18a3dc4edf38b3c60d0d437f1496e3ad3 Mon Sep 17 00:00:00 2001 From: ccosmin97 Date: Fri, 15 Dec 2023 22:30:07 +0000 Subject: [PATCH 056/125] added meta.json, config and dockerfiles, tried testing with and without center cropping --- models/monai_prostate158/config/default.yml | 12 ++- .../monai_prostate158/dockerfiles/dockerfile | 13 ++- models/monai_prostate158/meta.json | 82 ++++++++---------- .../utils/Prostate158Runner.py | 86 ++++++++++++++++++- models/monai_prostate158/utils/__init__.py | 1 + 5 files changed, 135 insertions(+), 59 deletions(-) create mode 100644 models/monai_prostate158/utils/__init__.py diff --git a/models/monai_prostate158/config/default.yml b/models/monai_prostate158/config/default.yml index 20ca9eb1..d56939bc 100644 --- a/models/monai_prostate158/config/default.yml +++ b/models/monai_prostate158/config/default.yml @@ -6,7 +6,7 @@ general: execute: - DicomImporter - NiftiConverter -- NNUnetRunner +- Prostate158Runner - DsegConverter - 
DataOrganizer @@ -24,16 +24,14 @@ modules: Prostate158Runner: in_data: nifti:mod=mr - nnunet_task: prostate_mri_anatomy - # nnunet_model: 3d_fullres - roi: PROSTATE + apply_center_crop: False DsegConverter: - source_segs: nifti:mod=seg + source_segs: nifti:mod=seg:roi=PROSTATE_TRANSITION_ZONE,PROSTATE_PERIPHERAL_ZONE target_dicom: dicom:mod=mr - model_name: 'Prostate158' + model_name: 'Segmentation of prostate regions, Prostate158' skip_empty_slices: True DataOrganizer: targets: - - dicomseg-->[i:sid]/nnunet_mr_prostate.seg.dcm \ No newline at end of file + - dicomseg-->[i:sid]/prostate158.seg.dcm \ No newline at end of file diff --git a/models/monai_prostate158/dockerfiles/dockerfile b/models/monai_prostate158/dockerfiles/dockerfile index c5debeee..9e741b49 100644 --- a/models/monai_prostate158/dockerfiles/dockerfile +++ b/models/monai_prostate158/dockerfiles/dockerfile @@ -7,18 +7,23 @@ FROM mhubai/base:latest # https://github.com/MIC-DKFZ/nnUNet/pull/1209 ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True +ARG MONAI_BUNDLE_DIR='https://github.com/Project-MONAI/model-zoo/releases/download/hosting_storage_v1/prostate_mri_anatomy_v0.1.0.zip' +ARG MONAI_MODEL_NAME='prostate_mri_anatomy' + # Install nnunet and platipy -RUN pip3 install --no-cache-dir \ - "monai[fire]" +RUN python3 -m pip install --upgrade pip && pip3 install --no-cache-dir "monai[ignite]" fire nibabel simpleITK # Clone the main branch of MHubAI/models ARG MHUB_MODELS_REPO RUN buildutils/import_mhub_model.sh monai_prostate158 ${MHUB_MODELS_REPO} # Pull weights into the container -ENV WEIGHTS_DIR=/root/.monai/bundles/ +ENV WEIGHTS_DIR=/app/models/monai_prostate158/bundle RUN mkdir -p $WEIGHTS_DIR -RUN python -m monai.bundle download "prostate_mri_anatomy" --bundle_dir ${WEIGHTS_DIR} +RUN python3 -m monai.bundle download "prostate_mri_anatomy" --bundle_dir ${WEIGHTS_DIR} + +#define path to bundle root +ENV BUNDLE_ROOT=/app/models/monai_prostate158/bundle/prostate_mri_anatomy # specify nnunet 
specific environment variables # ENV WEIGHTS_FOLDER=$WEIGHTS_DIR diff --git a/models/monai_prostate158/meta.json b/models/monai_prostate158/meta.json index 50c4e568..01f9514a 100644 --- a/models/monai_prostate158/meta.json +++ b/models/monai_prostate158/meta.json @@ -1,17 +1,17 @@ { "id": "...", - "name": "nnunet_prostate_task24_promise", - "title": "nnU-Net (Whole prostate segmentation)", + "name": "monai_prostate158", + "title": "Prostate158 (Prostate transitional zone and peripheral zone segmentation)", "summary": { - "description": "nnU-Net's whole prostate segmentation model is a single-modality (i.e. T2) input AI-based pipeline for the automated segmentation of the whole prostate on MRI scans.", + "description": "Prostate158 is a zonal prostate segmentation model, a multi-modality input AI-based pipeline for the automated segmentation of the peripheral and central gland of the prostate on MRI T2 axial scans.", "inputs": [ { "label": "T2 input image", - "description": "The T2 axial-acquired sequence being the input image", + "description": "The T2 axial sequence being one of the two input image", "format": "DICOM", "modality": "MR", "bodypartexamined": "Prostate", - "slicethickness": "3.6 mm", + "slicethickness": "3 mm", "non-contrast": true, "contrast": false } @@ -20,7 +20,8 @@ { "type": "Segmentation", "classes": [ - "PROSTATE" + "PROSTATE_TRANSITION_ZONE", + "PROSTATE_PERIPHERAL_ZONE" ] } ], @@ -31,93 +32,80 @@ }, "data": { "training": { - "vol_samples": 50 + "vol_samples": 139 }, "evaluation": { - "vol_samples": 30 + "vol_samples": 20 }, "public": true, "external": false } }, "details": { - "name": "nnU-Net whole prostate segmentation model", + "name": "Prostate158 - An expert-annotated 3T MRI dataset and algorithm for prostate cancer detection", "version": "1.0.0", - "devteam": "MIC-DKFZ (Helmholtz Imaging Applied Computer Vision Lab)", - "type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)", + "devteam": "Lisa C. Adams, Keno K. 
Bressem", + "type": "Prostate158 (U-Net structure for prostate segmentation)", "date": { - "weights": "2020", - "code": "2020", - "pub": "2020" + "weights": "2022", + "code": "2022", + "pub": "2022" }, - "cite": "Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2020). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature Methods, 1-9.", + "cite": "Lisa C. Adams and Marcus R. Makowski and Günther Engel and Maximilian Rattunde and Felix Busch and Patrick Asbach and Stefan M. Niehues and Shankeeth Vinayahalingam and Bram {van Ginneken} and Geert Litjens and Keno K. Bressem, Prostate158 - An expert-annotated 3T MRI dataset and algorithm for prostate cancer detection", "license": { "code": "Apache 2.0", - "weights": "CC BY-NC 4.0" + "weights": "" }, "publications": [ { - "title": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation", - "uri": "https://www.nature.com/articles/s41592-020-01008-z" + "title": "Prostate158 - An expert-annotated 3T MRI dataset and algorithm for prostate cancer detection", + "uri": "https://doi.org/10.1016/j.compbiomed.2022.105817" } ], - "github": "https://github.com/MIC-DKFZ/nnUNet/tree/nnunetv1", - "zenodo": "https://zenodo.org/record/4485926" + "github": "https://github.com/Project-MONAI/model-zoo/tree/dev/models/prostate_mri_anatomy", + "zenodo": "https://zenodo.org/records/6481141" }, "info": { "use": { "title": "Intended Use", - "text": "This model is intended to perform prostate anatomy segmentation in MR T2 scans. The slice thickness of the training data is 2.2~4mm. Endorectal coil was present during training." + "text": "This model is intended to perform prostate regions anatomy segmentation in MR ADC and T2 scans. The slice thickness of the training data is 3mm. T2 input modality is used during training. To align with the model training pre-processing scheme, center-cropping of the input T2 image is recommended. 
No endorectal coil was present during training." }, "analyses": { "title": "Quantitative Analyses", - "text": "The model's performance was assessed using the Dice Coefficient, in the context of the Promise12 challenge. The complete breakdown of the metrics can be consulted on GrandChallenge [1] and is reported in the supplementary material to the publication [2].", + "text": "The model's performance was assessed using the Dice Coefficient, on an internal test set and ProstateX collection. The complete breakdown of the metrics can be consulted in the publication.", "references": [ { - "label": "Evaluation of prostate segmentation algorithms for MRI: The PROMISE12 challenge", - "uri": "https://doi.org/10.1016/j.media.2013.12.002" - }, - { - "label": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation", - "uri": "https://www.nature.com/articles/s41592-020-01008-z" + "label": "Prostate158 - An expert-annotated 3T MRI dataset and algorithm for prostate cancer detection", + "uri": "https://doi.org/10.1016/j.compbiomed.2022.105817" } ] }, "evaluation": { "title": "Evaluation Data", - "text": "The evaluation dataset consists of 30 test samples coming from the Promise12 challenge.", + "text": "The evaluation dataset consists of 20 internal validation samples.", "tables": [], - "references": [ - { - "label": "Evaluation of prostate segmentation algorithms for MRI: The PROMISE12 challenge", - "uri": "https://doi.org/10.1016/j.media.2013.12.002" - }, - { - "label": "PROMISE12 dataset (direct download)", - "uri": "https://zenodo.org/records/8026660" - } - ] + "references": [] }, "training": { "title": "Training Data", - "text": "The training dataset consists of 50 MRI cases containing the prostate, from the Promise12 challenge. The authors report the following characteristics for the training dataset:", + "text": "The training dataset consists of 139 MRI cases containing the prostate, from the Prostate158 collection. 
The authors report the following characteristics for the T2 imaging sequeneces:", "tables": [ { - "label": "Medical Image Decathlon dataset (training)", + "label": "Prostate158 dataset (training)", "entries": { - "Slice Thickness": "2.2~4 mm", - "In-Plane Resolution": "0.27 mm" + "Slice Thickness": "3 mm", + "In-Plane Resolution": "0.47 mm" } } ], "references": [ { - "label": "Evaluation of prostate segmentation algorithms for MRI: The PROMISE12 challenge", - "uri": "https://doi.org/10.1016/j.media.2013.12.002" + "label": "Medical Segmentation Decathlon", + "uri": "https://www.nature.com/articles/s41467-022-30695-9" }, { - "label": "PROMISE12 dataset (direct download)", - "uri": "https://zenodo.org/records/8026660" + "label": "Prostate158 dataset (Zenodo access)", + "uri": "https://zenodo.org/records/6481141" } ] } diff --git a/models/monai_prostate158/utils/Prostate158Runner.py b/models/monai_prostate158/utils/Prostate158Runner.py index dbd265c6..24a631ca 100644 --- a/models/monai_prostate158/utils/Prostate158Runner.py +++ b/models/monai_prostate158/utils/Prostate158Runner.py @@ -1 +1,85 @@ -#.... 
\ No newline at end of file +""" +------------------------------------------------- +MHub - MONAI Prostate158 Runner +------------------------------------------------- + +------------------------------------------------- +Author: Cosmin Ciausu +Email: cciausu97@gmail.com +------------------------------------------------- +""" +# TODO: support multi-i/o and batch processing on multiple instances + +from typing import List, Optional +import os, subprocess, shutil, glob, sys +import SimpleITK as sitk, numpy as np +from mhubio.core import Module, Instance, InstanceData, DataType, FileType, IO +from mhubio.modules.runner.ModelRunner import ModelRunner +import json + +@IO.Config('apply_center_crop', bool, False, the='flag to apply center cropping to input_image') + +class Prostate158Runner(ModelRunner): + + apply_center_crop : bool + + @IO.Instance() + @IO.Input("in_data", 'nifti:mod=mr', the="input T2 sequence data to run prostate158 on") + @IO.Output('out_data', 'monai_prostate158.nii.gz', + 'nifti:mod=seg:model=Prostate158:roi=PROSTATE_TRANSITION_ZONE,PROSTATE_PERIPHERAL_ZONE', + data='in_data', bundle='model', the="predicted segmentation model") + + def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: + + # bring input data in nnunet specific format + # NOTE: only for nifti data as we hardcode the nnunet-formatted-filename (and extension) for now. 
+ assert in_data.type.ftype == FileType.NIFTI + assert in_data.abspath.endswith('.nii.gz') + datalist = [in_data.abspath] + + if self.apply_center_crop: + in_dir_cropped = self.config.data.requestTempDir(label="monai-crop-in") + in_data_processed = os.path.join(in_dir_cropped, "image_cropped.nii.gz") + self.subprocess([sys.executable, f"{os.path.join(os.environ['BUNDLE_ROOT'], 'scripts', 'center_crop.py')}", + "--file_name", in_data.abspath, "--out_name",in_data_processed], text=True) + datalist = [in_data_processed] + + # define output folder (temp dir) and also override environment variable for nnunet + out_dir = self.config.data.requestTempDir(label="monai-model-out") + + bash_command = [sys.executable, + "-m", "monai.bundle", "run", "evaluating"] + bash_command += ["--meta_file", os.path.join(os.environ['BUNDLE_ROOT'], "configs", "metadata.json")] + bash_command += ["--config_file", os.path.join(os.environ['BUNDLE_ROOT'], "configs", "inference.json")] + bash_command += ["--datalist", str(datalist)] + bash_command += ["--output_dir", out_dir] + bash_command += ["--bundle_root", os.environ['BUNDLE_ROOT']] + bash_command += ["--dataloader#num_workers", "0"] + print(bash_command) + self.subprocess(bash_command, text=True) + + # get output data + out_path = glob.glob(os.path.join(out_dir, "**", + "*.nii.gz"), recursive=True)[0] + + if self.apply_center_crop: + out_dir_padded = self.config.data.requestTempDir(label="monai-padded-out") + out_data_padded = os.path.join(out_dir_padded, "seg_padded.nii.gz") + paddedFilter = sitk.ConstantPadImageFilter() + seg_image = sitk.ReadImage(out_path) + t2_image = sitk.ReadImage(in_data.abspath) + out_seg_shape = sitk.GetArrayFromImage(seg_image).shape + t2_image_shape = sitk.GetArrayFromImage(t2_image).shape + x_bound_lower = int((t2_image_shape[2] - out_seg_shape[2])/2) + x_bound_upper = int(int(t2_image_shape[2] - out_seg_shape[2])/2 + ((t2_image_shape[2] - out_seg_shape[2]) % 2)) + y_bound_lower = int((t2_image_shape[1] - 
out_seg_shape[1])/2) + y_bound_upper = int(int(t2_image_shape[1] - out_seg_shape[1])/2 + ((t2_image_shape[1] - out_seg_shape[1]) % 2)) + paddedFilter.SetConstant(0) + paddedFilter.SetPadLowerBound([x_bound_lower, y_bound_lower, 0]) + paddedFilter.SetPadUpperBound([x_bound_upper, y_bound_upper, 0]) + padded_img = paddedFilter.Execute(seg_image) + sitk.WriteImage(padded_img, out_data_padded) + out_path = out_data_padded + + # copy output data to instance + shutil.copyfile(out_path, out_data.abspath) diff --git a/models/monai_prostate158/utils/__init__.py b/models/monai_prostate158/utils/__init__.py new file mode 100644 index 00000000..d03d6b1f --- /dev/null +++ b/models/monai_prostate158/utils/__init__.py @@ -0,0 +1 @@ +from .Prostate158Runner import * \ No newline at end of file From f3be4986dadc50087f617f3f979520ae4437ef42 Mon Sep 17 00:00:00 2001 From: ccosmin97 Date: Fri, 15 Dec 2023 22:42:24 +0000 Subject: [PATCH 057/125] fixed dockerfile --- models/monai_prostate158/config/default.yml | 2 +- models/monai_prostate158/dockerfiles/{dockerfile => Dockerfile} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename models/monai_prostate158/dockerfiles/{dockerfile => Dockerfile} (100%) diff --git a/models/monai_prostate158/config/default.yml b/models/monai_prostate158/config/default.yml index d56939bc..e62ac281 100644 --- a/models/monai_prostate158/config/default.yml +++ b/models/monai_prostate158/config/default.yml @@ -24,7 +24,7 @@ modules: Prostate158Runner: in_data: nifti:mod=mr - apply_center_crop: False + apply_center_crop: True DsegConverter: source_segs: nifti:mod=seg:roi=PROSTATE_TRANSITION_ZONE,PROSTATE_PERIPHERAL_ZONE diff --git a/models/monai_prostate158/dockerfiles/dockerfile b/models/monai_prostate158/dockerfiles/Dockerfile similarity index 100% rename from models/monai_prostate158/dockerfiles/dockerfile rename to models/monai_prostate158/dockerfiles/Dockerfile From 49cdd7a0359c553039b1eac1cbcdb4488f66ca55 Mon Sep 17 00:00:00 2001 From: Miriam 
Groeneveld Date: Wed, 3 Jan 2024 10:51:38 +0100 Subject: [PATCH 058/125] PR comments on mata.json --- models/gc_grt123_lung_cancer/meta.json | 32 +++++++++++++++++++------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json index 000cd2ce..0abc7b87 100644 --- a/models/gc_grt123_lung_cancer/meta.json +++ b/models/gc_grt123_lung_cancer/meta.json @@ -3,7 +3,7 @@ "name": "lung_cancer_risk_estimation", "title": "Lung cancer risk estimation on thorax CT scans", "summary": { - "description": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. ", + "description": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. The model consists of two modules. The first one is a 3D region proposal network for nodule detection, which outputs all suspicious nodules for a subject. The second one selects the top five nodules based on the detection confidence, evaluates their cancer probabilities and combines them with a leaky noisy-or gate to obtain the probability of lung cancer for the subject", "inputs": [ { "label": "CT", @@ -71,19 +71,35 @@ "use": { "title": "Intended use", "text": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. ", - "references": [], + "references": [ + { + "label": "Lung cancer risk estimation algorithm on grand-challenge", + "url": "https://grand-challenge.org/algorithms/dsb2017-grt123/" + } + ], "tables": [] }, "analyses": { - "title": "", - "text": "", + "title": "Evaluation", + "text": "The nodule detection was evaluated on the validation set of DSB. It contains data from 198 cases and there are 71 (7 nodules smaller than 6 mm are ruled out) nodules in total. The Free Response Operating Characteristic (FROC) is used to evaluate the performance and can be viewed in the publication. 
The Case classification is evaluated using the Area Under the Curve (AUC) metric.", "references": [], - "tables": [] + "tables": [{ + "label": "AUC for the Case classification", + "entries": { + "AUC training set": 0.90, + "AUC test set": 0.87 + } + }] }, "evaluation": { - "title": "", - "text": "", - "references": [], + "title": "Evaluation data", + "text": "The model was evaluated against a private dataset of 300 low-dose CT images. 150 patient scans were from the competition set and 150 were from an independent dataset. Both test datasets contained 50 cancer-positive scans and 100 cancer-negative scans.", + "references": [ + { + "label": "Evaluation paper", + "uri": "https://pubmed.ncbi.nlm.nih.gov/34870218/" + } + ], "tables": [] }, "training": { From 0b4ac0adb63241a45bc46a58eb0dd795bcaefd77 Mon Sep 17 00:00:00 2001 From: Miriam Groeneveld Date: Wed, 3 Jan 2024 13:34:20 +0100 Subject: [PATCH 059/125] PR comments on mata.json --- models/gc_grt123_lung_cancer/meta.json | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json index 0abc7b87..669de299 100644 --- a/models/gc_grt123_lung_cancer/meta.json +++ b/models/gc_grt123_lung_cancer/meta.json @@ -60,6 +60,10 @@ { "title": "JOURNAL OF LATEX CLASS FILES, VOL. 14, NO. 8, AUGUST 2015 1 Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network", "uri": "https://arxiv.org/pdf/1711.08324.pdf" + }, + { + "title": "Deep Learning for Lung Cancer Detection on Screening CT Scans: Results of a Large-Scale Public Competition and an Observer Study with 11 Radiologists", + "uri": "https://pubmed.ncbi.nlm.nih.gov/34870218/" } ], "github": "https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123", @@ -70,11 +74,11 @@ "info": { "use": { "title": "Intended use", - "text": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. 
", + "text": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk.", "references": [ { "label": "Lung cancer risk estimation algorithm on grand-challenge", - "url": "https://grand-challenge.org/algorithms/dsb2017-grt123/" + "uri": "https://grand-challenge.org/algorithms/dsb2017-grt123/" } ], "tables": [] From c0d9076f8f5d7bb9fae1ec09fa6d3d23604fab51 Mon Sep 17 00:00:00 2001 From: Miriam Groeneveld Date: Wed, 3 Jan 2024 13:40:28 +0100 Subject: [PATCH 060/125] PR comments on mata.json --- models/gc_grt123_lung_cancer/meta.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json index 669de299..413557fc 100644 --- a/models/gc_grt123_lung_cancer/meta.json +++ b/models/gc_grt123_lung_cancer/meta.json @@ -90,8 +90,8 @@ "tables": [{ "label": "AUC for the Case classification", "entries": { - "AUC training set": 0.90, - "AUC test set": 0.87 + "AUC training set": "0.90", + "AUC test set": "0.87" } }] }, From f2532293eabd977455423475a9a91f73db21b215 Mon Sep 17 00:00:00 2001 From: Miriam Groeneveld Date: Wed, 3 Jan 2024 14:30:10 +0100 Subject: [PATCH 061/125] DSB and evaluation dataset --- models/gc_grt123_lung_cancer/meta.json | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json index 413557fc..b523b9e7 100644 --- a/models/gc_grt123_lung_cancer/meta.json +++ b/models/gc_grt123_lung_cancer/meta.json @@ -85,8 +85,11 @@ }, "analyses": { "title": "Evaluation", - "text": "The nodule detection was evaluated on the validation set of DSB. It contains data from 198 cases and there are 71 (7 nodules smaller than 6 mm are ruled out) nodules in total. The Free Response Operating Characteristic (FROC) is used to evaluate the performance and can be viewed in the publication. 
The Case classification is evaluated using the Area Under the Curve (AUC) metric.", - "references": [], + "text": "The nodule detection was evaluated on the validation set of Data Science Bowl 2017 challenge. It contains data from 198 cases and there are 71 (7 nodules smaller than 6 mm are ruled out) nodules in total. The Free Response Operating Characteristic (FROC) is used to evaluate the performance and can be viewed in the publication. The Case classification is evaluated using the Area Under the Curve (AUC) metric.", + "references": [{ + "label": "Data Science Bowl 2017 challenge", + "uri": "https://www.kaggle.com/c/data-science-bowl-2017" + }], "tables": [{ "label": "AUC for the Case classification", "entries": { @@ -97,7 +100,7 @@ }, "evaluation": { "title": "Evaluation data", - "text": "The model was evaluated against a private dataset of 300 low-dose CT images. 150 patient scans were from the competition set and 150 were from an independent dataset. Both test datasets contained 50 cancer-positive scans and 100 cancer-negative scans.", + "text": "The model was evaluated against a private dataset of 300 low-dose CT images, containing 100 cancer-positive scans and 200 cancer-negative scans.", "references": [ { "label": "Evaluation paper", From fb5f2c939d1c1be317936c61ede13f578874d860 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 11 Jan 2024 15:48:22 +0100 Subject: [PATCH 062/125] meta.json - update links and data details #27 --- models/gc_grt123_lung_cancer/meta.json | 75 +++++++++++++++++--------- 1 file changed, 51 insertions(+), 24 deletions(-) diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json index b523b9e7..07024e69 100644 --- a/models/gc_grt123_lung_cancer/meta.json +++ b/models/gc_grt123_lung_cancer/meta.json @@ -3,7 +3,7 @@ "name": "lung_cancer_risk_estimation", "title": "Lung cancer risk estimation on thorax CT scans", "summary": { - "description": "This algorithm analyzes non-contrast CT scans 
of the thorax and predicts the lung cancer risk. The model consists of two modules. The first one is a 3D region proposal network for nodule detection, which outputs all suspicious nodules for a subject. The second one selects the top five nodules based on the detection confidence, evaluates their cancer probabilities and combines them with a leaky noisy-or gate to obtain the probability of lung cancer for the subject", + "description": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. The model consists of two modules. The first one is a 3D region proposal network for nodule detection, which outputs all suspicious nodules for a subject. The second one selects the top five nodules based on the detection confidence, evaluates their cancer probabilities and combines them with a leaky noisy-or gate to obtain the probability of lung cancer for the subject. This model was the winner of the Data Science Bowl 2017 competition hosted on Kaggle.", "inputs": [ { "label": "CT", @@ -32,7 +32,7 @@ }, "data": { "training": { - "vol_samples": 2285 + "vol_samples": 2483 }, "evaluation": { "vol_samples": 506 @@ -51,15 +51,15 @@ "code": "2023-07-04", "pub": "2017-11-22" }, - "cite": "Liao F, Liang M, Li Z, Hu X, Song S. Evaluate the Malignancy of Pulmonary Nodules Using the 3-D Deep Leaky Noisy-OR Network. IEEE Trans Neural Netw Learning Syst. 2019;30(11):3484-3495.", + "cite": "F. Liao, M. Liang, Z. Li, X. Hu and S. Song, 'Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network', in IEEE Transactions on Neural Networks and Learning Systems, vol. 30, no. 11, pp. 3484-3495, Nov. 2019, doi: 10.1109/TNNLS.2019.2892409.", "license": { "code": "MIT", - "weights": "" + "weights": "MIT" }, "publications": [ { - "title": "JOURNAL OF LATEX CLASS FILES, VOL. 14, NO. 
8, AUGUST 2015 1 Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network", - "uri": "https://arxiv.org/pdf/1711.08324.pdf" + "title": "Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network", + "uri": "https://ieeexplore.ieee.org/abstract/document/8642524" }, { "title": "Deep Learning for Lung Cancer Detection on Screening CT Scans: Results of a Large-Scale Public Competition and an Observer Study with 11 Radiologists", @@ -74,44 +74,67 @@ "info": { "use": { "title": "Intended use", - "text": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk.", + "text": "This algorithm analyzes non-contrast CT scans of the thorax, first it segments the lungs, subsequently it detects lung nodules within the lungs, and finally it predicts the lung cancer risk for the individual nodules and the scan as a whole. The algorithm is also hosted on Grand Challenge [1] and was the winner of the Data Science Bowl 2017 challenge on Kaggle [2]. ", "references": [ { "label": "Lung cancer risk estimation algorithm on grand-challenge", "uri": "https://grand-challenge.org/algorithms/dsb2017-grt123/" + }, + { + "label": "Data Science Bowl 2017 challenge", + "uri": "https://www.kaggle.com/c/data-science-bowl-2017" } ], "tables": [] }, "analyses": { "title": "Evaluation", - "text": "The nodule detection was evaluated on the validation set of Data Science Bowl 2017 challenge. It contains data from 198 cases and there are 71 (7 nodules smaller than 6 mm are ruled out) nodules in total. The Free Response Operating Characteristic (FROC) is used to evaluate the performance and can be viewed in the publication. 
The Case classification is evaluated using the Area Under the Curve (AUC) metric.", - "references": [{ - "label": "Data Science Bowl 2017 challenge", - "uri": "https://www.kaggle.com/c/data-science-bowl-2017" - }], - "tables": [{ - "label": "AUC for the Case classification", - "entries": { - "AUC training set": "0.90", - "AUC test set": "0.87" + "text": "The evaluation of the model was done on the Data Science Bowl 2017 (DSB) dataset hosted on Kaggle [1] (this is no longer publicly available). The nodule detection was evaluated on the validation of the DSB dataset, which contained data from 198 cases and there were 71 (7 nodules smaller than 6 mm are ruled out) nodules in total. The Free Response Operating Characteristic (FROC) is used to evaluate the performance of the nodule detection. The case cancer classification was evaluated using the Area Under the Curve (AUC) metric on the training set and the testing set of respectively 1397 and 506 patient cases. The AUC and FROC graphs can be viewed in the publication [2]. For the final evaluation on the Data Science Bowl 2017 challenge, the model's performance was evaluated using the logistic loss on a private external dataset of 300 low-dose CT images [3], containing 100 cancer-positive scans and 200 cancer-negative scans. 
See tables for a summary of the results.", + "references": [ + { + "label": "Data Science Bowl 2017 challenge", + "uri": "https://www.kaggle.com/c/data-science-bowl-2017" + }, + { + "label": "Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network", + "uri": "https://ieeexplore.ieee.org/abstract/document/8642524" + }, + { + "label": "Evaluation paper external dataset Data Science Bowl 2017", + "uri": "https://pubmed.ncbi.nlm.nih.gov/34870218/" } - }] + ], + "tables": [ + { + "label": "Case cancer classification results on the DSB 2017 dataset", + "entries": { + "AUC on training set": "0.90", + "AUC on test set": "0.87", + "Logistic loss on test set": "0.39975" + } + }, + { + "label": "Case cancer classification results on private external evaluation dataset.", + "entries": { + "AUC on all scans": "0.877 (95% CI: 0.842, 0.910)" + } + } + ] }, "evaluation": { "title": "Evaluation data", - "text": "The model was evaluated against a private dataset of 300 low-dose CT images, containing 100 cancer-positive scans and 200 cancer-negative scans.", + "text": "The model was evaluated on the testing set of 506 patient cases the Data Science Bowl 2017 (DSB) hosted on Kaggle [1] (this is no longer publicly available). ", "references": [ { - "label": "Evaluation paper", - "uri": "https://pubmed.ncbi.nlm.nih.gov/34870218/" + "label": "Data Science Bowl 2017 challenge", + "uri": "https://www.kaggle.com/c/data-science-bowl-2017" } ], "tables": [] }, "training": { "title": "Training data", - "text": "The Luna dataset includes the images from the LIDC/IDRI dataset in a different format, together with additional annotations. 
The LIDC/IDRI dataset is available at https://wiki.cancerimagingarchive.net/display/Public/LIDC-IDRI under a Creative Commons Attribution 3.0 Unported License.", + "text": "Two lung scan datasets were used to train the model: the LUng Nodule Analysis 2016 (LUNA16) dataset [1] [2] and the training set of the Data Science Bowl 2017 (DSB) hosted on Kaggle [3] (this is no longer publicly available). Nodules smaller than 6 mm were removed from the LUNA16 annotations before training. The LUNA16 dataset includes 1186 nodule labels in 888 patient cases annotated by radiologists. The DSB dataset includes 1397 and 198 patient cases in its training and validation sets respectively. The LUNA16 dataset is a subset from the images from the LIDC/IDRI dataset [4] that is available under a Creative Commons Attribution 3.0 Unported License.", "references": [ { "label": "LUng Nodule Analysis 2016 dataset part 1", @@ -122,8 +145,12 @@ "uri": "https://zenodo.org/record/4121926" }, { - "label": "Data Science Bowl 2017 dataset", - "uri": "https://www.kaggle.com/competitions/data-science-bowl-2017/data" + "label": "Data Science Bowl 2017 challenge", + "uri": "https://www.kaggle.com/c/data-science-bowl-2017" + }, + { + "label": "The LIDC/IDRI dataset", + "uri": "https://www.cancerimagingarchive.net/collection/lidc-idri/" } ], "tables": [] From 4cca7fb1e1a386dee6d166d7999b39af8b861e6d Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Fri, 12 Jan 2024 11:33:29 +0100 Subject: [PATCH 063/125] fix missing default value for nnunet model configurable in ProstateRunner module --- models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py b/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py index e450d490..1feca9e7 100644 --- a/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py +++ b/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py @@ -2,7 +2,7 @@
from mhubio.core import Module, Instance, InstanceData, IO @IO.Config('use_tta', bool, False, the='flag to enable test time augmentation') -@IO.Config('nnunet_model', str, None, the='nnunet model name (2d, 3d_lowres, 3d_fullres, 3d_cascade_fullres)') +@IO.Config('nnunet_model', str, '3d_fullres', the='nnunet model name (2d, 3d_lowres, 3d_fullres, 3d_cascade_fullres)') class ProstateRunner(Module): use_tta: bool From 15123ca88f9a1c07d0a7fb632d2e96d6abe9f225 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Fri, 12 Jan 2024 23:55:52 +0100 Subject: [PATCH 064/125] move main import inside the task method of the runner to squelch import print statement #27 --- .../utils/LungCancerClassifierRunner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py index 56181b72..803a8126 100644 --- a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py +++ b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -17,9 +17,6 @@ import torch -# Import the main module for the grt123 algorithm, which must be used for running the classification -import main - @ValueOutput.Name('lncancerprob') @ValueOutput.Meta(Meta(min=0.0, max=1.0, type="probability")) @@ -84,6 +81,9 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData self.log("Running on the CPU, might be slow...", "NOTICE") n_gpu = 0 + # Import the main module for the grt123 algorithm, which must be used for running the classification + import main + # apply grt123 algorithm results = main.main( skip_detect=False, From 55e34cfe2c6417043222dfe3521546db26295f44 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Mon, 15 Jan 2024 13:22:06 +0100 Subject: [PATCH 065/125] add comments to CLI, add clean method and case-level likelihood extraction to runner, add case-level likelihood to config and meta #39 --- 
models/gc_nnunet_pancreas/config/default.yml | 15 ++++- models/gc_nnunet_pancreas/meta.json | 15 +++-- .../utils/GCNNUnetPancreasRunner.py | 55 +++++++++++++++++-- models/gc_nnunet_pancreas/utils/cli.py | 11 ++-- 4 files changed, 79 insertions(+), 17 deletions(-) diff --git a/models/gc_nnunet_pancreas/config/default.yml b/models/gc_nnunet_pancreas/config/default.yml index 526099e1..ca3a2a3e 100644 --- a/models/gc_nnunet_pancreas/config/default.yml +++ b/models/gc_nnunet_pancreas/config/default.yml @@ -1,13 +1,14 @@ general: version: 1.0 data_base_dir: /app/data - description: base configuration for GC NNUnet Pancreas model (dicom to dicom) + description: base configuration for GC NNUnet Pancreas model (dicom to dicom, and json output) execute: - DicomImporter - MhaConverter - GCNNUnetPancreasRunner - DsegConverter +- ReportExporter - DataOrganizer modules: @@ -24,12 +25,20 @@ modules: DsegConverter: model_name: 'GC NNUnet Pancreas' - source_segs: ['mha:mod=seg'] + source_segs: ['mha:mod=seg:src=cleaned'] target_dicom: dicom:mod=ct skip_empty_slices: True + ReportExporter: + format: compact + includes: + - data: pancreatic_tumor_likelihood + label: pancreatic_tumor_likelihood + value: value + DataOrganizer: targets: - mha:mod=heatmap-->[i:sid]/nnunet_pancreas_heatmap.mha - - mha:mod=seg-->[i:sid]/nnunet_pancreas.seg.mha + - mha:mod=seg:src=cleaned-->[i:sid]/nnunet_pancreas.seg.mha - dicomseg:mod=seg-->[i:sid]/nnunet_pancreas.seg.dcm + - json:mod=report-->[i:sid]/nnunet_pancreas_case_level_likelihood.json diff --git a/models/gc_nnunet_pancreas/meta.json b/models/gc_nnunet_pancreas/meta.json index bce7005b..b24823be 100644 --- a/models/gc_nnunet_pancreas/meta.json +++ b/models/gc_nnunet_pancreas/meta.json @@ -24,16 +24,21 @@ "arteries", "pancreas", "pancreatic duct", - "bile duct", - "cysts", - "renal vein" + "bile duct" ] }, { "type": "Prediction", - "valueType": "number", + "valueType": "Likelihood map", + "label": "Pancreatic tumor likelihood heatmap",
"description": "Pancreatic tumor likelihood heatmap, where each voxel represents a floating point in range [0,1].", + "classes": [] + }, + { + "type": "Prediction", + "valueType": "Likelihood", "label": "Pancreatic tumor likelihood", - "description": "Pancreatic tumor likelihood map with values between 0 and 1", + "description": "Case-level pancreatic tumor likelihood value with a value in range [0,1].", "classes": [] } ], diff --git a/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py index 6ffa6844..9942705c 100644 --- a/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py +++ b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py @@ -9,32 +9,77 @@ ----------------------------------------------------------- """ -from mhubio.core import Module, Instance, InstanceData, DataType, Meta, IO +from mhubio.core import Module, Instance, InstanceData, DataType, Meta, IO, ValueOutput from pathlib import Path import SimpleITK -import numpy as np import sys CLI_PATH = Path(__file__).parent / "cli.py" +@ValueOutput.Name('pancreatic_tumor_likelihood') +@ValueOutput.Label('PancreaticTumorLikelihood') +@ValueOutput.Meta(Meta(min=0.0, max=1.0, type="likelihood")) +@ValueOutput.Type(float) +@ValueOutput.Description('Likelihood of case-level pancreatic cancer.') +class PancreaticTumorLikelihood(ValueOutput): + pass + + class GCNNUnetPancreasRunner(Module): @IO.Instance() @IO.Input('in_data', 'mha:mod=ct', the="input data") @IO.Output('heatmap', 'heatmap.mha', 'mha:mod=heatmap:model=GCNNUnetPancreas', data="in_data", the="heatmap of the pancreatic tumor likelihood") - @IO.Output('segmentation', 'segmentation.mha', 'mha:mod=seg:model=GCNNUnetPancreas:roi=VEIN,ARTERY,PANCREAS,PANCREATIC_DUCT,BILE_DUCT,PANCREAS+CYST,RENAL_VEIN', data="in_data", + @IO.Output('segmentation_raw', 'segmentation_raw.mha',
'mha:mod=seg:src=original:model=GCNNUnetPancreas:roi=VEIN,ARTERY,PANCREAS,PANCREATIC_DUCT,BILE_DUCT,PANCREAS+CYST,RENAL_VEIN', data="in_data", the="original segmentation of the pancreas, with the following classes: " "0-background, 1-veins, 2-arteries, 3-pancreas, 4-pancreatic duct, 5-bile duct, 6-cysts, 7-renal vein") - def task(self, instance: Instance, in_data: InstanceData, heatmap: InstanceData, segmentation: InstanceData, **kwargs) -> None: + @IO.Output('segmentation', 'segmentation.mha', 'mha:mod=seg:src=cleaned:model=GCNNUnetPancreas:roi=VEIN,ARTERY,PANCREAS,PANCREATIC_DUCT,BILE_DUCT', data="in_data", + the="cleaned segmentation of the pancreas, with the following classes: " + "0-background, 1-veins, 2-arteries, 3-pancreas, 4-pancreatic duct, 5-bile duct") + @IO.OutputData('cancer_likelihood', PancreaticTumorLikelihood, the='Case-level pancreatic tumor likelihood. This is equivalent to the maximum of the pancreatic tumor likelihood heatmap.') + def task(self, instance: Instance, in_data: InstanceData, heatmap: InstanceData, segmentation_raw: InstanceData, segmentation: InstanceData, cancer_likelihood: PancreaticTumorLikelihood, **kwargs) -> None: # Call the PDAC CLI + # A CLI was used here to ensure the mhub framework properly captures the nnUNet stdout output cmd = [ sys.executable, str(CLI_PATH), in_data.abspath, heatmap.abspath, - segmentation.abspath + segmentation_raw.abspath ] self.subprocess(cmd, text=True) + + # Remove cysts and renal vein classes from the original segmentation. + # Insufficient training samples were present in the training data for these classes. + # Hence, these classes should be omitted from the final output, since these are not + # expected to produce reliable segmentations.
+        self.clean_segmentation( + segmentation_in=segmentation_raw, + segmentation_out=segmentation + ) + + # Extract case-level cancer likelihood + cancer_likelihood.value = self.extract_case_level_cancer_likelihood( + heatmap=heatmap + ) + + def clean_segmentation(self, segmentation_in: InstanceData, segmentation_out: InstanceData): + self.log("Cleaning output segmentation", level="NOTICE") + seg_sitk = SimpleITK.ReadImage(segmentation_in.abspath) + seg_numpy = SimpleITK.GetArrayFromImage(seg_sitk) + seg_numpy[seg_numpy >= 6] = 0 # remove cysts and renal vein segmentation from original segmentation + remapped_sitk = SimpleITK.GetImageFromArray(seg_numpy) + remapped_sitk.CopyInformation(seg_sitk) + SimpleITK.WriteImage(remapped_sitk, segmentation_out.abspath, True) + + def extract_case_level_cancer_likelihood(self, heatmap: InstanceData): + self.log("Extracting case-level cancer likelihood", level="NOTICE") + heatmap_sitk = SimpleITK.ReadImage(heatmap.abspath) + f = SimpleITK.MinimumMaximumImageFilter() + f.Execute(heatmap_sitk) + cancer_likelihood = f.GetMaximum() + assert 0.0 <= cancer_likelihood <= 1.0, "Cancer likelihood value must be in range [0.0, 1.0]" + return cancer_likelihood diff --git a/models/gc_nnunet_pancreas/utils/cli.py b/models/gc_nnunet_pancreas/utils/cli.py index 460b5a64..b12ef31f 100644 --- a/models/gc_nnunet_pancreas/utils/cli.py +++ b/models/gc_nnunet_pancreas/utils/cli.py @@ -1,12 +1,15 @@ """ ----------------------------------------------------- +------------------------------------------------------------- GC / MHub - CLI for the GC nnUnet Pancreas Algorithm ----------------------------------------------------- + The model algorithm was wrapped in a CLI to ensure + the mhub framework is able to properly capture the nnUNet + stdout/stderr outputs +------------------------------------------------------------- -----------------------------------------------------
+------------------------------------------------------------- Author: Sil van de Leemput Email: sil.vandeleemput@radboudumc.nl ----------------------------------------------------- +------------------------------------------------------------- """ import argparse from pathlib import Path From 9c9508dd74cb64106197ea7604b1d80251e39d36 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Mon, 15 Jan 2024 22:23:34 +0100 Subject: [PATCH 066/125] meta.json - update analysis section and evaluation data section #39 --- models/gc_nnunet_pancreas/meta.json | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/models/gc_nnunet_pancreas/meta.json b/models/gc_nnunet_pancreas/meta.json index b24823be..924857d5 100644 --- a/models/gc_nnunet_pancreas/meta.json +++ b/models/gc_nnunet_pancreas/meta.json @@ -93,20 +93,33 @@ }, "analyses": { "title": "Analysis", - "text": "The study evaluated a medical model's performance for tumor detection by analyzing receiver operating characteristic (ROC) and free-response receiver operating characteristic (FROC) curves, assessing both tumor presence and lesion localization, and compared three configurations using statistical tests and ensemble modeling.", - "references": [], - "tables": [] + "text": "The study evaluated a medical model's performance for tumor detection by analyzing receiver operating characteristic (ROC) and free-response receiver operating characteristic (FROC) curves, assessing both tumor presence and lesion localization, and compared three configurations using statistical tests and ensemble modeling. The table below lists the model's performance on an external evaluation dataset of 361 cases. 
Additional analysis details and results can be found in the original paper [1].", + "references": [ + { + "label": "Fully Automatic Deep Learning Framework for Pancreatic Ductal Adenocarcinoma Detection on Computed Tomography", + "uri": "https://www.mdpi.com/2072-6694/14/2/376" + } + ], + "tables": [ + { + "label": "Evaluation results of the nnUnet_MS model on the external test set of 361 cases.", + "entries": { + "Mean AUC-ROC (95% CI)": "0.991 (0.970-1.0)", + "Mean pAUC-FROC (95% CI)": "3.996 (3.027-4.965)" + } + } + ] }, "evaluation": { "title": "Evaluation Data", - "text": "This framework was tested in an independent, external cohort consisting of two publicly available datasets.", + "text": "This framework was tested in an independent, external cohort consisting of two publicly available datasets of respectively 281 and 80 patients each. The Medical Segmentation Decathlon pancreas dataset (training portion) [1] consisting of 281 patients with pancreatic malignancies (including lesions in the head, neck, body, and tail of the pancreas) and voxel-level annotations for the pancreas and lesion. 
The Cancer Imaging Archive dataset from the US National Institutes of Health Clinical Center [2], containing 80 patients with normal pancreas and respective voxel-level annotations.", "references": [ { - "label": "The Medical Segmentation Decathlon pancreas dataset (training portion) consisting of 281 patients with pancreatic malignancies (including lesions in the head, neck, body, and tail of the pancreas) and voxel-level annotations for the pancreas and lesion.", + "label": "The Medical Segmentation Decathlon pancreas dataset (training portion)", "uri": "http://medicaldecathlon.com/" }, { - "label": "The Cancer Imaging Archive dataset from the US National Institutes of Health Clinical Center, containing 80 patients with normal pancreas and respective voxel-level annotations.", + "label": "The Cancer Imaging Archive dataset from the US National Institutes of Health Clinical Center", "uri": "https://wiki.cancerimagingarchive.net/display/Public/Pancreas-CT" } ], From 38514a041c199aad60fc99f10aed2aa63e1770e6 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Tue, 16 Jan 2024 09:56:35 +0100 Subject: [PATCH 067/125] update fs importer (omit wrapping dicom folder) --- models/nnunet_prostate_zonal_task05/config/default.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/models/nnunet_prostate_zonal_task05/config/default.yml b/models/nnunet_prostate_zonal_task05/config/default.yml index 8c82102c..84aadbfa 100644 --- a/models/nnunet_prostate_zonal_task05/config/default.yml +++ b/models/nnunet_prostate_zonal_task05/config/default.yml @@ -16,9 +16,9 @@ modules: outsource_instances: True import_id: patientID/studyID structures: - - $patientID/$studyID@instance/$part@/dicom@dicom - - $patientID@instance:studyID=none/ADC$part@/dicom@dicom - - $patientID@instance:studyID=none/T2$part@/dicom@dicom + - $patientID/$studyID@instance/$part@bundle@dicom + - $patientID@instance:studyID=none/ADC$part@bundle@dicom + - 
$patientID@instance:studyID=none/T2$part@bundle@dicom NiftiConverter: in_datas: dicom:part=ADC|T2 @@ -36,5 +36,5 @@ modules: DataOrganizer: targets: - DICOMSEG:mod=seg-->[i:patientID]/[i:studyID]/nnunet_prostate_zonal_task05.seg.dcm - - NIFTI:mod=seg-->[i:patientID]/[i:studyID]/results.nii.gz - - LOG-->[i:patientID]/[i:studyID]/logs/[d:part]/[d:log-task]_[path] \ No newline at end of file +# - NIFTI:mod=seg-->[i:patientID]/[i:studyID]/results.nii.gz +# - LOG-->[i:patientID]/[i:studyID]/logs/[d:part]/[basename] \ No newline at end of file From cdf9a0bbbf43cbaac66480f96df1e0cd8e81c18b Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Tue, 16 Jan 2024 10:35:02 +0100 Subject: [PATCH 068/125] update github checks, include new test for correct model name in model meta file --- .github/scripts/mhub_check.py | 5 ++++- .github/scripts/utils.py | 10 ++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.github/scripts/mhub_check.py b/.github/scripts/mhub_check.py index 6a1c0c2e..48cc162c 100644 --- a/.github/scripts/mhub_check.py +++ b/.github/scripts/mhub_check.py @@ -34,9 +34,12 @@ # check folder structure utils.validateModelFolder(base='models', model_name=model_name) - # check meta.json + # check meta.json (schema) utils.validateModelMetaJson(model_meta_json_file=os.path.join('models', model_name, 'meta.json')) + # check additional requirements for meta.json + utils.validateModelMetaJson_modelName(model_meta_json_file=os.path.join('models', model_name, 'meta.json'), model_name=model_name) + # validate dockerfile utils.validateDockerfile(base='models', model_name=model_name) diff --git a/.github/scripts/utils.py b/.github/scripts/utils.py index eb0eddb6..6da3e60c 100644 --- a/.github/scripts/utils.py +++ b/.github/scripts/utils.py @@ -110,6 +110,16 @@ def validateModelMetaJson(model_meta_json_file: str): except jsonschema.ValidationError as e: raise MHubComplianceError(f"Model meta json is not compliant with the schema: {e.message}", 
DocuRef.MODEL_META_JSON) +def validateModelMetaJson_modelName(model_meta_json_file: str, model_name: str): + + # load model meta json + with open(model_meta_json_file, "r") as f: + model_meta_json = json.load(f) + + # check that the model name is correct + if model_meta_json["name"] != model_name: + raise MHubComplianceError(f"Model name in meta.json does not match model name in folder structure: {model_meta_json['name']} != {model_name}", DocuRef.MODEL_META_JSON) + def validateDockerfile(base: str, model_name: str): # get dockerfile path From 4969367701b8c3dc17140535b77bd3bb292d8402 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 16 Jan 2024 14:26:35 +0100 Subject: [PATCH 069/125] updated model/algorithm version to latest commit, removed manual code patches #39 --- models/gc_nnunet_pancreas/dockerfiles/Dockerfile | 14 +++----------- models/gc_nnunet_pancreas/utils/cli.py | 2 +- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile index 74cd3946..4b84e0f3 100644 --- a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile +++ b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile @@ -12,20 +12,12 @@ RUN apt update && \ rm -rf /var/lib/apt/lists/* # Install the model weights and the algorithm files -# * Pull algorithm from repo into /opt/algorithm (main branch, commit e4f4008c6e18e60a79f693448562a340a9252aa8) +# * Pull algorithm from repo into /opt/algorithm (main branch, commit 2d74e98f66f0a57da66ed26e97448e53571199db) # * Remove .git folder to keep docker layer small -# * Replace input images path in process.py with an existing folder to avoid errors -# * Add specific data types and compression options to output data structures in process.py to reduce generated output footprint RUN git clone https://github.com/DIAGNijmegen/CE-CT_PDAC_AutomaticDetection_nnUnet.git /opt/algorithm && \ cd /opt/algorithm && \ - git reset --hard 
e4f4008c6e18e60a79f693448562a340a9252aa8 && \ - rm -rf /opt/algorithm/.git && \ - sed -i 's/Path("\/input\/images\/")/Path("\/app")/g' /opt/algorithm/process.py && \ - sed -i 's/pred_2_np = sitk\.GetArrayFromImage(pred_2_nii)/pred_2_np = sitk\.GetArrayFromImage(pred_2_nii)\.astype(np\.uint8)/g' /opt/algorithm/process.py && \ - sed -i 's/pm_image = np\.zeros(image_np\.shape)/pm_image = np\.zeros(image_np\.shape, dtype=np\.float32)/g' /opt/algorithm/process.py && \ - sed -i 's/segmentation_np = np\.zeros(image_np\.shape)/segmentation_np = np\.zeros(image_np\.shape, dtype=np\.uint8)/g' /opt/algorithm/process.py && \ - sed -i 's/sitk\.WriteImage(segmentation_image, str(self\.segmentation))/sitk\.WriteImage(segmentation_image, str(self\.segmentation), True)/g' /opt/algorithm/process.py && \ - sed -i 's/sitk\.WriteImage(pred_itk_resampled, str(self\.heatmap))/sitk\.WriteImage(pred_itk_resampled, str(self\.heatmap), True)/g' /opt/algorithm/process.py + git reset --hard 2d74e98f66f0a57da66ed26e97448e53571199db && \ + rm -rf /opt/algorithm/.git # Set this environment variable as a shortcut to avoid nnunet 1.7.0 crashing the build # by pulling sklearn instead of scikit-learn diff --git a/models/gc_nnunet_pancreas/utils/cli.py b/models/gc_nnunet_pancreas/utils/cli.py index b12ef31f..67181709 100644 --- a/models/gc_nnunet_pancreas/utils/cli.py +++ b/models/gc_nnunet_pancreas/utils/cli.py @@ -23,7 +23,7 @@ def run_pdac_detection( ): # Configure the algorithm pipeline class and run it algorithm = PDACDetectionContainer() - algorithm.ct_image = str(input_ct_image) # set as str not Path + algorithm.ct_image = input_ct_image algorithm.heatmap = output_heatmap algorithm.segmentation = output_segmentation algorithm.process() From 1b248256ba62d17124a702f518e34e5b4e967dfb Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 16 Jan 2024 14:30:00 +0100 Subject: [PATCH 070/125] meta.json - match model name #39 --- models/gc_nnunet_pancreas/meta.json | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/models/gc_nnunet_pancreas/meta.json b/models/gc_nnunet_pancreas/meta.json index 924857d5..9fdba73a 100644 --- a/models/gc_nnunet_pancreas/meta.json +++ b/models/gc_nnunet_pancreas/meta.json @@ -1,6 +1,6 @@ { "id": "bf7ae4bb-c6f5-4b1e-89aa-a8de246def57", - "name": "pdac_detection_in_ct", + "name": "gc_nnunet_pancreas", "title": "Pancreatic Ductal Adenocarcinoma Detection in CT", "summary": { "description": "This algorithm produces a tumor likelihood heatmap for the presence of pancreatic ductal adenocarcinoma (PDAC) in an input venous-phase contrast-enhanced computed tomography scan (CECT). Additionally, the algorithm provides the segmentation of multiple surrounding anatomical structures such as the pancreatic duct, common bile duct, veins and arteries. The heatmap and segmentations are resampled to the same spatial resolution and physical dimensions as the input CECT image for easier visualisation.", From 62871f2b24515bdf5289fced3a333cdd5b22bc72 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 18 Jan 2024 10:32:06 +0100 Subject: [PATCH 071/125] meta.json - matched model name, updated output label and description --- models/gc_grt123_lung_cancer/meta.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json index 07024e69..27ee2088 100644 --- a/models/gc_grt123_lung_cancer/meta.json +++ b/models/gc_grt123_lung_cancer/meta.json @@ -1,6 +1,6 @@ { "id": "2e67a3cc-4680-4058-bf4e-f965cf50f06f", - "name": "lung_cancer_risk_estimation", + "name": "gc_grt123_lung_cancer", "title": "Lung cancer risk estimation on thorax CT scans", "summary": { "description": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. The model consists of two modules. The first one is a 3D region proposal network for nodule detection, which outputs all suspicious nodules for a subject. 
The second one selects the top five nodules based on the detection confidence, evaluates their cancer probabilities and combines them with a leaky noisy-or gate to obtain the probability of lung cancer for the subject. This model was the winner of the Data Science Bowl 2017 competition hosted on Kaggle.", @@ -20,8 +20,8 @@ { "type": "Prediction", "valueType": "number", - "label": "Cancer probability score", - "description": "Probability that the scan contains cancer nodules", + "label": "Lung thorax cancer nodule probability score", + "description": "The likelihood of the presence of cancer nodules in the lungs.", "classes": [] } ], From 4169241c3021bc94c9240f54dc0d81a64b323d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leonard=20N=C3=BCrnberg?= Date: Thu, 18 Jan 2024 11:57:18 +0100 Subject: [PATCH 072/125] Create submission_review.yml Create a new workflow that automatically applies the `REQUEST REVIEW` label when starting a comment with `/review`. External collaborators without repository write access can use this magic keyword to alert the MHub team that a submission PR is ready to be reviewed. 
--- .github/workflows/submission_review.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/submission_review.yml diff --git a/.github/workflows/submission_review.yml b/.github/workflows/submission_review.yml new file mode 100644 index 00000000..38544b41 --- /dev/null +++ b/.github/workflows/submission_review.yml @@ -0,0 +1,21 @@ +name: MHub Contribution Magic Keywords + +on: + issue_comment: + types: [created, edited] + +jobs: + autolabel: + if: ${{ github.event.issue.pull_request }} + name: Add labels to PR + runs-on: [ubuntu-latest] + + permissions: + pull-requests: write + + steps: + - name: Add Request Review Label + uses: actions-ecosystem/action-add-labels@v1 + if: ${{ startsWith(github.event.comment.body, '/review') }} + with: + labels: REQUEST REVIEW From 2287b3736e94fba178c2f03c2ae479c1c2c989c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leonard=20N=C3=BCrnberg?= Date: Thu, 18 Jan 2024 12:30:36 +0100 Subject: [PATCH 073/125] Update submission_review.yml Add a new magic keyword to set the `REQUEST TEST` label automatically. 
--- .github/workflows/submission_review.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/submission_review.yml b/.github/workflows/submission_review.yml index 38544b41..8631a91d 100644 --- a/.github/workflows/submission_review.yml +++ b/.github/workflows/submission_review.yml @@ -19,3 +19,9 @@ jobs: if: ${{ startsWith(github.event.comment.body, '/review') }} with: labels: REQUEST REVIEW + + - name: Add Request Test Label + uses: actions-ecosystem/action-add-labels@v1 + if: ${{ startsWith(github.event.comment.body, '/test') }} + with: + labels: REQUEST TEST From 561883ba1e65fc5be2dae57dd37a2ed82de263bb Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 18 Jan 2024 14:28:16 +0100 Subject: [PATCH 074/125] meta.json - added disclaimer for output segmentation map --- models/gc_nnunet_pancreas/meta.json | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/models/gc_nnunet_pancreas/meta.json b/models/gc_nnunet_pancreas/meta.json index 9fdba73a..96b8ccbc 100644 --- a/models/gc_nnunet_pancreas/meta.json +++ b/models/gc_nnunet_pancreas/meta.json @@ -17,16 +17,6 @@ } ], "outputs": [ - { - "type": "Segmentation", - "classes": [ - "veins", - "arteries", - "pancreas", - "pancreatic duct", - "bile duct" - ] - }, { "type": "Prediction", "valueType": "Likelihood map", @@ -40,6 +30,17 @@ "label": "Pancreatic tumor likelihood", "description": "Case-level pancreatic tumor likelihood value with a value in range [0,1].", "classes": [] + }, + { + "type": "Segmentation", + "label": "Segmentation of pancreas related tissues. 
These segmentation classes were not thoroughly validated, use them on your own risk!", + "classes": [ + "veins", + "arteries", + "pancreas", + "pancreatic duct", + "bile duct" + ] } ], "model": { From 02af716e84ceda607d30eec786d8771c3505611b Mon Sep 17 00:00:00 2001 From: Cosmin Ciausu Date: Fri, 19 Jan 2024 17:04:17 -0500 Subject: [PATCH 075/125] Update default.yml -- Modality tag instead of mod --- models/nnunet_prostate_task24/config/default.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/nnunet_prostate_task24/config/default.yml b/models/nnunet_prostate_task24/config/default.yml index 0df38e60..bde6f648 100644 --- a/models/nnunet_prostate_task24/config/default.yml +++ b/models/nnunet_prostate_task24/config/default.yml @@ -16,7 +16,7 @@ modules: import_dir: sorted_data sort_data: true meta: - mod: 'mr' + mod: '%Modality' NiftiConverter: in_datas: dicom:mod=mr From 625f6b703ee0f1a880f6d5e566a76181dfd99eb4 Mon Sep 17 00:00:00 2001 From: Cosmin Ciausu Date: Mon, 22 Jan 2024 01:12:53 -0500 Subject: [PATCH 076/125] Update meta.json added tables data and changed model name --- models/nnunet_prostate_task24/meta.json | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/models/nnunet_prostate_task24/meta.json b/models/nnunet_prostate_task24/meta.json index 50c4e568..19a3cc15 100644 --- a/models/nnunet_prostate_task24/meta.json +++ b/models/nnunet_prostate_task24/meta.json @@ -1,6 +1,6 @@ { "id": "...", - "name": "nnunet_prostate_task24_promise", + "name": "nnunet_prostate_task24", "title": "nnU-Net (Whole prostate segmentation)", "summary": { "description": "nnU-Net's whole prostate segmentation model is a single-modality (i.e. 
T2) input AI-based pipeline for the automated segmentation of the whole prostate on MRI scans.", @@ -86,7 +86,23 @@ "evaluation": { "title": "Evaluation Data", "text": "The evaluation dataset consists of 30 test samples coming from the Promise12 challenge.", - "tables": [], + "tables": [ + { + "label": "Promise12 training set Average DSC using five fold cross-validation", + "entries": { + "2D": "0.8932", + "3d_fullres": "0.8891", + "Best ensemble (2D + 3D_fullres)": "0.9029", + "Postprocessed": "0.9030", + } + }, + { + "label": "Promise12 test set Average DSC", + "entries": { + "Test set average DSC": "0.9194m" + } + } + ], "references": [ { "label": "Evaluation of prostate segmentation algorithms for MRI: The PROMISE12 challenge", @@ -122,4 +138,4 @@ ] } } -} \ No newline at end of file +} From ab4ac7d47b225be0fca6c7b70037c786778fe84f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leonard=20N=C3=BCrnberg?= Date: Mon, 22 Jan 2024 15:21:06 +0100 Subject: [PATCH 077/125] Update mhub_check.py display error message for unexpected exceptions --- .github/scripts/mhub_check.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/scripts/mhub_check.py b/.github/scripts/mhub_check.py index 48cc162c..a53c3c70 100644 --- a/.github/scripts/mhub_check.py +++ b/.github/scripts/mhub_check.py @@ -55,6 +55,7 @@ print() print("---------------- CHECK FAILED ----------------") print("An unexpected error occured during compliance checks.") + print(str(e)) print() sys.exit(1) @@ -63,4 +64,4 @@ print("---------------- CHECK PASSED ----------------") print("All compliance checks passed.") print("Note: compliance checks are a beta feature. Passing all automated compliance checks does not guarantee that your model is compliant with the MHub standard. We will now perform a manual review of your model. 
Testing your model on a public dataset is obligatory.") -print() \ No newline at end of file +print() From 1ce9947bb799416a1123165392ba32278f04edfa Mon Sep 17 00:00:00 2001 From: Cosmin Ciausu Date: Mon, 22 Jan 2024 09:28:03 -0500 Subject: [PATCH 078/125] Update meta.json fixed formatting issue, added reference to the evaluation section. --- models/nnunet_prostate_task24/meta.json | 26 ++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/models/nnunet_prostate_task24/meta.json b/models/nnunet_prostate_task24/meta.json index 19a3cc15..c21f91b5 100644 --- a/models/nnunet_prostate_task24/meta.json +++ b/models/nnunet_prostate_task24/meta.json @@ -71,7 +71,7 @@ }, "analyses": { "title": "Quantitative Analyses", - "text": "The model's performance was assessed using the Dice Coefficient, in the context of the Promise12 challenge. The complete breakdown of the metrics can be consulted on GrandChallenge [1] and is reported in the supplementary material to the publication [2].", + "text": "The model's performance was assessed using the Dice Coefficient, in the context of the Promise12 challenge. A brief summary of the evaluation results on internal data can be found in the evaluation section. 
The complete breakdown of the metrics can be consulted on GrandChallenge [1] and is reported in the supplementary material to the publication [2].", "references": [ { "label": "Evaluation of prostate segmentation algorithms for MRI: The PROMISE12 challenge", @@ -88,20 +88,20 @@ "text": "The evaluation dataset consists of 30 test samples coming from the Promise12 challenge.", "tables": [ { - "label": "Promise12 training set Average DSC using five fold cross-validation", - "entries": { - "2D": "0.8932", - "3d_fullres": "0.8891", - "Best ensemble (2D + 3D_fullres)": "0.9029", - "Postprocessed": "0.9030", - } - }, + "label": "Promise12 training set Average DSC using five fold cross-validation", + "entries": { + "2D": "0.8932", + "3d_fullres": "0.8891", + "Best ensemble (2D + 3D_fullres)": "0.9029", + "Postprocessed": "0.9030" + } + }, { - "label": "Promise12 test set Average DSC", - "entries": { - "Test set average DSC": "0.9194m" + "label": "Promise12 test set Average DSC", + "entries": { + "Test set average DSC": "0.9194" + } } - } ], "references": [ { From d36dc19ea0cd2211e62a0db7ff30faddf99373f9 Mon Sep 17 00:00:00 2001 From: Cosmin Ciausu Date: Mon, 22 Jan 2024 12:35:06 -0500 Subject: [PATCH 079/125] Update Dockerfile removed commented code --- models/monai_prostate158/dockerfiles/Dockerfile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/models/monai_prostate158/dockerfiles/Dockerfile b/models/monai_prostate158/dockerfiles/Dockerfile index 9e741b49..22cab1b7 100644 --- a/models/monai_prostate158/dockerfiles/Dockerfile +++ b/models/monai_prostate158/dockerfiles/Dockerfile @@ -25,9 +25,6 @@ RUN python3 -m monai.bundle download "prostate_mri_anatomy" --bundle_dir ${WEIGH #define path to bundle root ENV BUNDLE_ROOT=/app/models/monai_prostate158/bundle/prostate_mri_anatomy -# specify nnunet specific environment variables -# ENV WEIGHTS_FOLDER=$WEIGHTS_DIR - # Default run script ENTRYPOINT ["mhub.run"] -CMD ["--config", 
"/app/models/monai_prostate158/config/default.yml"] \ No newline at end of file +CMD ["--config", "/app/models/monai_prostate158/config/default.yml"] From 459790bba1209f55d6e4ac8db9e28fe46bbf02b4 Mon Sep 17 00:00:00 2001 From: Cosmin Ciausu Date: Mon, 22 Jan 2024 14:29:11 -0500 Subject: [PATCH 080/125] Update default.yml delete center_crop = True argument, since its enabled by default renamed model_name in DsegConverter --- models/monai_prostate158/config/default.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/models/monai_prostate158/config/default.yml b/models/monai_prostate158/config/default.yml index e62ac281..b4284bbb 100644 --- a/models/monai_prostate158/config/default.yml +++ b/models/monai_prostate158/config/default.yml @@ -24,14 +24,13 @@ modules: Prostate158Runner: in_data: nifti:mod=mr - apply_center_crop: True - + DsegConverter: source_segs: nifti:mod=seg:roi=PROSTATE_TRANSITION_ZONE,PROSTATE_PERIPHERAL_ZONE target_dicom: dicom:mod=mr - model_name: 'Segmentation of prostate regions, Prostate158' + model_name: 'MONAI Prostate158' skip_empty_slices: True DataOrganizer: targets: - - dicomseg-->[i:sid]/prostate158.seg.dcm \ No newline at end of file + - dicomseg-->[i:sid]/prostate158.seg.dcm From e003d046d80f7e15d6db58b08d2ff655029de13f Mon Sep 17 00:00:00 2001 From: Cosmin Ciausu Date: Mon, 22 Jan 2024 14:36:48 -0500 Subject: [PATCH 081/125] Update Prostate158Runner.py --- models/monai_prostate158/utils/Prostate158Runner.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/models/monai_prostate158/utils/Prostate158Runner.py b/models/monai_prostate158/utils/Prostate158Runner.py index 24a631ca..c0b42a5e 100644 --- a/models/monai_prostate158/utils/Prostate158Runner.py +++ b/models/monai_prostate158/utils/Prostate158Runner.py @@ -17,18 +17,16 @@ from mhubio.modules.runner.ModelRunner import ModelRunner import json -@IO.Config('apply_center_crop', bool, False, the='flag to apply center cropping to 
input_image') - -class Prostate158Runner(ModelRunner): +@IO.Config('apply_center_crop', bool, True, the='flag to apply center cropping to input_image') +class Prostate158Runner(Module): apply_center_crop : bool @IO.Instance() @IO.Input("in_data", 'nifti:mod=mr', the="input T2 sequence data to run prostate158 on") @IO.Output('out_data', 'monai_prostate158.nii.gz', - 'nifti:mod=seg:model=Prostate158:roi=PROSTATE_TRANSITION_ZONE,PROSTATE_PERIPHERAL_ZONE', + 'nifti:mod=seg:model=MonaiProstate158:roi=PROSTATE_TRANSITION_ZONE,PROSTATE_PERIPHERAL_ZONE', data='in_data', bundle='model', the="predicted segmentation model") - def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: # bring input data in nnunet specific format From fca4f16b30bd99474ebdccf12744d0ad8b956dd4 Mon Sep 17 00:00:00 2001 From: Cosmin Ciausu Date: Mon, 22 Jan 2024 14:53:04 -0500 Subject: [PATCH 082/125] Update meta.json added external evaluation tables and references --- models/monai_prostate158/meta.json | 36 ++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/models/monai_prostate158/meta.json b/models/monai_prostate158/meta.json index 01f9514a..95092f2a 100644 --- a/models/monai_prostate158/meta.json +++ b/models/monai_prostate158/meta.json @@ -81,10 +81,32 @@ ] }, "evaluation": { - "title": "Evaluation Data", - "text": "The evaluation dataset consists of 20 internal validation samples.", - "tables": [], - "references": [] + "title": "External Evaluation Data", + "text": "The evaluation datasets consist of 186 ProstateX samples and 32 prostate MRI Medical Decathlon dataset samples.", + "tables": [ + { + "label": "Medical Decathlon mean DSC for the segmentation of the central gland and peripheral zone", + "entries": { + "Central gland": "0.82", + "Peripheral zone": "0.64" + } + }, + { + "label": "ProstateX mean DSC for the segmentation of the central gland and peripheral zone", + "entries": { + "Central gland": "0.86", + 
"Peripheral zone": "0.71" + } + } + ], + "references": [{ + "label": "Medical Segmentation Decathlon", + "uri": "https://www.nature.com/articles/s41467-022-30695-9" + }, + { + "label": "Quality control and whole-gland, zonal and lesion annotations for the PROSTATEx challenge public dataset", + "uri": "https://www.sciencedirect.com/science/article/abs/pii/S0720048X21001273" + }] }, "training": { "title": "Training Data", @@ -99,10 +121,6 @@ } ], "references": [ - { - "label": "Medical Segmentation Decathlon", - "uri": "https://www.nature.com/articles/s41467-022-30695-9" - }, { "label": "Prostate158 dataset (Zenodo access)", "uri": "https://zenodo.org/records/6481141" @@ -110,4 +128,4 @@ ] } } -} \ No newline at end of file +} From 653d8e77228e78cff7a8807f386f153cff5f27ec Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Thu, 25 Jan 2024 10:28:53 +0100 Subject: [PATCH 083/125] update model import build utility to preserve the models git commit before removing the git folder --- base/buildutils/import_mhub_model.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/base/buildutils/import_mhub_model.sh b/base/buildutils/import_mhub_model.sh index 7968b452..2bdd6f5c 100755 --- a/base/buildutils/import_mhub_model.sh +++ b/base/buildutils/import_mhub_model.sh @@ -40,4 +40,16 @@ git init git fetch ${REPO_URL} ${REPO_BRANCH} git merge FETCH_HEAD git sparse-checkout set "models/${MODEL_NAME}" + +# get the commit hash, store it in a file and print it out +MODEL_COMMIT_HASH=$(git rev-parse HEAD) +echo ${MODEL_COMMIT_HASH} > buildutils/model_commit_hash.txt + +# print models commit +echo +echo "Imported model definition from MHub models repository." +echo "└── COMMIT HASH .... 
${MODEL_COMMIT_HASH}" +echo + +# remove the .git folder rm -r .git \ No newline at end of file From 6a9c22c23a45874c794522112d6975e0b3af216f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leonard=20N=C3=BCrnberg?= Date: Thu, 25 Jan 2024 13:06:54 +0100 Subject: [PATCH 084/125] Update mhub.version Read model commit from a file instead of git. --- base/bin/mhub.version | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/base/bin/mhub.version b/base/bin/mhub.version index 9c161feb..d87eca7d 100755 --- a/base/bin/mhub.version +++ b/base/bin/mhub.version @@ -15,7 +15,7 @@ echo -e "segdb==${SEGDB}" if [ -d "$MODEL_FOLDER" ]; then # model repo commit - MODEL=$(git -C $MODEL_FOLDER show -s | grep commit | cut -d" " -f 2) + { MODEL=$(< /app/buildutils/model_commit_hash.txt); } 2> /dev/null echo -e "model==${MODEL}" echo -e "+++" @@ -36,4 +36,21 @@ fi # pip freeze without segdb and mhubio (already on top of the lists, # since for now they are commits). Ideally, this should return only pip versions # (although some package might be installed from git by contributors) -pip freeze | grep -v "segdb" | grep -v "mhubio" \ No newline at end of file +pip freeze | grep -v "segdb" | grep -v "mhubio" + +# collect additional information on installed system dependencies. +# to allow contributors to include additional dependencies, we should use a environment variable or a file instead. 
+ +# versions of python, pip, plastimatch, jq, git, libopenslide-dev, libvips-dev, dcm2niix, ffmpeg, libsm6, libxext6 +# echo -e "+++" +# echo -e "python==$(python3 --version 2>&1)" +# echo -e "pip==$(pip --version 2>&1)" +# echo -e "plastimatch==$(plastimatch --version 2>&1)" +# echo -e "jq==$(jq --version 2>&1)" +# echo -e "git==$(git --version 2>&1)" +# echo -e "libopenslide-dev==$(dpkg -s libopenslide-dev | grep Version)" +# echo -e "libvips-dev==$(dpkg -s libvips-dev | grep Version)" +# echo -e "dcm2niix==$(dcm2niix -h | grep "dcm2niiX version" | cut -d"v" -f3)" +# echo -e "ffmpeg==$(ffmpeg -version 2>&1 | grep ffmpeg | cut -d" " -f3)" +# echo -e "libsm6==$(dpkg -s libsm6 | grep Version)" +# echo -e "libxext6==$(dpkg -s libxext6 | grep Version)" From 2df25f31616977f071b5ba3807f4a5e194c50880 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 1 Feb 2024 13:19:12 +0100 Subject: [PATCH 085/125] updated to lastest version of algorithm, changed to output the raw heatmap --- models/gc_nnunet_pancreas/dockerfiles/Dockerfile | 4 ++-- models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py | 4 ++-- models/gc_nnunet_pancreas/utils/cli.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile index 4b84e0f3..77cc05b9 100644 --- a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile +++ b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile @@ -12,11 +12,11 @@ RUN apt update && \ rm -rf /var/lib/apt/lists/* # Install the model weights and the algorithm files -# * Pull algorithm from repo into /opt/algorithm (main branch, commit 2d74e98f66f0a57da66ed26e97448e53571199db) +# * Pull algorithm from repo into /opt/algorithm (main branch, commit 15dd550beada43a8a55b81a32d9b3904a1cf8d30) # * Remove .git folder to keep docker layer small RUN git clone https://github.com/DIAGNijmegen/CE-CT_PDAC_AutomaticDetection_nnUnet.git /opt/algorithm && \ cd /opt/algorithm && \ 
- git reset --hard 2d74e98f66f0a57da66ed26e97448e53571199db && \ + git reset --hard 15dd550beada43a8a55b81a32d9b3904a1cf8d30 && \ rm -rf /opt/algorithm/.git # Set this environment variable as a shortcut to avoid nnunet 1.7.0 crashing the build diff --git a/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py index 9942705c..770c203d 100644 --- a/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py +++ b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py @@ -32,7 +32,7 @@ class GCNNUnetPancreasRunner(Module): @IO.Instance() @IO.Input('in_data', 'mha:mod=ct', the="input data") @IO.Output('heatmap', 'heatmap.mha', 'mha:mod=heatmap:model=GCNNUnetPancreas', data="in_data", - the="heatmap of the pancreatic tumor likelihood") + the="raw heatmap of the pancreatic tumor likelihood (not masked with any pancreas segmentations).") @IO.Output('segmentation_raw', 'segmentation_raw.mha', 'mha:mod=seg:src=original:model=GCNNUnetPancreas:roi=VEIN,ARTERY,PANCREAS,PANCREATIC_DUCT,BILE_DUCT,PANCREAS+CYST,RENAL_VEIN', data="in_data", the="original segmentation of the pancreas, with the following classes: " "0-background, 1-veins, 2-arteries, 3-pancreas, 4-pancreatic duct, 5-bile duct, 6-cysts, 7-renal vein") @@ -56,7 +56,7 @@ def task(self, instance: Instance, in_data: InstanceData, heatmap: InstanceData, # Insufficient training samples were present in the training data for these classes. # Hence, these classes should be omitted from the final output, since these are not # expected to produce reliable segmentations. 
- cancer_likelihood.value = self.clean_segementation( + self.clean_segementation( segmentation_in=segmentation_raw, segmentation_out=segmentation ) diff --git a/models/gc_nnunet_pancreas/utils/cli.py b/models/gc_nnunet_pancreas/utils/cli.py index 67181709..99af524e 100644 --- a/models/gc_nnunet_pancreas/utils/cli.py +++ b/models/gc_nnunet_pancreas/utils/cli.py @@ -22,9 +22,9 @@ def run_pdac_detection( input_ct_image: Path, output_heatmap: Path, output_segmentation: Path ): # Configure the algorithm pipeline class and run it - algorithm = PDACDetectionContainer() + algorithm = PDACDetectionContainer(output_raw_heatmap=True) algorithm.ct_image = input_ct_image - algorithm.heatmap = output_heatmap + algorithm.heatmap_raw = output_heatmap algorithm.segmentation = output_segmentation algorithm.process() @@ -39,7 +39,7 @@ def run_pdac_detection_cli(): parser.add_argument( "output_heatmap", type=str, - help="heatmap of the pancreatic tumor likelihood (MHA)", + help="raw heatmap of the pancreatic tumor likelihood (MHA)", ) parser.add_argument( "output_segmentation", From f189937687c05cbb9143bb632d73faec91a4a47c Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 1 Feb 2024 13:23:56 +0100 Subject: [PATCH 086/125] meta.json - moved segmentation disclaimer to description and modified labels #39 --- models/gc_nnunet_pancreas/meta.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/models/gc_nnunet_pancreas/meta.json b/models/gc_nnunet_pancreas/meta.json index 96b8ccbc..c751bfcf 100644 --- a/models/gc_nnunet_pancreas/meta.json +++ b/models/gc_nnunet_pancreas/meta.json @@ -33,7 +33,8 @@ }, { "type": "Segmentation", - "label": "Segmentation of pancreas related tissues. 
These segmentation classes were not thoroughly validated, use them on your own risk!", + "label": "Pancreas segmentation", + "description": "Segmentation of pancreas related tissues, these segmentation classes were not thoroughly validated, use them on your own risk!", "classes": [ "veins", "arteries", From 726fdcb45e12b3b4c267d357326218cd4b44979a Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 8 Feb 2024 13:14:40 +0100 Subject: [PATCH 087/125] fix dependencies conflict new base image #39 --- models/gc_nnunet_pancreas/dockerfiles/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile index 77cc05b9..22b43174 100644 --- a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile +++ b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile @@ -27,7 +27,7 @@ RUN git clone https://github.com/DIAGNijmegen/CE-CT_PDAC_AutomaticDetection_nnUn ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True # Install nnUNet 1.7.0 and other requirements -RUN pip3 install --no-cache-dir -r /opt/algorithm/requirements.txt +RUN pip3 install --no-cache-dir evalutils==0.3.0 nnunet==1.7.0 # Extend the nnUNet installation with custom trainers RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \ From a52b616c215bbad879f7d2c3d1b31ff8c23e21a8 Mon Sep 17 00:00:00 2001 From: Suraj Pai Date: Fri, 16 Feb 2024 11:09:53 -0500 Subject: [PATCH 088/125] Added first working FMCIB model container --- models/fmcib/config/default.yml | 20 +++++++++++ models/fmcib/dockerfiles/Dockerfile | 16 +++++++++ models/fmcib/meta.json | 35 +++++++++++++++++++ models/fmcib/utils/FMCIBRunner.py | 53 +++++++++++++++++++++++++++++ models/fmcib/utils/__init__.py | 1 + 5 files changed, 125 insertions(+) create mode 100644 models/fmcib/config/default.yml create mode 100644 models/fmcib/dockerfiles/Dockerfile create mode 100644 models/fmcib/meta.json create mode 100644 
models/fmcib/utils/FMCIBRunner.py create mode 100644 models/fmcib/utils/__init__.py diff --git a/models/fmcib/config/default.yml b/models/fmcib/config/default.yml new file mode 100644 index 00000000..4f91e608 --- /dev/null +++ b/models/fmcib/config/default.yml @@ -0,0 +1,20 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: "FMCIB pipeline" + +execute: +- FileStructureImporter +- FMCIBRunner +- DataOrganizer + +modules: + FileStructureImporter: + structures: + - $patientID/CT.nrrd@instance@nrrd:mod=ct + - $patientID/masks/GTV-1.nrrd@nrrd + import_id: patientID + + DataOrganizer: + targets: + - json-->[i:patientID]/features.json \ No newline at end of file diff --git a/models/fmcib/dockerfiles/Dockerfile b/models/fmcib/dockerfiles/Dockerfile new file mode 100644 index 00000000..9f3d9603 --- /dev/null +++ b/models/fmcib/dockerfiles/Dockerfile @@ -0,0 +1,16 @@ +FROM mhubai/base:latest + +LABEL authors="bspai@bwh.harvard.edu" + + + +RUN wget https://zenodo.org/records/10528450/files/model_weights.torch?download=1 -O /app/model_weights.torch + +RUN mkdir models +RUN mkdir models/fmcib + +# Install FMCIB package, should install everything else ... 
+RUN pip install foundation-cancer-image-biomarker --pre + +ENTRYPOINT ["python3", "-m", "mhubio.run"] +CMD ["--config", "/app/models/fmcib/config/default.yml", "--print"] diff --git a/models/fmcib/meta.json b/models/fmcib/meta.json new file mode 100644 index 00000000..9256e570 --- /dev/null +++ b/models/fmcib/meta.json @@ -0,0 +1,35 @@ +{ + "id": "", + "name": "fmcib", + "title": "Foundation Model for Cancer Imaging Biomarkers", + "summary": { + "description": "This algorithm extracts a 4096 dimensonal feature set for a volume centered on the tumor location", + "inputs": [ + { + + }, + { + + } + ], + "outputs": [ + { + + } + ], + "model": { + "architecture": "Resnet50 (2x wide)", + "training": "weakly-supervised contrastive learning", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 11467 + }, + "evaluation": { + }, + "public": true, + "external": false + } + } +} \ No newline at end of file diff --git a/models/fmcib/utils/FMCIBRunner.py b/models/fmcib/utils/FMCIBRunner.py new file mode 100644 index 00000000..4e783867 --- /dev/null +++ b/models/fmcib/utils/FMCIBRunner.py @@ -0,0 +1,53 @@ +""" +--------------------------------------------------------- +Author: Suraj Pia +Email: bspai@bwh.harvard.edu +--------------------------------------------------------- +""" + +import json +import torch +from fmcib.models import fmcib_model +import SimpleITK as sitk +from mhubio.core import Instance, InstanceData, IO, Module +from fmcib.preprocessing import preprocess + + +class FMCIBRunner(Module): + @IO.Instance() + @IO.Input('in_data', 'nrrd:mod=ct', the='Input NRRD file') + @IO.Input('in_mask', 'nrrd|json', the='Tumor mask for the input NRRD file') + @IO.Output('feature_json', 'features.json', "json", bundle='model', the='output JSON file') + def task(self, instance: Instance, in_data: InstanceData, in_mask: InstanceData, feature_json: InstanceData) -> None: + mask_path = in_mask.abspath + mask = sitk.ReadImage(mask_path) + + # Get the CoM of the mask 
+ label_shape_filter = sitk.LabelShapeStatisticsImageFilter() + label_shape_filter.Execute(mask) + try: + centroid = label_shape_filter.GetCentroid(255) + except: + centroid = label_shape_filter.GetCentroid(1) + + x, y, z = centroid + + input_dict = { + "image_path": in_data.abspath, + "coordX": x, + "coordY": y, + "coordZ": z, + } + + image = preprocess(input_dict) + image = image.unsqueeze(0) + model = fmcib_model() + + model.eval() + with torch.no_grad(): + features = model(image) + + feature_dict = {f"feature_{idx}": feature for idx, feature in enumerate(features.flatten().tolist())} + + with open(feature_json.abspath, "w") as f: + json.dump(feature_dict, f) diff --git a/models/fmcib/utils/__init__.py b/models/fmcib/utils/__init__.py new file mode 100644 index 00000000..6d0f2d8d --- /dev/null +++ b/models/fmcib/utils/__init__.py @@ -0,0 +1 @@ +from .FMCIBRunner import FMCIBRunner \ No newline at end of file From 2c8a2f90b26493fa677a3aa74bb1391fe8041198 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Mon, 19 Feb 2024 20:32:25 +0100 Subject: [PATCH 089/125] update model version in meta schema --- .github/schemas/meta.schema.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/schemas/meta.schema.json b/.github/schemas/meta.schema.json index 245ab7ea..47a93327 100644 --- a/.github/schemas/meta.schema.json +++ b/.github/schemas/meta.schema.json @@ -298,6 +298,7 @@ }, "version": { "type": "string", + "pattern": "^\\d+(\\.\\d+)*$", "description": "The version of the model." 
}, "devteam": { From 343c6ab79ca02478501c8c6d53c26744426dd383 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Mon, 26 Feb 2024 11:49:08 +0100 Subject: [PATCH 090/125] meta.json - added version 0.1.0 to details --- models/gc_nnunet_pancreas/meta.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/gc_nnunet_pancreas/meta.json b/models/gc_nnunet_pancreas/meta.json index c751bfcf..c38dcd9a 100644 --- a/models/gc_nnunet_pancreas/meta.json +++ b/models/gc_nnunet_pancreas/meta.json @@ -62,7 +62,7 @@ }, "details": { "name": "Fully Automatic Deep Learning Framework for Pancreatic Ductal Adenocarcinoma Detection on Computed Tomography", - "version": "", + "version": "0.1.0", "devteam": "DIAGNijmegen (Diagnostic Image Analysis Group, Radboud UMC, The Netherlands)", "type": "The models were developed using nnUnet. All models employed a 3D U-Net as the base architecture and were trained for 250.000 training steps with five-fold cross-validation.", "date": { From 2f6a999336845fa14f2c78906bffd070240776df Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Mon, 26 Feb 2024 11:51:29 +0100 Subject: [PATCH 091/125] meta.json - add version 2.0.0 to details --- models/gc_grt123_lung_cancer/meta.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json index 27ee2088..b3d24de0 100644 --- a/models/gc_grt123_lung_cancer/meta.json +++ b/models/gc_grt123_lung_cancer/meta.json @@ -43,7 +43,7 @@ }, "details": { "name": " bodyct-dsb2017-grt123", - "version": "", + "version": "2.0.0", "devteam": "DIAGNijmegen (Diagnostic Image Analysis Group, Radboud UMC, The Netherlands)", "type": "3D Deep Leaky Noisy-or Network", "date": { From 717d3032065dc3cc015ad658d121d5bd9e385966 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Tue, 27 Feb 2024 13:25:10 +0100 Subject: [PATCH 092/125] Updated nnunet t05 zonal prostate model metadata. 
Co-authored-by: Cosmin Ciausu --- models/nnunet_prostate_zonal_task05/meta.json | 48 +++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/models/nnunet_prostate_zonal_task05/meta.json b/models/nnunet_prostate_zonal_task05/meta.json index 08c73346..43eac000 100644 --- a/models/nnunet_prostate_zonal_task05/meta.json +++ b/models/nnunet_prostate_zonal_task05/meta.json @@ -78,7 +78,7 @@ "info": { "use": { "title": "Intended Use", - "text": "This model is intended to perform prostate regions anatomy segmentation in MR ADC and T2 scans. The slice thickness of the training data is 3.6mm. ADC and T2 input modalities are co-registered during training. To assure optimal results during inference, co-registration of ADC and T2 input sequences is recommended. No endorectal coil was present during training." + "text": "This model is intended to perform prostate regions anatomy segmentation in MR ADC and T2 scans. The slice thickness of the training data is 3.6mm. Input ADC and T2 modalities are co-registered during training. No endorectal coil was present during training." 
}, "analyses": { "title": "Quantitative Analyses", @@ -96,8 +96,32 @@ }, "evaluation": { "title": "Evaluation Data", - "text": "The evaluation dataset consists of 16 validation samples coming from the same training collection.", - "tables": [], + "text": "The evaluation dataset consists of 16 validation samples coming from the Medical Decathlon collection.", + "tables": [{ + "label": "mean DSC peripheral zone results on internal training data, using five fold cross-validation", + "entries": { + "2D": "0.6285", + "3D_fullres": "0.6663", + "Best ensemble (2D + 3D_fullres)": "0.6611", + "Postprocessed": "0.6611" + } + }, + { + "label": "mean DSC transition zone results on internal training data, using five fold cross-validation", + "entries": { + "2D": "0.8380", + "3D_fullres": "0.8410", + "Best ensemble (2D + 3D_fullres)": "0.8575", + "Postprocessed": "0.8577" + } + }, + { + "label": "mean DSC prostate zonal regions results on internal test data", + "entries": { + "mean DSC for PZ": "0.77", + "mean DSC for TZ": "0.90" + } + }], "references": [ { "label": "Medical Segmentation Decathlon", @@ -131,6 +155,24 @@ "uri": "https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2" } ] + }, + "limitations":{ + "title": "Dealing with multi-modality input", + "text": "Authors recommend co-registration of ADC and T2 input sequences, as applied during training. At the very least, the ADC and T2 sequence need to have identical geometry for nnUNet to run. Since evaluated ADC and T2 sequences during evaluation might more often that not fail this requirement, we apply resampling of the ADC sequence to the T2 sequence, since T2 tends to have a higher resolution. 
Below are some references regarding nnUnet recommendations for multi-modality input, alongside the paper describing the registration process of Medical Image Decathlon dataset for the ADC and T2 sequences.", + "references": [ + { + "label": "Litjens et al., A pattern recognition approach to zonal segmentation of the prostate on MRI", + "uri": "https://pubmed.ncbi.nlm.nih.gov/23286075/" + }, + { + "label": "Alignment of multi channel inputs for nnunet #502", + "uri": "https://github.com/MIC-DKFZ/nnUNet/issues/502" + }, + { + "label": "Multi-modality dataset conversion issue #306", + "uri": "https://github.com/MIC-DKFZ/nnUNet/issues/306" + } + ] } } } \ No newline at end of file From c25f532b1880f6c2f52848ee61db7cb33412a625 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 27 Feb 2024 16:10:43 +0100 Subject: [PATCH 093/125] meta.json - update date formats, data.public, and model.training fields --- models/gc_lunglobes/meta.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/models/gc_lunglobes/meta.json b/models/gc_lunglobes/meta.json index 47229053..a1fdc8c2 100644 --- a/models/gc_lunglobes/meta.json +++ b/models/gc_lunglobes/meta.json @@ -26,7 +26,7 @@ } ], "model": { "architecture": "Relational two-stage U-net", - "training": "Supervised", + "training": "supervised", "cmpapproach": "3D" }, "data": { @@ -36,7 +36,7 @@ "evaluation": { "vol_samples": 1155 }, - "public": "Partially", + "public": "false", "external": true } }, @@ -46,9 +46,9 @@ "devteam": "DIAGNijmegen (Diagnostic Image Analysis Group, Radboud UMC, The Netherlands)", "type": "Relational two-stage U-Net (Cascade of two relational U-Net, trained end-to-end)", "date": { - "weights": "14/02/22", - "code": "n/a", - "pub": "n/a" + "weights": "2022-02-14", + "code": "2023-11-27", + "pub": "2020-05-15" }, "cite": "W. Xie, C. Jacobs, J. -P. Charbonnier and B. 
van Ginneken, 'Relational Modeling for Robust and Efficient Pulmonary Lobe Segmentation in CT Scans,' in IEEE Transactions on Medical Imaging, vol. 39, no. 8, pp. 2664-2675, Aug. 2020, doi: 10.1109/TMI.2020.2995108.", "license": { From dfd6dd988988e01f532cdb5925b58eb01aab31e3 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Wed, 28 Feb 2024 16:02:23 +0100 Subject: [PATCH 094/125] adding highdicom dependency --- base/dockerfiles/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/base/dockerfiles/Dockerfile b/base/dockerfiles/Dockerfile index ba55e8f2..9b916203 100644 --- a/base/dockerfiles/Dockerfile +++ b/base/dockerfiles/Dockerfile @@ -47,6 +47,7 @@ RUN pip3 install --upgrade pip && pip3 install --no-cache-dir \ panimg \ pydicom \ pydicom-seg \ + highdicom \ rt_utils \ PyYAML \ pyplastimatch \ From 510b6a61e0ffcbe41e9ca50b7454457cc80d0f56 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Thu, 29 Feb 2024 11:49:10 +0100 Subject: [PATCH 095/125] patch metadata to comply with schema and polish casust metadata --- models/casust/meta.json | 102 +++++++++++++++++++++++++++++ models/gc_picai_baseline/meta.json | 2 +- models/lungmask/meta.json | 4 +- models/nnunet_liver/meta.json | 6 +- models/nnunet_pancreas/meta.json | 6 +- models/platipy/meta.json | 6 +- models/totalsegmentator/meta.json | 6 +- 7 files changed, 117 insertions(+), 15 deletions(-) create mode 100644 models/casust/meta.json diff --git a/models/casust/meta.json b/models/casust/meta.json new file mode 100644 index 00000000..b007c70c --- /dev/null +++ b/models/casust/meta.json @@ -0,0 +1,102 @@ +{ + "id": "abaa7929-b02c-422f-8c97-7e4217d63487", + "name": "casust", + "title": "CaSuSt", + "summary": { + "description": "A deep learning model for cardiac sub-structure delineation on planning CT scans. 
The model delineates the heart contours and seven cardiac substructures based on individually trained binary models.", + "inputs": [ { + "label": "Input Image", + "description": "The planning chest CT scan of a RT patient.", + "format": "DICOM", + "modality": "CT", + "bodypartexamined": "Chest", + "slicethickness": "2.5mm", + "non-contrast": true, + "contrast": false + } ], + "outputs": [ { + "type": "Segmentation", + "classes": [ + "HEART", + "LEFT_VENTRICLE", + "RIGHT_VENTRICLE", + "LEFT_ATRIUM", + "RIGHT_ATRIUM", + "CORONARY_ARTERY_LAD", + "CORONARY_ARTERY_CFLX", + "CORONARY_ARTERY_RIGHT" + ] + } ], + "model": { + "architecture": "Seven individual binary U-Net models", + "training": "supervised", + "cmpapproach": "2D" + }, + "data": { + "training": { + "vol_samples": 126 + }, + "evaluation": { + "vol_samples": 22 + }, + "public": false, + "external": false + } + }, + "details": { + "name": "Cardiac Substructure Delineation", + "version": "1.0.0", + "devteam": "Leonard N\u00fcrnberg, MAASTRO Clinic, Clinical Data Science Radiotherapie", + "type": "Individual 2D binary U-Net models", + "date": { + "weights": "22/03/02", + "code": "22/03/02", + "pub": "22/06/23" + }, + "cite": "N\u00fcrnberg, L, Bontempi, D, De Ruysscher, D, et al. Deep learning segmentation of heart substructures in radiotherapy treatment planning. 
Physica Medica: European journal of medical physics, 2022", + "license": { + "code": "Apache 2.0", + "weights": "Apache 2.0" + }, + "publications": [ + { + "title": "Deep learning segmentation of heart substructures in radiotherapy treatment planning", + "uri": "https:\/\/cris.maastrichtuniversity.nl\/en\/publications\/deep-learning-segmentation-of-heart-substructures-in-radiotherapy" + } + ], + "github": "https:\/\/github.com\/LennyN95\/CaSuSt", + "slicer": true + }, + "info": { + "use": { + "title": "Intended Use", + "text": "This model is intended to segment the heart and seven substructures (left ventricle, right ventricle, left atrium, right atrium, coronary artery LAD, coronary artery CFLX, coronary artery right) on planning CT scans of radiotherapy patients. For each substructure, an individual model has been trained. The model is intended to be used in the context of radiotherapy treatment planning, to support the delineation of the heart and its substructures and has been validated by two radiation oncologists." + }, + "analyses": { + "title": "Quantitative Analyses", + "text": "The model's performance was assessed against test data and compared to human readers using the surface Dice score with a 5mm tolerance for the four larger structures (ventricles and atria) and a 3mm tolerance for the three smaller structures (vessels).", + "tables": [ + { + "label": "Mean Surface Dice Score", + "entries": { + "LEFT_VENTRICLE": "0.88 ± 0.07", + "RIGHT_VENTRICLE": "0.83 ± 0.08", + "LEFT_ATRIUM": "0.88 ± 0.09", + "RIGHT_ATRIUM": "0.87 ± 0.09", + "CORONARY_ARTERY_LAD": "0.7 ± 0.16", + "CORONARY_ARTERY_CFLX": "0.56 ± 0.27", + "CORONARY_ARTERY_RIGHT": "0.48 ± 0.18" + } + } + ] + }, + "evaluation": { + "title": "Evaluation Data", + "text": "The model was evaluated on a 15% split of the dataset." + }, + "training": { + "title": "Training Data", + "text": "A dataset was provided by the University Hospital of Turin, Italy. 
The dataset contains a single pCT scan for 80 lung cancer patients and 80 lymphoma patients, resulting in a total of 160 fully annotated pCT scans in DICOM format. The ground truth delineations were performed by multiple radiation oncologists and residents with a different level of expertise (range 1-12 years). To the best of our knowledge, no standard protocol was followed. The most common spacing among 100 of all scans was 1.171875 x 1.171875 x 3.0. All training samples have been re-sampled to this precision." + } + } +} diff --git a/models/gc_picai_baseline/meta.json b/models/gc_picai_baseline/meta.json index 1dbab230..4e270db4 100644 --- a/models/gc_picai_baseline/meta.json +++ b/models/gc_picai_baseline/meta.json @@ -70,7 +70,7 @@ }, "details": { "name": "PI-CAI challenge baseline", - "version": "v2.1.1", + "version": "2.1.1", "devteam": "Diagnostic Image Analysis Group, Radboud University Medical Center, Nijmegen, The Netherlands", "type": "Prediction", "date": { diff --git a/models/lungmask/meta.json b/models/lungmask/meta.json index e24fb649..c8fafc07 100644 --- a/models/lungmask/meta.json +++ b/models/lungmask/meta.json @@ -28,7 +28,7 @@ } ], "model": { "architecture": "U-net", - "training": "Supervised", + "training": "supervised", "cmpapproach": "2D" }, "data": { @@ -38,7 +38,7 @@ "evaluation": { "vol_samples": 191 }, - "public": "Partially", + "public": false, "external": true } }, diff --git a/models/nnunet_liver/meta.json b/models/nnunet_liver/meta.json index 27527748..34b9469f 100644 --- a/models/nnunet_liver/meta.json +++ b/models/nnunet_liver/meta.json @@ -23,8 +23,8 @@ } ], "model": { "architecture": "U-net", - "training": "Supervised", - "cmpapproach": "2D, 3D, ensemble" + "training": "supervised", + "cmpapproach": "ensemble" }, "data": { "training": { @@ -33,7 +33,7 @@ "evaluation": { "vol_samples": 70 }, - "public": "Yes", + "public": true, "external": false } }, diff --git a/models/nnunet_pancreas/meta.json b/models/nnunet_pancreas/meta.json 
index 3f8d3710..b95b10a7 100644 --- a/models/nnunet_pancreas/meta.json +++ b/models/nnunet_pancreas/meta.json @@ -23,8 +23,8 @@ } ], "model": { "architecture": "U-net", - "training": "Supervised", - "cmpapproach": "2D, 3D, ensemble" + "training": "supervised", + "cmpapproach": "ensemble" }, "data": { "training": { @@ -33,7 +33,7 @@ "evaluation": { "vol_samples": 139 }, - "public": "Yes", + "public": true, "external": false } }, diff --git a/models/platipy/meta.json b/models/platipy/meta.json index fd6dee27..3d487a33 100644 --- a/models/platipy/meta.json +++ b/models/platipy/meta.json @@ -38,8 +38,8 @@ } ], "model": { "architecture": "U-net, Atlas", - "training": "Supervised", - "cmpapproach": "Hybrid" + "training": "supervised", + "cmpapproach": "3D" }, "data": { "training": { @@ -48,7 +48,7 @@ "evaluation": { "vol_samples": 30 }, - "public": "Yes", + "public": true, "external": true } }, diff --git a/models/totalsegmentator/meta.json b/models/totalsegmentator/meta.json index 6ce750bb..cebcfe01 100644 --- a/models/totalsegmentator/meta.json +++ b/models/totalsegmentator/meta.json @@ -125,8 +125,8 @@ } ], "model": { "architecture": "U-net", - "training": "Supervised", - "cmpapproach": "2D, 3D, ensemble" + "training": "supervised", + "cmpapproach": "ensemble" }, "data": { "training": { @@ -135,7 +135,7 @@ "evaluation": { "vol_samples": 65 }, - "public": "Yes", + "public": true, "external": false } }, From b4c696bb934cc24b2fdb9b5ab00e25a32510c1a7 Mon Sep 17 00:00:00 2001 From: Cosmin Ciausu Date: Fri, 1 Mar 2024 17:57:42 -0500 Subject: [PATCH 096/125] Update default.yml changed seg output name and added '%Modality' filter --- models/monai_prostate158/config/default.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/monai_prostate158/config/default.yml b/models/monai_prostate158/config/default.yml index b4284bbb..4f8c1a57 100644 --- a/models/monai_prostate158/config/default.yml +++ b/models/monai_prostate158/config/default.yml @@ -16,7 
+16,7 @@ modules: import_dir: sorted_data sort_data: true meta: - mod: 'mr' + mod: '%Modality' NiftiConverter: in_datas: dicom:mod=mr @@ -33,4 +33,4 @@ modules: DataOrganizer: targets: - - dicomseg-->[i:sid]/prostate158.seg.dcm + - dicomseg-->[i:sid]/monai_prostate158.seg.dcm From ca2805b1dc87fee7f9db25bcfad88b2a05a28bcc Mon Sep 17 00:00:00 2001 From: Cosmin Ciausu Date: Fri, 1 Mar 2024 18:20:38 -0500 Subject: [PATCH 097/125] Update meta.json added license information --- models/monai_prostate158/meta.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/models/monai_prostate158/meta.json b/models/monai_prostate158/meta.json index 95092f2a..ad964732 100644 --- a/models/monai_prostate158/meta.json +++ b/models/monai_prostate158/meta.json @@ -47,14 +47,14 @@ "devteam": "Lisa C. Adams, Keno K. Bressem", "type": "Prostate158 (U-Net structure for prostate segmentation)", "date": { - "weights": "2022", - "code": "2022", - "pub": "2022" + "weights": "March 2022", + "code": "April 2022", + "pub": "September 2022" }, "cite": "Lisa C. Adams and Marcus R. Makowski and Günther Engel and Maximilian Rattunde and Felix Busch and Patrick Asbach and Stefan M. Niehues and Shankeeth Vinayahalingam and Bram {van Ginneken} and Geert Litjens and Keno K. 
Bressem, Prostate158 - An expert-annotated 3T MRI dataset and algorithm for prostate cancer detection", "license": { - "code": "Apache 2.0", - "weights": "" + "code": "MIT", + "weights": "CC BY-NC 4.0" }, "publications": [ { From 37a7cae4da9d21b2857dec4ccaf292fb6b33cc85 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Mon, 4 Mar 2024 14:46:01 +0100 Subject: [PATCH 098/125] meta.json - made public field a boolean, changed slice thickness to 0.75mm --- models/gc_lunglobes/meta.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/gc_lunglobes/meta.json b/models/gc_lunglobes/meta.json index a1fdc8c2..d5d530a1 100644 --- a/models/gc_lunglobes/meta.json +++ b/models/gc_lunglobes/meta.json @@ -10,7 +10,7 @@ "format": "DICOM", "modality": "CT", "bodypartexamined": "Chest", - "slicethickness": "2.5mm", + "slicethickness": "0.75mm", "non-contrast": true, "contrast": false } ], @@ -36,7 +36,7 @@ "evaluation": { "vol_samples": 1155 }, - "public": "false", + "public": false, "external": true } }, From 72ca968566286ed1c86454f867015b0120701b7f Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Mon, 4 Mar 2024 15:56:56 +0100 Subject: [PATCH 099/125] fixing gc_lunglobes config --- models/gc_lunglobes/config/default.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/models/gc_lunglobes/config/default.yml b/models/gc_lunglobes/config/default.yml index 6df165fb..5bbbbda3 100644 --- a/models/gc_lunglobes/config/default.yml +++ b/models/gc_lunglobes/config/default.yml @@ -16,14 +16,13 @@ modules: import_dir: sorted_data sort_data: True meta: - mod: ct + mod: '%Modality' MhaConverter: engine: panimg DsegConverter: model_name: GCLungLobes - source_segs: nifti:mod=seg skip_empty_slices: True DataOrganizer: From 5aee97e2b30ce45931442b3e761909d7ba287e38 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Mon, 4 Mar 2024 21:46:02 +0100 Subject: [PATCH 100/125] verify test request via github action --- .github/schemas/testmodel.schema.json | 60 
+++++++++++++++++ .github/scripts/comment_check.py | 81 ++++++++++++++++++++++ .github/workflows/submission_review.yml | 90 +++++++++++++++++++------ 3 files changed, 211 insertions(+), 20 deletions(-) create mode 100644 .github/schemas/testmodel.schema.json create mode 100644 .github/scripts/comment_check.py diff --git a/.github/schemas/testmodel.schema.json b/.github/schemas/testmodel.schema.json new file mode 100644 index 00000000..aacdc646 --- /dev/null +++ b/.github/schemas/testmodel.schema.json @@ -0,0 +1,60 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "sample": { + "type": "object", + "properties": { + "idc_version": { + "type": "string" + }, + "data": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "properties": { + "SeriesInstanceUID": { + "type": "string", + "pattern": "^[\\d\\.]+$" + }, + "aws_url": { + "type": "string", + "pattern": "^s3://[\\w\\-/]+/\\*$" + }, + "path": { + "type": "string", + "pattern": "^[^\\/][\\w\\-\\/]+$" + } + }, + "required": [ + "SeriesInstanceUID", + "aws_url", + "path" + ] + } + } + }, + "required": [ + "idc_version", + "data" + ] + }, + "reference": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri" + } + }, + "required": [ + "url" + ] + } + }, + "required": [ + "sample", + "reference" + ] +} \ No newline at end of file diff --git a/.github/scripts/comment_check.py b/.github/scripts/comment_check.py new file mode 100644 index 00000000..3f1fd569 --- /dev/null +++ b/.github/scripts/comment_check.py @@ -0,0 +1,81 @@ +import sys, os, yaml, json, jsonschema + +YAML_TEST_DEFINITION_SCHEMA_FILE = ".github/schemas/testmodel.schema.json" + +def extract_yaml_test_definition(comment: str): + + # find a code block starting with ```yaml and ending with ``` + start = comment.find("```yaml") + end = comment.find("```", start + 1) + if start == -1 or end == -1: + raise Exception("No YAML code block found in comment") + + # 
extract the code block + yaml_code = comment[start:end] + + # remove the code block markers + yaml_code = yaml_code.replace("```yaml", "").strip() + + return yaml_code + +def validate_yaml_test_definition(yaml_str: str): + + # load yaml into dict + test_definition = yaml.safe_load(yaml_str) + + # load schema + with open(YAML_TEST_DEFINITION_SCHEMA_FILE, "r") as f: + schema = json.load(f) + + # validate + jsonschema.validate(test_definition, schema) + + +def set_action_output(output_name, value) : + """Sets the GitHub Action output. + + Keyword arguments: + output_name - The name of the output + value - The value of the output + """ + if "GITHUB_OUTPUT" in os.environ : + with open(os.environ["GITHUB_OUTPUT"], "a") as f : + print("{0}={1}".format(output_name, value), file=f) + + +if __name__ == "__main__": + + try: + # get comment body from first argument + comment = sys.argv[1] + + # print comment + print(f"Comment ----------------------") + print(comment) + print() + + # extract yaml test definition + yaml_str = extract_yaml_test_definition(comment) + + # validate yaml test definition + validate_yaml_test_definition(yaml_str) + + # print yaml + print(f"Test Definition --------------") + print(yaml_str) + print() + + # print success message + print("YAML test definition is valid") + + # set environment variable for following steps + set_action_output("test_report", "passed") + + except Exception as e: + # set environment variable for following steps + set_action_output("test_report", "failed") + + # print error message + print("YAML test definition is invalid") + print(e) + diff --git a/.github/workflows/submission_review.yml b/.github/workflows/submission_review.yml index 8631a91d..81496258 100644 --- a/.github/workflows/submission_review.yml +++ b/.github/workflows/submission_review.yml @@ -4,24 +4,74 @@ on: issue_comment: types: [created, edited] +permissions: + pull-requests: write + jobs: - autolabel: - if: ${{ github.event.issue.pull_request }} - name: Add 
labels to PR - runs-on: [ubuntu-latest] - - permissions: - pull-requests: write - - steps: - - name: Add Request Review Label - uses: actions-ecosystem/action-add-labels@v1 - if: ${{ startsWith(github.event.comment.body, '/review') }} - with: - labels: REQUEST REVIEW - - - name: Add Request Test Label - uses: actions-ecosystem/action-add-labels@v1 - if: ${{ startsWith(github.event.comment.body, '/test') }} - with: - labels: REQUEST TEST + request_review: + if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/review')}} + name: Request Review + runs-on: [ubuntu-latest] + + steps: + - name: Add Request Review Label + uses: actions-ecosystem/action-add-labels@v1 + with: + labels: REQUEST REVIEW + + request_test: + if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/test') }} + name: Request Test + runs-on: [ubuntu-latest] + + steps: + + # Checkout the latest code from the repo + - name: Checkout repo + uses: actions/checkout@v4 + + # Setup which version of Python to use + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + + # install python dependencies + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install jsonschema PyYAML + + # Display the Python version being used + - name: Display Python version + run: python -c "import sys; print(sys.version)" + + - name: Run check comment script + id: check_comment + env: + COMMENT: ${{ github.event.comment.body }} + run: python .github/scripts/comment_check.py "$COMMENT" + + - name: Add TEST REQUESTED Label + uses: actions-ecosystem/action-add-labels@v1 + if: ${{ steps.check_comment.outputs.test_report == 'passed' }} + with: + labels: TEST REQUESTED + + - name: Remove INVALID TEST REQUEST Label + uses: actions-ecosystem/action-remove-labels@v1 + if: ${{ steps.check_comment.outputs.test_report == 'passed' }} + with: + labels: INVALID TEST REQUEST + + - name: Remove TEST REQUESTED Label + 
uses: actions-ecosystem/action-remove-labels@v1 + if: ${{ steps.check_comment.outputs.test_report == 'failed' }} + with: + labels: TEST REQUESTED + + - name: Add INVALID TEST REQUEST Label + uses: actions-ecosystem/action-add-labels@v1 + if: ${{ steps.check_comment.outputs.test_report == 'failed' }} + with: + labels: INVALID TEST REQUEST From bd48920e67e9a1c3352ca076ca08a849e64f248e Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Tue, 5 Mar 2024 08:14:05 +0100 Subject: [PATCH 101/125] Update test definition schema to allow numeric values for idc version --- .github/schemas/testmodel.schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/schemas/testmodel.schema.json b/.github/schemas/testmodel.schema.json index aacdc646..968d5372 100644 --- a/.github/schemas/testmodel.schema.json +++ b/.github/schemas/testmodel.schema.json @@ -6,7 +6,7 @@ "type": "object", "properties": { "idc_version": { - "type": "string" + "type": ["string", "number"] }, "data": { "type": "array", From 626b11e4dede16d433d0c7750f8e0d962e3dd169 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Tue, 5 Mar 2024 08:47:24 +0100 Subject: [PATCH 102/125] create nnunet segthor model Dockerfile --- models/nnunet_segthor/dockerfiles/Dockerfile | 32 ++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 models/nnunet_segthor/dockerfiles/Dockerfile diff --git a/models/nnunet_segthor/dockerfiles/Dockerfile b/models/nnunet_segthor/dockerfiles/Dockerfile new file mode 100644 index 00000000..0ba84dc1 --- /dev/null +++ b/models/nnunet_segthor/dockerfiles/Dockerfile @@ -0,0 +1,32 @@ +FROM mhubai/base:latest + +# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build +# by pulling sklearn instead of scikit-learn +# N.B. 
this is a known issue: +# https://github.com/MIC-DKFZ/nnUNet/issues/1281 +# https://github.com/MIC-DKFZ/nnUNet/pull/1209 +ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True + +# Install nnunet and platipy +RUN pip3 install --no-cache-dir \ + nnunet==1.7.1 + +# Import the MHub model definiton +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh nnunet_abdominal_oar ${MHUB_MODELS_REPO} + +# pull weights for platipy's nnU-Net so that the user doesn't need to every time a container is run +ENV WEIGHTS_DIR="/root/.nnunet/nnUNet_models/nnUNet/" +ENV WEIGHTS_URL="https://zenodo.org/record/4485926/files/Task055_SegTHOR.zip" +ENV WEIGHTS_FN="Task055_SegTHOR.zip" + +RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL} +RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR} +RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN} + +# specify nnunet specific environment variables +ENV WEIGHTS_FOLDER=$WEIGHTS_DIR + +# Default run script +ENTRYPOINT ["mhub.run"] +CMD ["--config", "/app/models/nnunet_segthor/config/default.yml"] \ No newline at end of file From 201db3ea9d44139d9050754f727b81208f5524d1 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Tue, 5 Mar 2024 08:47:47 +0100 Subject: [PATCH 103/125] create nnunet segthor default config --- models/nnunet_segthor/config/default.yml | 33 ++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 models/nnunet_segthor/config/default.yml diff --git a/models/nnunet_segthor/config/default.yml b/models/nnunet_segthor/config/default.yml new file mode 100644 index 00000000..5a1d68a1 --- /dev/null +++ b/models/nnunet_segthor/config/default.yml @@ -0,0 +1,33 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: default configuration for NNUnet Thoracic Organs at Risk segmentation (dicom to dicom) + +execute: +- DicomImporter +- NiftiConverter +- NNUnetRunner +- DsegConverter +- DataOrganizer + +modules: + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: true + meta: + mod: 
'%Modality' + + NNUnetRunner: + nnunet_task: Task055_SegTHOR + nnunet_model: 3d_lowres + roi: ESOPHAGUS,HEART,TRACHEA,AORTA + + DsegConverter: + source_segs: nifti:mod=seg + model_name: NNUnet Thoracic OAR + skip_empty_slices: True + + DataOrganizer: + targets: + - dicomseg-->[i:sid]/nnunet_segthor.seg.dcm \ No newline at end of file From 0ebd4bf35c14ea9d0a3abd3c57222e07e149ab5f Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Tue, 5 Mar 2024 08:48:00 +0100 Subject: [PATCH 104/125] add metadata for nnunet segthor --- models/nnunet_segthor/meta.json | 123 ++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 models/nnunet_segthor/meta.json diff --git a/models/nnunet_segthor/meta.json b/models/nnunet_segthor/meta.json new file mode 100644 index 00000000..195ff2ee --- /dev/null +++ b/models/nnunet_segthor/meta.json @@ -0,0 +1,123 @@ +{ + "id": "69754d0c-0521-4986-9763-c0df6594b6bf", + "name": "nnunet_segthor", + "title": "nnU-Net (thoracic OAR)", + "summary": { + "description": "nnU-Net's thoracic OAR segmentation model is an AI-based pipeline for the automated segmentation of the heart, the aorta, the esophagus and the trachea in CT scans (with and without contrast).", + "inputs": [ { + "label": "Input Image", + "description": "The CT scan of a patient.", + "format": "DICOM", + "modality": "CT", + "bodypartexamined": "Chest", + "slicethickness": "2.5 mm", + "non-contrast": true, + "contrast": true + } ], + "outputs": [ { + "type": "Segmentation", + "classes": [ + "ESOPHAGUS", + "HEART", + "TRACHEA", + "AORTA" + ] + } ], + "model": { + "architecture": "U-net", + "training": "supervised", + "cmpapproach": "ensemble" + }, + "data": { + "training": { + "vol_samples": 40 + }, + "evaluation": { + "vol_samples": 20 + }, + "public": true, + 
"external": false + } + }, + "details": { + "name": "nnU-Net Thoracic Organs at Risk Segmentation Model", + "version": "1.0.0", + "devteam": "MIC-DKFZ (Helmholtz Imaging Applied Computer Vision Lab)", + "type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)", + "date": { + "weights": "01/02/22", + "code": "n/a", + "pub": "2020" + }, + "cite": "Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2020). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature Methods, 1-9.", + "license": { + "code": "Apache 2.0", + "weights": "CC BY-NC 4.0" + }, + "publications": [ + { + "title": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation", + "uri": "https://www.nature.com/articles/s41592-020-01008-z" + } + ], + "github": "https://github.com/MIC-DKFZ/nnUNet/tree/nnunetv1", + "zenodo": "https://zenodo.org/record/4485926" + }, + "info": { + "use": { + "title": "Intended Use", + "text": "This model is intended to perform esophagus, heart, trachea and aorta segmentation in contrast-enhanced (CE) and non-CE chest CT scans. The model has been trained and tested on patients with Non-Small Cell Lung Cancer (NSCLC) referred for curative-intent radiotherapy, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown. The slice thickness should not exceed 2.5mm for best results." + }, + "analyses": { + "title": "Quantitative Analyses", + "text": "The model's performance was assessed using the Dice Coefficient and the (raw) Hausdorff Distance, in the context of the CodaLab SegTHOR challenge. 
The complete breakdown of the metrics can be consulted on CodaLab [1] and is reported in the supplementary material to the publication [2].", + "references": [ + { + "label": "SegTHOR Challenge on CodaLab", + "uri": "https://competitions.codalab.org/competitions/21145" + }, + { + "label": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation", + "uri": "https://www.nature.com/articles/s41592-020-01008-z" + } + ] + }, + "evaluation": { + "title": "Evaluation Data", + "text": "The evaluation dataset consists of 20 contrast-enhanced (CE) and non-CE chest CT scans from the SegTHOR dataset [1][2]. This dataset comprised Non-Small Cell Lung Cancer (NSCLC) patients referred for curative-intent radiotherapy (excluding patients with tumor extension distorting the mediastinum anatomy). Images were provided by the Centre Henri Becquerel, Rouen, France (CHB). On each CT scan, the OARs have been delineated by an experienced radiation oncologist using a Varian Medical Systems SomaVision platform. The body and lung contours were segmented with the automatic tools available on the platform. The esophagus was manually delineated from the 4th cervical vertebra to the esophago-gastric junction. The heart was delineated as recommended by the Radiation Therapy Oncology Group 2. The trachea was contoured from the lower limit of the larynx to 2cm below the carena excluding the lobar bronchi. The aorta was delineated from its origin above the heart down to below the diaphragm pillars.", + "references": [ + { + "label": "SegTHOR: Segmentation of Thoracic Organs at Risk in CT images", + "uri": "https://arxiv.org/abs/1912.05950" + }, + { + "label": "SegTHOR Challenge on CodaLab", + "uri": "https://competitions.codalab.org/competitions/21145" + } + ] + }, + "training": { + "title": "Training Data", + "text": "The evaluation dataset consists of 20 contrast-enhanced (CE) and non-CE chest CT scans from the SegTHOR dataset [1][2]. 
This dataset comprised Non-Small Cell Lung Cancer (NSCLC) patients referred for curative-intent radiotherapy (excluding patients with tumor extension distorting the mediastinum anatomy), provided by the Centre Henri Becquerel, Rouen, France (CHB). For details regarding the labels the model was trained with, see the section above. The authors reported the following reconstruction and acquisition parameters for the CT scans in the training datasets:", + "tables": [ + { + "label": "SegTHOR dataset", + "entries": { + "Slice Thickness": "2-3.7 mm", + "In-Plane Resolution": "0.9–1.37 mm" + } + } + ], + "references": [ + { + "label": "SegTHOR: Segmentation of Thoracic Organs at Risk in CT images", + "uri": "https://arxiv.org/abs/1912.05950" + }, + { + "label": "SegTHOR Challenge on CodaLab", + "uri": "https://competitions.codalab.org/competitions/21145" + } + ] + } + } + } \ No newline at end of file From 6e4f9fcb16038dd7077c0eee4bf82e7931048e6f Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Tue, 5 Mar 2024 09:23:29 +0100 Subject: [PATCH 105/125] fix dockerfile typo --- .../dockerfiles/Dockerfile | 32 +++++++++++++++++++ models/nnunet_segthor/dockerfiles/Dockerfile | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 models/nnunet_abdominal_oar/dockerfiles/Dockerfile diff --git a/models/nnunet_abdominal_oar/dockerfiles/Dockerfile b/models/nnunet_abdominal_oar/dockerfiles/Dockerfile new file mode 100644 index 00000000..847eea43 --- /dev/null +++ b/models/nnunet_abdominal_oar/dockerfiles/Dockerfile @@ -0,0 +1,32 @@ +FROM mhubai/base:latest + +# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build +# by pulling sklearn instead of scikit-learn +# N.B. 
this is a known issue: +# https://github.com/MIC-DKFZ/nnUNet/issues/1281 +# https://github.com/MIC-DKFZ/nnUNet/pull/1209 +ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True + +# Install nnunet and platipy +RUN pip3 install --no-cache-dir \ + nnunet==1.7.1 + +# Import the MHub model definiton +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh nnunet_abdominal_oar ${MHUB_MODELS_REPO} + +# pull weights for platipy's nnU-Net so that the user doesn't need to every time a container is run +ENV WEIGHTS_DIR="/root/.nnunet/nnUNet_models/nnUNet/" +ENV WEIGHTS_URL="https://zenodo.org/record/4485926/files/Task017_AbdominalOrganSegmentation.zip" +ENV WEIGHTS_FN="Task017_AbdominalOrganSegmentation.zip" + +RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL} +RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR} +RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN} + +# specify nnunet specific environment variables +ENV WEIGHTS_FOLDER=$WEIGHTS_DIR + +# Default run script +ENTRYPOINT ["mhub.run"] +CMD ["--config", "/app/models/nnunet_abdominal_oar/config/default.yml"] \ No newline at end of file diff --git a/models/nnunet_segthor/dockerfiles/Dockerfile b/models/nnunet_segthor/dockerfiles/Dockerfile index 0ba84dc1..5022b7e4 100644 --- a/models/nnunet_segthor/dockerfiles/Dockerfile +++ b/models/nnunet_segthor/dockerfiles/Dockerfile @@ -13,7 +13,7 @@ RUN pip3 install --no-cache-dir \ # Import the MHub model definiton ARG MHUB_MODELS_REPO -RUN buildutils/import_mhub_model.sh nnunet_abdominal_oar ${MHUB_MODELS_REPO} +RUN buildutils/import_mhub_model.sh nnunet_segthor ${MHUB_MODELS_REPO} # pull weights for platipy's nnU-Net so that the user doesn't need to every time a container is run ENV WEIGHTS_DIR="/root/.nnunet/nnUNet_models/nnUNet/" From 9960a3612c45717cdbd665da9805b47868cc140a Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Tue, 5 Mar 2024 09:36:56 +0100 Subject: [PATCH 106/125] fix mhub version report Move model version delimiter outside of loop (all model versions shall be 
listed as one group) --- base/bin/mhub.version | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/base/bin/mhub.version b/base/bin/mhub.version index d87eca7d..da91b5e5 100755 --- a/base/bin/mhub.version +++ b/base/bin/mhub.version @@ -24,8 +24,9 @@ if [ -d "$MODEL_FOLDER" ]; then for mdir in /app/models/* ; do MVERSION=$(jq -r '.details.version' ${mdir}/meta.json) echo -e "mhub.$(basename ${mdir})==${MVERSION}" - echo -e "+++" done + + echo -e "+++" else echo -e "+++" echo -e " " From bfebc9bbe8105ff05fb7d6774c18b5804055419c Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Tue, 5 Mar 2024 09:44:04 +0100 Subject: [PATCH 107/125] rearange Dockerfile instruction order for faster builds --- models/nnunet_segthor/dockerfiles/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/models/nnunet_segthor/dockerfiles/Dockerfile b/models/nnunet_segthor/dockerfiles/Dockerfile index 5022b7e4..779da51c 100644 --- a/models/nnunet_segthor/dockerfiles/Dockerfile +++ b/models/nnunet_segthor/dockerfiles/Dockerfile @@ -11,10 +11,6 @@ ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True RUN pip3 install --no-cache-dir \ nnunet==1.7.1 -# Import the MHub model definiton -ARG MHUB_MODELS_REPO -RUN buildutils/import_mhub_model.sh nnunet_segthor ${MHUB_MODELS_REPO} - # pull weights for platipy's nnU-Net so that the user doesn't need to every time a container is run ENV WEIGHTS_DIR="/root/.nnunet/nnUNet_models/nnUNet/" ENV WEIGHTS_URL="https://zenodo.org/record/4485926/files/Task055_SegTHOR.zip" @@ -27,6 +23,10 @@ RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN} # specify nnunet specific environment variables ENV WEIGHTS_FOLDER=$WEIGHTS_DIR +# Import the MHub model definiton +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh nnunet_segthor ${MHUB_MODELS_REPO} + # Default run script ENTRYPOINT ["mhub.run"] CMD ["--config", "/app/models/nnunet_segthor/config/default.yml"] \ No newline at end of file From a664fab7116d1dd6dd8ed1bb2d388a27e85501d3 Mon 
Sep 17 00:00:00 2001 From: LennyN95 Date: Tue, 5 Mar 2024 09:55:30 +0100 Subject: [PATCH 108/125] remove wrong Dockerfile --- .../dockerfiles/Dockerfile | 32 ------------------- 1 file changed, 32 deletions(-) delete mode 100644 models/nnunet_abdominal_oar/dockerfiles/Dockerfile diff --git a/models/nnunet_abdominal_oar/dockerfiles/Dockerfile b/models/nnunet_abdominal_oar/dockerfiles/Dockerfile deleted file mode 100644 index 847eea43..00000000 --- a/models/nnunet_abdominal_oar/dockerfiles/Dockerfile +++ /dev/null @@ -1,32 +0,0 @@ -FROM mhubai/base:latest - -# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build -# by pulling sklearn instead of scikit-learn -# N.B. this is a known issue: -# https://github.com/MIC-DKFZ/nnUNet/issues/1281 -# https://github.com/MIC-DKFZ/nnUNet/pull/1209 -ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True - -# Install nnunet and platipy -RUN pip3 install --no-cache-dir \ - nnunet==1.7.1 - -# Import the MHub model definiton -ARG MHUB_MODELS_REPO -RUN buildutils/import_mhub_model.sh nnunet_abdominal_oar ${MHUB_MODELS_REPO} - -# pull weights for platipy's nnU-Net so that the user doesn't need to every time a container is run -ENV WEIGHTS_DIR="/root/.nnunet/nnUNet_models/nnUNet/" -ENV WEIGHTS_URL="https://zenodo.org/record/4485926/files/Task017_AbdominalOrganSegmentation.zip" -ENV WEIGHTS_FN="Task017_AbdominalOrganSegmentation.zip" - -RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL} -RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR} -RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN} - -# specify nnunet specific environment variables -ENV WEIGHTS_FOLDER=$WEIGHTS_DIR - -# Default run script -ENTRYPOINT ["mhub.run"] -CMD ["--config", "/app/models/nnunet_abdominal_oar/config/default.yml"] \ No newline at end of file From ec8aa852830605fc7364ffc822e30c978a78d600 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Tue, 5 Mar 2024 10:12:28 +0100 Subject: [PATCH 109/125] update segmentation ids --- 
models/nnunet_segthor/config/default.yml | 2 +- models/nnunet_segthor/meta.json | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/models/nnunet_segthor/config/default.yml b/models/nnunet_segthor/config/default.yml index 5a1d68a1..239a395c 100644 --- a/models/nnunet_segthor/config/default.yml +++ b/models/nnunet_segthor/config/default.yml @@ -21,7 +21,7 @@ modules: NNUnetRunner: nnunet_task: Task055_SegTHOR nnunet_model: 3d_lowres - roi: HEART,LEFT_ATRIUM,LEFT_VENTRICLE,RIGHT_ATRIUM,RIGHT_VENTRICLE,CORONARY_ARTERY_LAD,CORONARY_ARTERY_CFLX,CORONARY_ARTERY_RIGHT + roi: ESOPHAGUS,HEART,TRACHEA,AORTA DsegConverter: source_segs: nifti:mod=seg diff --git a/models/nnunet_segthor/meta.json b/models/nnunet_segthor/meta.json index 195ff2ee..8e32b9cf 100644 --- a/models/nnunet_segthor/meta.json +++ b/models/nnunet_segthor/meta.json @@ -17,14 +17,10 @@ "outputs": [ { "type": "Segmentation", "classes": [ + "ESOPHAGUS", "HEART", - "LEFT_ATRIUM", - "LEFT_VENTRICLE", - "RIGHT_ATRIUM", - "RIGHT_VENTRICLE", - "CORONARY_ARTERY_LAD", - "CORONARY_ARTERY_CFLX", - "CORONARY_ARTERY_RIGHT" + "TRACHEA", + "AORTA" ] } ], "model": { From 6dc6f3b7a28696a9dbf012cfbc488aea294cbf3e Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Tue, 5 Mar 2024 16:13:57 +0100 Subject: [PATCH 110/125] fix metadata for gc_nnunet_pancreas output segmentations Replace the free-text class labels with the actual SegDB IDs. This change does not require a model rebuild and will be automatically update the website representation. 
--- models/gc_nnunet_pancreas/meta.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/models/gc_nnunet_pancreas/meta.json b/models/gc_nnunet_pancreas/meta.json index c38dcd9a..9eb831c1 100644 --- a/models/gc_nnunet_pancreas/meta.json +++ b/models/gc_nnunet_pancreas/meta.json @@ -36,11 +36,11 @@ "label": "Pancreas segmentation", "description": "Segmentation of pancreas related tissues, these segmentation classes were not thoroughly validated, use them on your own risk!", "classes": [ - "veins", - "arteries", - "pancreas", - "pancreatic duct", - "bile duct" + "VEIN", + "ARTERY", + "PANCREAS", + "PANCREATIC_DUCT", + "BILE_DUCT" ] } ], From c864936958be00745ef42e4c9411cec38363255f Mon Sep 17 00:00:00 2001 From: Suraj Pai Date: Tue, 5 Mar 2024 20:09:19 -0500 Subject: [PATCH 111/125] Add meta --- models/fmcib/meta.json | 35 ----- .../config/default.yml | 0 .../dockerfiles/Dockerfile | 10 +- models/fmcib_radiomics/meta.json | 138 ++++++++++++++++++ .../utils/FMCIBRunner.py | 0 .../utils/__init__.py | 0 6 files changed, 145 insertions(+), 38 deletions(-) delete mode 100644 models/fmcib/meta.json rename models/{fmcib => fmcib_radiomics}/config/default.yml (100%) rename models/{fmcib => fmcib_radiomics}/dockerfiles/Dockerfile (67%) create mode 100644 models/fmcib_radiomics/meta.json rename models/{fmcib => fmcib_radiomics}/utils/FMCIBRunner.py (100%) rename models/{fmcib => fmcib_radiomics}/utils/__init__.py (100%) diff --git a/models/fmcib/meta.json b/models/fmcib/meta.json deleted file mode 100644 index 9256e570..00000000 --- a/models/fmcib/meta.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "id": "", - "name": "fmcib", - "title": "Foundation Model for Cancer Imaging Biomarkers", - "summary": { - "description": "This algorithm extracts a 4096 dimensonal feature set for a volume centered on the tumor location", - "inputs": [ - { - - }, - { - - } - ], - "outputs": [ - { - - } - ], - "model": { - "architecture": "Resnet50 (2x wide)", - "training": 
"weakly-supervised contrastive learning", - "cmpapproach": "3D" - }, - "data": { - "training": { - "vol_samples": 11467 - }, - "evaluation": { - }, - "public": true, - "external": false - } - } -} \ No newline at end of file diff --git a/models/fmcib/config/default.yml b/models/fmcib_radiomics/config/default.yml similarity index 100% rename from models/fmcib/config/default.yml rename to models/fmcib_radiomics/config/default.yml diff --git a/models/fmcib/dockerfiles/Dockerfile b/models/fmcib_radiomics/dockerfiles/Dockerfile similarity index 67% rename from models/fmcib/dockerfiles/Dockerfile rename to models/fmcib_radiomics/dockerfiles/Dockerfile index 9f3d9603..2ef43e85 100644 --- a/models/fmcib/dockerfiles/Dockerfile +++ b/models/fmcib_radiomics/dockerfiles/Dockerfile @@ -2,15 +2,19 @@ FROM mhubai/base:latest LABEL authors="bspai@bwh.harvard.edu" - - +ARG MHUB_MODELS_REPO +# Add pull models repo command here after local testingRUN +RUN buildutils/import_mhub_model.sh fmcib_radiomics ${MHUB_MODELS_REPO} RUN wget https://zenodo.org/records/10528450/files/model_weights.torch?download=1 -O /app/model_weights.torch + RUN mkdir models RUN mkdir models/fmcib # Install FMCIB package, should install everything else ... RUN pip install foundation-cancer-image-biomarker --pre + + ENTRYPOINT ["python3", "-m", "mhubio.run"] -CMD ["--config", "/app/models/fmcib/config/default.yml", "--print"] +CMD ["--workflow", "default"] diff --git a/models/fmcib_radiomics/meta.json b/models/fmcib_radiomics/meta.json new file mode 100644 index 00000000..66f00159 --- /dev/null +++ b/models/fmcib_radiomics/meta.json @@ -0,0 +1,138 @@ +{ + "id": "...", + "name": "fmcib_radiomics", + "title": "Foundation Model for Cancer Imaging Biomarkers", + "summary": { + "description": "A foundation model for cancer imaging biomarker discovery trained through self-supervised learning using a dataset of 11,467 radiographic lesions. 
The model features can be used as a data-driven substitute for classical radiomic features", + "inputs": [ + { + "label": "Input CT Image", + "description": "CT imaging data containing lesions of interest, such as nodules or tumors", + "format": "DICOM", + "modality": "CT", + "slicethickness": "5mm", + "bodypartexamined": "Whole", + "non-contrast": true, + "contrast": true + }, + { + "label": "Center of mass", + "description": "Center of mass of the lesion in the CT image", + "format": "JSON", + "modality": "JSON", + "slicethickness": "5mm", + "bodypartexamined": "Whole", + "non-contrast": true, + "contrast": true + } + ], + "outputs": [ + { + "type": "Prediction", + "valueType": "Feature vector", + "description": "A set of features extracted from the input CT image", + "label": "Features" + + } + ], + "model": { + "architecture": "3D ResNet50", + "training": "other", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 11467 + }, + "evaluation": { + "vol_samples": 1944 + }, + "public": true, + "external": true + } + }, + "details": { + "name": "Foundation Model for Cancer Imaging Biomarkers", + "version": "0.0.1", + "type": "Feature extractor", + "devteam": "Researchers from the Artificial Intelligence in Medicine (AIM) Program, Mass General Brigham, Harvard Medical School and other institutions", + "date": { + "pub": "2023 (preprint)", + "code": "n/a", + "weights": "18.01.2024" + }, + "cite": "Pai, S., Bontempi, D., Hadzic, I., Prudente, V., et al. Foundation Model for Cancer Imaging Biomarkers. 
2023.", + "license": { + "code": "MIT", + "weights": "CC BY-NC 4.0" + }, + "publications": [ + { + "title": "Foundation Model for Cancer Imaging Biomarkers", + "uri": "https://www.medrxiv.org/content/10.1101/2023.09.04.23294952v1" + } + ], + "github": "https://github.com/AIM-Harvard/foundation-cancer-image-biomarker", + "zenodo": "https://zenodo.org/records/10528450", + "colab": "https://colab.research.google.com/drive/1JMtj_4W0uNPzrVnM9EpN1_xpaB-5KC1H?usp=sharing", + "slicer": false + }, + "info": { + "use": { + "title": "Intended Use", + "text": "The foundation model is intended to extract features from several different types of lesions (lung, liver, kidney, mediastinal, abdominal, pelvic, bone and soft tissue). These features can be used for a variety of predictive and clustering tasks as a data-driven substitute for classical radiomic features." + }, + "analyses": { + "title": "Quantitative Analyses", + "text": "The model's performance was assessed using three different downstream tasks, including malignancy prediction and lung cancer risk prediction. Refer to the publication for more details [1].", + "references": [ + { + "label": "Foundation model for cancer image biomarkers", + "uri": "https://www.medrxiv.org/content/10.1101/2023.09.04.23294952v1" + } + ] + }, + "evaluation": { + "title": "Evaluation Data", + "text": "The evaluation dataset consists of 1,944 lesions, including 1,221 lesions for anatomical site classification, 170 nodules for malignancy prediction, and 553 tumors (420 LUNG1 + 133 RADIO) for prognostication. 
The dataset was held out from the training data and gathered from several different sources [1, 2, 3, 4].", + "tables": [ + { + "label": "Evaluation Tasks & Datasets", + "entries": { + "Lesion Anatomical Site Prediction": "DeepLesion (n=1221)", + "Nodule Malignancy Prediction": "LUNA16 (n=170)", + "Tumor Prognostication": "NSCLC-Radiomics (n=420) + NSCLC-Radiogenomics (n=133)" + } + } + ], + "references": [ + { + "label": "DeepLesion: automated mining of large-scale lesion annotations and universal lesion detection with deep learning.", + "uri": "https://pubmed.ncbi.nlm.nih.gov/30035154/" + }, + { + "label": "LUNA16", + "uri": "https://www.cancerimagingarchive.net/collection/lidc-idri/" + }, + { + "label": "NSCLC-Radiomics", + "uri": "https://www.cancerimagingarchive.net/collection/nsclc-radiomics/" + }, + { + "label": "NSCLC-Radiogenomics", + "uri": "https://www.cancerimagingarchive.net/analysis-result/nsclc-radiogenomics-stanford/" + } + ] + }, + "training": { + "title": "Training Data", + "text": "The training dataset consists of 11467 lesions sourced from 5,513 unique CT scans across 2,312 different patients. 
This was curated from the DeepLesion dataset [1] following two steps - 1) Lesions that did not contain anatomical labels were selected, 2) Scans with spacing 5mm or more were removed.", + "references": [ + { + "label": "DeepLesion: automated mining of large-scale lesion annotations and universal lesion detection with deep learning.", + "uri": "https://pubmed.ncbi.nlm.nih.gov/30035154/" + } + ] + } + } +} diff --git a/models/fmcib/utils/FMCIBRunner.py b/models/fmcib_radiomics/utils/FMCIBRunner.py similarity index 100% rename from models/fmcib/utils/FMCIBRunner.py rename to models/fmcib_radiomics/utils/FMCIBRunner.py diff --git a/models/fmcib/utils/__init__.py b/models/fmcib_radiomics/utils/__init__.py similarity index 100% rename from models/fmcib/utils/__init__.py rename to models/fmcib_radiomics/utils/__init__.py From 351eff88f36dfb3fdb6c27f7089991eb1368d137 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Wed, 6 Mar 2024 10:36:56 +0100 Subject: [PATCH 112/125] adding uuid to meta json --- models/fmcib_radiomics/meta.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/models/fmcib_radiomics/meta.json b/models/fmcib_radiomics/meta.json index 66f00159..ba3f4087 100644 --- a/models/fmcib_radiomics/meta.json +++ b/models/fmcib_radiomics/meta.json @@ -1,5 +1,5 @@ { - "id": "...", + "id": "26e98e14-b605-4007-bd8b-79d517c935b5", "name": "fmcib_radiomics", "title": "Foundation Model for Cancer Imaging Biomarkers", "summary": { @@ -32,7 +32,6 @@ "valueType": "Feature vector", "description": "A set of features extracted from the input CT image", "label": "Features" - } ], "model": { From 4cf674dcace8c568d95ff5d0cc8f80403baff322 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Wed, 6 Mar 2024 11:12:27 +0100 Subject: [PATCH 113/125] update Dockerfile, outsource centroid extraction, add coords schema --- models/fmcib_radiomics/dockerfiles/Dockerfile | 21 ++--- .../utils/CentroidExtractor.py | 43 ++++++++++ models/fmcib_radiomics/utils/FMCIBRunner.py | 81 
+++++++++++-------- .../fmcib_radiomics/utils/coords.schema.json | 20 +++++ 4 files changed, 123 insertions(+), 42 deletions(-) create mode 100644 models/fmcib_radiomics/utils/CentroidExtractor.py create mode 100644 models/fmcib_radiomics/utils/coords.schema.json diff --git a/models/fmcib_radiomics/dockerfiles/Dockerfile b/models/fmcib_radiomics/dockerfiles/Dockerfile index 2ef43e85..ec4851ab 100644 --- a/models/fmcib_radiomics/dockerfiles/Dockerfile +++ b/models/fmcib_radiomics/dockerfiles/Dockerfile @@ -1,20 +1,23 @@ FROM mhubai/base:latest -LABEL authors="bspai@bwh.harvard.edu" +LABEL authors="bspai@bwh.harvard.edu,lnuernberg@bwh.harvard.edu" -ARG MHUB_MODELS_REPO -# Add pull models repo command here after local testingRUN -RUN buildutils/import_mhub_model.sh fmcib_radiomics ${MHUB_MODELS_REPO} -RUN wget https://zenodo.org/records/10528450/files/model_weights.torch?download=1 -O /app/model_weights.torch +# create +RUN mkdir -p models/fmcib +# download model weights +RUN wget https://zenodo.org/records/10528450/files/model_weights.torch?download=1 -O /app/model_weights.torch -RUN mkdir models -RUN mkdir models/fmcib +# clone mhub implementation +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh fmcib_radiomics ${MHUB_MODELS_REPO} # Install FMCIB package, should install everything else ... 
RUN pip install foundation-cancer-image-biomarker --pre +# Install additional pip packages +RUN pip3 install --upgrade pip && pip3 install --no-cache-dir \ + jsonschema==4.21.1 - -ENTRYPOINT ["python3", "-m", "mhubio.run"] +ENTRYPOINT ["mhub.run"] CMD ["--workflow", "default"] diff --git a/models/fmcib_radiomics/utils/CentroidExtractor.py b/models/fmcib_radiomics/utils/CentroidExtractor.py new file mode 100644 index 00000000..89d5bf65 --- /dev/null +++ b/models/fmcib_radiomics/utils/CentroidExtractor.py @@ -0,0 +1,43 @@ +""" +--------------------------------------------------------- +Author: Leonard Nürnberg +Email: lnuernberg@bwh.harvard.edu +Date: 06.03.2024 +--------------------------------------------------------- +""" + +import json, jsonschema +from mhubio.core import Instance, InstanceData, IO, Module +import SimpleITK as sitk + +class CentroidExtractor(Module): + + @IO.Instance() + @IO.Input('in_mask', 'nrrd:mod=seg', the='Tumor segmentation mask for the input NRRD file.') + @IO.Output('centroids_json', 'centroids.json', "json:type=fmcibcoordinates", the='JSON file containing 3D coordinates of the centroid of the input mask.') + def task(self, instance: Instance, in_data: InstanceData, in_mask: InstanceData, centroids_json: InstanceData) -> None: + + # read the input mask + mask = sitk.ReadImage(in_mask.abspath) + + # get the center of massk from the mask via ITK + label_shape_filter = sitk.LabelShapeStatisticsImageFilter() + label_shape_filter.Execute(mask) + try: + centroid = label_shape_filter.GetCentroid(255) + except: + centroid = label_shape_filter.GetCentroid(1) + + # extract x, y, and z coordinates from the centroid + x, y, z = centroid + + # set up the coordinate dictionary + coordinate_dict = { + "coordX": x, + "coordY": y, + "coordZ": z, + } + + # write the coordinate dictionary to a json file + with open(centroids_json.abspath, "w") as f: + json.dump(coordinate_dict, f) diff --git a/models/fmcib_radiomics/utils/FMCIBRunner.py 
b/models/fmcib_radiomics/utils/FMCIBRunner.py index 4e783867..8595e795 100644 --- a/models/fmcib_radiomics/utils/FMCIBRunner.py +++ b/models/fmcib_radiomics/utils/FMCIBRunner.py @@ -5,49 +5,64 @@ --------------------------------------------------------- """ -import json -import torch +import json, jsonschema, os from fmcib.models import fmcib_model import SimpleITK as sitk from mhubio.core import Instance, InstanceData, IO, Module -from fmcib.preprocessing import preprocess +COORDS_SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "coords.schema.json") + +def fmcib(input_dict: dict, json_output_file_path: str): + """Run the FCMIB pipeline. + + Args: + input_dict (dict): The input dictionary containing the image path and the seed point coordinates. + json_output_file_path (str): The path were the features are exported to as a json file. + """ + # model dependency imports + import torch + from fmcib.preprocessing import preprocess + + # initialize model + model = fmcib_model() + + # run model preroecessing + image = preprocess(input_dict) + image = image.unsqueeze(0) + + # run model inference + model.eval() + with torch.no_grad(): + features = model(image) + + # generate fearure dictionary + feature_dict = {f"feature_{idx}": feature for idx, feature in enumerate(features.flatten().tolist())} + + # write feature dictionary to json file + with open(json_output_file_path, "w") as f: + json.dump(feature_dict, f) class FMCIBRunner(Module): + @IO.Instance() @IO.Input('in_data', 'nrrd:mod=ct', the='Input NRRD file') - @IO.Input('in_mask', 'nrrd|json', the='Tumor mask for the input NRRD file') - @IO.Output('feature_json', 'features.json', "json", bundle='model', the='output JSON file') - def task(self, instance: Instance, in_data: InstanceData, in_mask: InstanceData, feature_json: InstanceData) -> None: - mask_path = in_mask.abspath - mask = sitk.ReadImage(mask_path) - - # Get the CoM of the mask - label_shape_filter = sitk.LabelShapeStatisticsImageFilter() - 
label_shape_filter.Execute(mask) - try: - centroid = label_shape_filter.GetCentroid(255) - except: - centroid = label_shape_filter.GetCentroid(1) - - x, y, z = centroid + @IO.Input('centroids_json', 'json:type=fmcibcoordinates', the='The centroids in the input image coordinate space') + @IO.Output('feature_json', 'features.json', "json:type=fmcibfeatures", bundle='model', the='Features extracted from the input image at the specified seed point.') + def task(self, instance: Instance, in_data: InstanceData, centroids_json: InstanceData, feature_json: InstanceData) -> None: + + # read centroids from json file + centroids = json.load(centroids_json.abspath) + # verify input data schema + with open("models/fmcib_radiomics/utils/input_schema.json") as f: + schema = json.load(f) + jsonschema.validate(centroids, schema) + + # define input dictionary input_dict = { "image_path": in_data.abspath, - "coordX": x, - "coordY": y, - "coordZ": z, + **centroids } - image = preprocess(input_dict) - image = image.unsqueeze(0) - model = fmcib_model() - - model.eval() - with torch.no_grad(): - features = model(image) - - feature_dict = {f"feature_{idx}": feature for idx, feature in enumerate(features.flatten().tolist())} - - with open(feature_json.abspath, "w") as f: - json.dump(feature_dict, f) + # run model + fmcib(input_dict, feature_json.abspath) \ No newline at end of file diff --git a/models/fmcib_radiomics/utils/coords.schema.json b/models/fmcib_radiomics/utils/coords.schema.json new file mode 100644 index 00000000..1ee86a00 --- /dev/null +++ b/models/fmcib_radiomics/utils/coords.schema.json @@ -0,0 +1,20 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "coordX": { + "type": "number" + }, + "coordY": { + "type": "number" + }, + "coordZ": { + "type": "number" + } + }, + "required": [ + "coordX", + "coordY", + "coordZ" + ] +} \ No newline at end of file From fcb7f637fe0e617e2bd8c6aa0771840aa139cc2e Mon Sep 17 00:00:00 2001 From: 
LennyN95 Date: Wed, 6 Mar 2024 11:12:45 +0100 Subject: [PATCH 114/125] update default workflow and propose alternative workflow --- models/fmcib_radiomics/config/default.yml | 5 +++-- .../fmcib_radiomics/config/from_centroids.yml | 20 +++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 models/fmcib_radiomics/config/from_centroids.yml diff --git a/models/fmcib_radiomics/config/default.yml b/models/fmcib_radiomics/config/default.yml index 4f91e608..cc4b1559 100644 --- a/models/fmcib_radiomics/config/default.yml +++ b/models/fmcib_radiomics/config/default.yml @@ -5,6 +5,7 @@ general: execute: - FileStructureImporter +- CentroidExtractor - FMCIBRunner - DataOrganizer @@ -12,9 +13,9 @@ modules: FileStructureImporter: structures: - $patientID/CT.nrrd@instance@nrrd:mod=ct - - $patientID/masks/GTV-1.nrrd@nrrd + - $patientID/masks/GTV-1.nrrd@nrrd:mod=seg import_id: patientID DataOrganizer: targets: - - json-->[i:patientID]/features.json \ No newline at end of file + - json:type=fmcibfeatures-->[i:patientID]/features.json \ No newline at end of file diff --git a/models/fmcib_radiomics/config/from_centroids.yml b/models/fmcib_radiomics/config/from_centroids.yml new file mode 100644 index 00000000..462fc8b4 --- /dev/null +++ b/models/fmcib_radiomics/config/from_centroids.yml @@ -0,0 +1,20 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: "FMCIB pipeline starting from a coordinate json file" + +execute: +- FileStructureImporter +- FMCIBRunner +- DataOrganizer + +modules: + FileStructureImporter: + structures: + - $patientID/CT.nrrd@instance@nrrd:mod=ct + - $patientID/centroids.json@json:type=fmcibcoordinates + import_id: patientID + + DataOrganizer: + targets: + - json:type=fmcibfeatures-->[i:patientID]/features.json \ No newline at end of file From 0a255eda4c980d9c333af18f5b84d7014dac1a28 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Wed, 6 Mar 2024 18:32:39 +0100 Subject: [PATCH 115/125] add automatic support for 
various json schemas --- models/fmcib_radiomics/utils/FMCIBRunner.py | 63 +- .../utils/slicermarkup.schema.json | 699 ++++++++++++++++++ 2 files changed, 753 insertions(+), 9 deletions(-) create mode 100644 models/fmcib_radiomics/utils/slicermarkup.schema.json diff --git a/models/fmcib_radiomics/utils/FMCIBRunner.py b/models/fmcib_radiomics/utils/FMCIBRunner.py index 8595e795..0c28386f 100644 --- a/models/fmcib_radiomics/utils/FMCIBRunner.py +++ b/models/fmcib_radiomics/utils/FMCIBRunner.py @@ -9,8 +9,58 @@ from fmcib.models import fmcib_model import SimpleITK as sitk from mhubio.core import Instance, InstanceData, IO, Module +from enum import Enum +from typing import Optional COORDS_SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "coords.schema.json") +SLICERMARKUP_SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "slicermarkup.schema.json") + +def is_valid(json_data: dict, schema_file_path: str) -> bool: + """Check if a json file is valid according to a given schema. + + Args: + json_data (dict): The json data to be validated. + schema_file_path (str): The path to the schema file. + + Returns: + bool: True if the json file is valid according to the schema, False otherwise. 
+ """ + with open(schema_file_path) as f: + schema = json.load(f) + + try: + jsonschema.validate(json_data, schema) + return True + except: + return False + +def get_coordinates(json_file_path: str) -> dict: + + # read json file + with open(json_file_path) as f: + json_data = json.load(f) + + # check which schema the json file adheres to + if is_valid(json_data, COORDS_SCHEMA_PATH): + return json_data + + if is_valid(json_data, SLICERMARKUP_SCHEMA_PATH): + markups = json_data["markups"] + assert markups["coordinateSystem"] == "LPS" + + controlPoints = markups["controlPoints"] + assert len(controlPoints) == 1 + + position = controlPoints[0]["position"] + return { + "coordX": position[0], + "coordY": position[1], + "coordZ": position[2] + } + + # + raise ValueError("The input json file does not adhere to the expected schema.") + def fmcib(input_dict: dict, json_output_file_path: str): """Run the FCMIB pipeline. @@ -46,22 +96,17 @@ class FMCIBRunner(Module): @IO.Instance() @IO.Input('in_data', 'nrrd:mod=ct', the='Input NRRD file') - @IO.Input('centroids_json', 'json:type=fmcibcoordinates', the='The centroids in the input image coordinate space') + @IO.Input('coordinates_json', 'json:type=fmcibcoordinates', the='The coordinates of the 3D seed point in the input image') @IO.Output('feature_json', 'features.json', "json:type=fmcibfeatures", bundle='model', the='Features extracted from the input image at the specified seed point.') - def task(self, instance: Instance, in_data: InstanceData, centroids_json: InstanceData, feature_json: InstanceData) -> None: + def task(self, instance: Instance, in_data: InstanceData, coordinates_json: InstanceData, feature_json: InstanceData) -> None: # read centroids from json file - centroids = json.load(centroids_json.abspath) - - # verify input data schema - with open("models/fmcib_radiomics/utils/input_schema.json") as f: - schema = json.load(f) - jsonschema.validate(centroids, schema) + coordinates = 
get_coordinates(coordinates_json.abspath) # define input dictionary input_dict = { "image_path": in_data.abspath, - **centroids + **coordinates } # run model diff --git a/models/fmcib_radiomics/utils/slicermarkup.schema.json b/models/fmcib_radiomics/utils/slicermarkup.schema.json new file mode 100644 index 00000000..3ca04d45 --- /dev/null +++ b/models/fmcib_radiomics/utils/slicermarkup.schema.json @@ -0,0 +1,699 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/Slicer/Slicer/main/Modules/Loadable/Markups/Resources/Schema/markups-v1.0.3-schema.json#", + "type": "object", + "title": "Schema for storing one or more markups", + "description": "Stores points, lines, curves, etc.", + "required": ["@schema", "markups"], + "additionalProperties": true, + "properties": { + "@schema": { + "$id": "#schema", + "type": "string", + "title": "Schema", + "description": "URL of versioned schema." + }, + "markups": { + "$id": "#markups", + "type": "array", + "title": "Markups", + "description": "Stores position and display properties of one or more markups.", + "additionalItems": true, + "items": { + "$id": "#markupItems", + "anyOf": [ + { + "$id": "#markup", + "type": "object", + "title": "Markup", + "description": "Stores a single markup.", + "default": {}, + "required": ["type"], + "additionalProperties": true, + "properties": { + "type": { + "$id": "#markup/type", + "type": "string", + "title": "Basic type", + "enum": ["Fiducial", "Line", "Angle", "Curve", "ClosedCurve", "Plane", "ROI"] + }, + "name": { + "$id": "#markup/name", + "type": "string", + "title": "Name", + "description": "Displayed name of the markup.", + "default": "" + }, + "coordinateSystem": { + "$id": "#markup/coordinateSystem", + "type": "string", + "title": "Control point positions coordinate system name", + "description": "Coordinate system name. 
Medical images most commonly use LPS patient coordinate system.", + "default": "LPS", + "enum": ["LPS", "RAS"] + }, + "coordinateUnits": { + "$id": "#markup/coordinateUnits", + "anyOf": [ + { + "type": "string", + "title": "Units of control point coordinates", + "description": "Control point coordinate values are specified in this length unit. Specified in UCUM.", + "default": "mm", + "enum": ["mm", "um"] + }, + { + "type": "array", + "title": "Coordinates units code", + "description": "Standard DICOM-compliant terminology item containing code, coding scheme designator, code meaning.", + "examples": [["mm", "UCUM", "millimeter"]], + "additionalItems": false, + "items": { "type": "string" }, + "minItems": 3, + "maxItems": 3 + } + ] + }, + "locked": { + "$id": "#markup/locked", + "type": "boolean", + "title": "Locked", + "description": "Markup can be interacted with on the user interface.", + "default": true + }, + "fixedNumberOfControlPoints": { + "$id": "#markup/fixedNumberOfControlPoints", + "type": "boolean", + "title": "Fixed number of control points", + "description": "Number of control points is fixed at the current value. Control points may not be added or removed (point positions can be unset instead of deleting).", + "default": false + }, + "labelFormat": { + "$id": "#markup/labelFormat", + "type": "string", + "title": "Label format", + "description": "Format of generation new labels. %N refers to node name, %d refers to point index.", + "default": "%N-%d" + }, + "lastUsedControlPointNumber": { + "$id": "#markup/lastUsedControlPointNumber", + "type": "integer", + "title": "Last used control point number", + "description": "This value is used for generating number in the control point's name when a new point is added.", + "default": 0 + }, + "roiType": { + "$id": "#markup/roiType", + "type": "string", + "title": "ROI type", + "description": "Method used to determine ROI bounds from control points. Ex. 
'Box', 'BoundingBox'.", + "default": "Box" + }, + "insideOut": { + "$id": "#markup/insideOut", + "type": "boolean", + "title": "Inside out", + "description": "ROI is inside out. Objects that would normally be inside are considered outside and vice versa.", + "default": false + }, + "planeType": { + "$id": "#markup/planeType", + "type": "string", + "title": "Plane type", + "description": "Method used to determine dimensions from control points. Ex. 'PointNormal', '3Points'.", + "default": "PointNormal" + }, + "sizeMode": { + "$id": "#markup/sizeMode", + "type": "string", + "title": "Plane size mode", + "description": "Mode used to calculate the size of the plane representation. (Ex. Static absolute or automatically calculated plane size based on control points).", + "default": "auto" + }, + "autoScalingSizeFactor": { + "$id": "#markup/autoScalingSizeFactor", + "type": "number", + "title": "Plane auto scaling size factor", + "description": "When the plane size mode is 'auto', the size of the plane is scaled by the auto size scaling factor.", + "default": "1.0" + }, + "center": { + "$id": "#markup/center", + "type": "array", + "title": "Center", + "description": "The center of the markups representation. Ex. center of ROI or plane markups.", + "examples": [[0.0, 0.0, 0.0]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 3, + "maxItems": 3 + }, + "normal": { + "$id": "#markup/normal", + "type": "array", + "title": "Normal", + "description": "The normal direction of plane markups.", + "examples": [[0.0, 0.0, 1.0]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 3, + "maxItems": 3 + }, + "size": { + "$id": "#markup/size", + "type": "array", + "title": "Size", + "description": "The size of the markups representation. 
For example, axis-aligned edge lengths of the ROI or plane markups.", + "examples": [[5.0, 5.0, 4.0], [5.0, 5.0, 0.0]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 3, + "maxItems": 3 + }, + "planeBounds": { + "$id": "#markup/planeBounds", + "type": "array", + "title": "Plane bounds", + "description": "The bounds of the plane representation.", + "examples": [[-50, 50, -50, 50]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 4, + "maxItems": 4 + }, + "objectToBase": { + "$id": "#markup/objectToBase", + "type": "array", + "title": "Object to Base matrix", + "description": "4x4 transform matrix from the object representation to the coordinate system defined by the control points.", + "examples": [[-0.9744254538021788, -0.15660098593235834, -0.16115572030626558, 26.459385388492746, + -0.08525118065879463, -0.4059244688892957, 0.9099217338613386, -48.04154530201596, + -0.20791169081775938, 0.9003896138683279, 0.3821927158637956, -53.35829266424462, + 0.0, 0.0, 0.0, 1.0]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 16, + "maxItems": 16 + }, + "baseToNode": { + "$id": "#markup/baseToNode", + "type": "array", + "title": "Base to Node matrix", + "description": "4x4 transform matrix from the base representation to the node coordinate system.", + "examples": [[-0.9744254538021788, -0.15660098593235834, -0.16115572030626558, 26.459385388492746, + -0.08525118065879463, -0.4059244688892957, 0.9099217338613386, -48.04154530201596, + -0.20791169081775938, 0.9003896138683279, 0.3821927158637956, -53.35829266424462, + 0.0, 0.0, 0.0, 1.0]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 16, + "maxItems": 16 + }, + "orientation": { + "$id": "#markup/orientation", + "type": "array", + "title": "Markups orientation", + "description": "3x3 orientation matrix of the markups representation. Ex. 
[orientation[0], orientation[3], orientation[6]] is the x vector of the object coordinate system in the node coordinate system.", + "examples": [[-0.6157905804369491, -0.3641498920623639, 0.6987108251316091, + -0.7414677108739087, -0.03213048377225371, -0.6702188193000602, + 0.2665100275346712, -0.9307859518297049, -0.2502197376306259]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 9, + "maxItems": 9 + }, + "controlPoints": { + "$id": "#markup/controlPoints", + "type": "array", + "title": "Control points", + "description": "Stores all control points of this markup.", + "default": [], + "additionalItems": true, + "items": { + "$id": "#markup/controlPointItems", + "anyOf": [ + { + "$id": "#markup/controlPoint", + "type": "object", + "title": "The first anyOf schema", + "description": "Object containing the properties of a single control point.", + "default": {}, + "required": [], + "additionalProperties": true, + "properties": { + "id": { + "$id": "#markup/controlPoint/id", + "type": "string", + "title": "Control point ID", + "description": "Identifier of the control point within this markup", + "default": "", + "examples": ["2", "5"] + }, + "label": { + "$id": "#markup/controlPoint/label", + "type": "string", + "title": "Control point label", + "description": "Label displayed next to the control point.", + "default": "", + "examples": ["F_1"] + }, + "description": { + "$id": "#markup/controlPoint/description", + "type": "string", + "title": "Control point description", + "description": "Details about the control point.", + "default": "" + }, + "associatedNodeID": { + "$id": "#markup/controlPoint/associatedNodeID", + "type": "string", + "title": "Associated node ID", + "description": "ID of the node where this markups is defined on.", + "default": "", + "examples": ["vtkMRMLModelNode1"] + }, + "position": { + "$id": "#markup/controlPoint/position", + "type": "array", + "title": "Control point position", + "description": "Tuple of 3 
defined in the specified coordinate system.", + "examples": [[-9.9, 1.1, 12.3]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 3, + "maxItems": 3 + }, + "orientation": { + "$id": "#markup/controlPoint/orientation", + "type": "array", + "title": "Control point orientation", + "description": "3x3 orientation matrix", + "examples": [[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0 ]], + "additionalItems": false, + "items": {"type": "number"}, + "minItems": 9, + "maxItems": 9 + }, + "selected": { + "$id": "#markup/controlPoint/selected", + "type": "boolean", + "title": "Control point is selected", + "description": "Specifies if the control point is selected or unselected.", + "default": true + }, + "locked": { + "$id": "#markup/controlPoint/locked", + "type": "boolean", + "title": "Control point locked", + "description": "Control point cannot be moved on the user interface.", + "default": false + }, + "visibility": { + "$id": "#markup/controlPoint/visibility", + "type": "boolean", + "title": "The visibility schema", + "description": "Visibility of the control point.", + "default": true + }, + "positionStatus": { + "$id": "#markup/controlPoint/positionStatus", + "type": "string", + "title": "The positionStatus schema", + "description": "Status of the control point position.", + "enum": ["undefined", "preview", "defined"], + "default": "defined" + } + } + } + ] + } + }, + "display": { + "$id": "#display", + "type": "object", + "title": "The display schema", + "description": "Object holding markups display properties.", + "default": {}, + "required": [], + "additionalProperties": true, + "properties": { + "visibility": { + "$id": "#display/visibility", + "type": "boolean", + "title": "Markup visibility", + "description": "Visibility of the entire markup.", + "default": true + }, + "opacity": { + "$id": "#display/opacity", + "type": "number", + "title": "Markup opacity", + "description": "Overall opacity of the markup.", + "minimum": 0.0, + 
"maximum": 1.0, + "default": 1.0 + }, + "color": { + "$id": "#display/color", + "type": "array", + "title": "Markup color", + "description": "Overall RGB color of the markup.", + "default": [0.4, 1.0, 1.0], + "additionalItems": false, + "items": {"type": "number", "minimum": 0.0, "maximum": 1.0}, + "minItems": 3, + "maxItems": 3 + }, + "selectedColor": { + "$id": "#display/selectedColor", + "title": "Markup selected color", + "description": "Overall RGB color of selected points in the markup.", + "default": [1.0, 0.5, 0.5], + "additionalItems": false, + "items": {"type": "number", "minimum": 0.0, "maximum": 1.0}, + "minItems": 3, + "maxItems": 3 + }, + "activeColor": { + "$id": "#display/activeColor", + "title": "Markup active color", + "description": "Overall RGB color of active points in the markup.", + "default": [0.4, 1.0, 0.0], + "additionalItems": false, + "items": {"type": "number", "minimum": 0.0, "maximum": 1.0}, + "minItems": 3, + "maxItems": 3 + }, + "propertiesLabelVisibility": { + "$id": "#display/propertiesLabelVisibility", + "type": "boolean", + "title": "Properties label visibility", + "description": "Visibility of the label that shows basic properties.", + "default": false + }, + "pointLabelsVisibility": { + "$id": "#display/pointLabelsVisibility", + "type": "boolean", + "title": "Point labels visibility", + "description": "Visibility of control point labels.", + "default": false + }, + "textScale": { + "$id": "#display/textScale", + "type": "number", + "title": "Markup overall text scale", + "description": "Size of displayed text as percentage of window size.", + "default": 3.0, + "minimum": 0.0 + }, + "glyphType": { + "$id": "#display/glyphType", + "type": "string", + "title": "The glyphType schema", + "description": "Enum representing the displayed glyph type.", + "default": "Sphere3D", + "enum": ["Vertex2D", "Dash2D", "Cross2D", "ThickCross2D", "Triangle2D", "Square2D", + "Circle2D", "Diamond2D", "Arrow2D", "ThickArrow2D", "HookedArrow2D", 
"StarBurst2D", + "Sphere3D", "Diamond3D"] + }, + "glyphScale": { + "$id": "#display/glyphScale", + "type": "number", + "title": "Point glyph scale", + "description": "Glyph size as percentage of window size.", + "default": 1.0, + "minimum": 0.0 + }, + "glyphSize": { + "$id": "#display/glyphSize", + "type": "number", + "title": "Point glyph size", + "description": "Absolute glyph size.", + "default": 5.0, + "minimum": 0.0 + }, + "useGlyphScale": { + "$id": "#display/useGlyphScale", + "type": "boolean", + "title": "Use glyph scale", + "description": "Use relative glyph scale.", + "default": true + }, + "sliceProjection": { + "$id": "#display/sliceProjection", + "type": "boolean", + "title": "Slice projection", + "description": "Enable project markups to slice views.", + "default": false + }, + "sliceProjectionUseFiducialColor": { + "$id": "#display/sliceProjectionUseFiducialColor", + "type": "boolean", + "title": "Use fiducial color for slice projection", + "description": "Choose between projection color or fiducial color for projections.", + "default": true + }, + "sliceProjectionOutlinedBehindSlicePlane": { + "$id": "#display/sliceProjectionOutlinedBehindSlicePlane", + "type": "boolean", + "title": "Display slice projection as outline", + "description": "Display slice projection as outline if behind slice plane.", + "default": false + }, + "sliceProjectionColor": { + "$id": "#display/sliceProjectionColor", + "type": "array", + "title": "Slice projection color", + "description": "Overall RGB color for displaying projection.", + "default": [1.0, 1.0, 1.0], + "additionalItems": false, + "items": {"type": "number", "minimum": 0.0, "maximum": 1.0}, + "minItems": 3, + "maxItems": 3 + }, + "sliceProjectionOpacity": { + "$id": "#display/sliceProjectionOpacity", + "type": "number", + "title": "Slice projection opacity", + "description": "Overall opacity of markup slice projection.", + "minimum": 0.0, + "maximum": 1.0, + "default": 0.6 + }, + "lineThickness": { + "$id": 
"#display/lineThickness", + "type": "number", + "title": "Line thickness", + "description": "Line thickness relative to markup size.", + "default": 0.2, + "minimum": 0.0 + }, + "lineColorFadingStart": { + "$id": "#display/lineColorFadingStart", + "type": "number", + "title": "Line color fading start", + "description": "Distance where line starts to fade out.", + "default": 1.0, + "minimum": 0.0 + }, + "lineColorFadingEnd": { + "$id": "#display/lineColorFadingEnd", + "type": "number", + "title": "Line color fading end", + "description": "Distance where line fades out completely.", + "default": 10.0, + "minimum": 0.0 + }, + "lineColorFadingSaturation": { + "$id": "#display/lineColorFadingSaturation", + "type": "number", + "title": "Color fading saturation", + "description": "Amount of color saturation change as the line fades out.", + "default": 1.0 + }, + "lineColorFadingHueOffset": { + "$id": "#display/lineColorFadingHueOffset", + "type": "number", + "title": "Color fadue hue offset", + "description": "Change in color hue as the line fades out.", + "default": 0.0 + }, + "handlesInteractive": { + "$id": "#display/handlesInteractive", + "type": "boolean", + "title": "Handles interactive", + "description": "Show interactive handles to transform this markup.", + "default": false + }, + "translationHandleVisibility": { + "$id": "#display/translationHandleVisibility", + "type": "boolean", + "title": "Translation handle visibility", + "description": "Visibility of the translation interaction handles", + "default": false + }, + "rotationHandleVisibility": { + "$id": "#display/rotationHandleVisibility", + "type": "boolean", + "title": "Rotation handle visibility", + "description": "Visibility of the rotation interaction handles", + "default": false + }, + "scaleHandleVisibility": { + "$id": "#display/scaleHandleVisibility", + "type": "boolean", + "title": "Scale handle visibility", + "description": "Visibility of the scale interaction handles", + "default": false + }, + 
"interactionHandleScale": { + "$id": "#display/interactionHandleScale", + "type": "number", + "title": "Interaction handle glyph scale", + "description": "Interaction handle size as percentage of window size.", + "default": 3.0 + }, + "snapMode": { + "$id": "#display/snapMode", + "type": "string", + "title": "Snap mode", + "description": "How control points can be defined and moved.", + "default": "toVisibleSurface", + "enum": ["unconstrained", "toVisibleSurface"] + } + } + }, + "measurements": { + "$id": "#markup/measurements", + "type": "array", + "title": "Measurements", + "description": "Stores all measurements for this markup.", + "default": [], + "additionalItems": true, + "items": { + "$id": "#markup/measurementItems", + "anyOf": [ + { + "$id": "#markup/measurement", + "type": "object", + "title": "Measurement", + "description": "Store a single measurement.", + "default": {}, + "required": [], + "additionalProperties": true, + "properties": { + "name": { + "$id": "#markup/measurement/name", + "type": "string", + "title": "Measurement name", + "description": "Printable name of the measurement", + "default": "", + "examples": ["length", "area"] + }, + "enabled": { + "$id": "#markup/measurement/enabled", + "type": "boolean", + "title": "Computation of the measurement is enabled", + "description": "This can be used to define measurements but prevent automatic updates.", + "default": true + }, + "value": { + "$id": "#display/measurement/value", + "type": "number", + "title": "Measurement value", + "description": "Numeric value of the measurement." + }, + "units": { + "$id": "#markup/measurement/units", + "anyOf": [ + { + "type": "string", + "title": "Measurement unit", + "description": "Printable measurement unit. 
Use of UCUM is preferred.", + "default": "", + "examples": ["mm", "mm2"] + }, + { + "type": "array", + "title": "Measurement units code", + "description": "Standard DICOM-compliant terminology item containing code, coding scheme designator, code meaning.", + "examples": [["cm3", "UCUM", "cubic centimeter"]], + "additionalItems": false, + "items": { "type": "string" }, + "minItems": 3, + "maxItems": 3 + } + ] + }, + "description": { + "$id": "#markup/measurement/description", + "type": "string", + "title": "Measurement description", + "description": "Explanation of the measurement.", + "default": "" + }, + "printFormat": { + "$id": "#markup/measurement/printFormat", + "type": "string", + "title": "Print format", + "description": "Format string (printf-style) to create user-displayable string from value and units.", + "default": "", + "examples": ["%5.3f %s"] + }, + "quantityCode": { + "$id": "#markup/measurement/quantityCode", + "type": "array", + "title": "Measurement quantity code", + "description": "Standard DICOM-compliant terminology item containing code, coding scheme designator, code meaning.", + "default": [], + "examples": [["118565006", "SCT", "Volume"]], + "additionalItems": false, + "items": { "type": "string" }, + "minItems": 3, + "maxItems": 3 + }, + "derivationCode": { + "$id": "#markup/measurement/derivationCode", + "type": "array", + "title": "Measurement derivation code", + "description": "Standard DICOM-compliant terminology item containing code, coding scheme designator, code meaning.", + "default": [], + "examples": [["255605001", "SCT", "Minimum"]], + "additionalItems": false, + "items": { "type": "string" }, + "minItems": 3, + "maxItems": 3 + }, + "methodCode": { + "$id": "#markup/measurement/methodCode", + "type": "array", + "title": "Measurement method code", + "description": "Standard DICOM-compliant terminology item containing code, coding scheme designator, code meaning.", + "default": [], + "examples": [["126030", "DCM", "Sum of 
segmented voxel volumes"]], + "additionalItems": false, + "items": { "type": "string" }, + "minItems": 3, + "maxItems": 3 + }, + "controlPointValues": { + "$id": "#markup/controlPoint/controlPointValues", + "type": "array", + "title": "Measurement values for each control point.", + "description": "This stores measurement result if it has value for each control point.", + "examples": [[-9.9, 1.1, 12.3, 4.3, 4.8]], + "additionalItems": false, + "items": { "type": "number" } + } + } + } + ] + } + } + } + } + ] + } + } + } +} From df1d8ba69badc7b38e793e76ff3ade19fd9b4002 Mon Sep 17 00:00:00 2001 From: Suraj Pai Date: Thu, 7 Mar 2024 01:40:31 -0500 Subject: [PATCH 116/125] Tested default workflow --- models/fmcib_radiomics/dockerfiles/Dockerfile | 13 +++++++------ .../fmcib_radiomics/utils/CentroidExtractor.py | 2 +- models/fmcib_radiomics/utils/FMCIBRunner.py | 17 +++++++++-------- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/models/fmcib_radiomics/dockerfiles/Dockerfile b/models/fmcib_radiomics/dockerfiles/Dockerfile index ec4851ab..1c9716ed 100644 --- a/models/fmcib_radiomics/dockerfiles/Dockerfile +++ b/models/fmcib_radiomics/dockerfiles/Dockerfile @@ -2,9 +2,6 @@ FROM mhubai/base:latest LABEL authors="bspai@bwh.harvard.edu,lnuernberg@bwh.harvard.edu" -# create -RUN mkdir -p models/fmcib - # download model weights RUN wget https://zenodo.org/records/10528450/files/model_weights.torch?download=1 -O /app/model_weights.torch @@ -12,12 +9,16 @@ RUN wget https://zenodo.org/records/10528450/files/model_weights.torch?download= ARG MHUB_MODELS_REPO RUN buildutils/import_mhub_model.sh fmcib_radiomics ${MHUB_MODELS_REPO} -# Install FMCIB package, should install everything else ... -RUN pip install foundation-cancer-image-biomarker --pre # Install additional pip packages RUN pip3 install --upgrade pip && pip3 install --no-cache-dir \ jsonschema==4.21.1 +# Install FMCIB package, should install everything else ... 
+RUN pip3 install foundation-cancer-image-biomarker --pre + +# Fix for mpmath torch bug: https://github.com/underworldcode/underworld3/issues/167 +RUN pip3 install mpmath==1.3.0 + ENTRYPOINT ["mhub.run"] -CMD ["--workflow", "default"] +CMD ["--workflow", "default", "--print"] diff --git a/models/fmcib_radiomics/utils/CentroidExtractor.py b/models/fmcib_radiomics/utils/CentroidExtractor.py index 89d5bf65..1e5154cb 100644 --- a/models/fmcib_radiomics/utils/CentroidExtractor.py +++ b/models/fmcib_radiomics/utils/CentroidExtractor.py @@ -15,7 +15,7 @@ class CentroidExtractor(Module): @IO.Instance() @IO.Input('in_mask', 'nrrd:mod=seg', the='Tumor segmentation mask for the input NRRD file.') @IO.Output('centroids_json', 'centroids.json', "json:type=fmcibcoordinates", the='JSON file containing 3D coordinates of the centroid of the input mask.') - def task(self, instance: Instance, in_data: InstanceData, in_mask: InstanceData, centroids_json: InstanceData) -> None: + def task(self, instance: Instance, in_mask: InstanceData, centroids_json: InstanceData) -> None: # read the input mask mask = sitk.ReadImage(in_mask.abspath) diff --git a/models/fmcib_radiomics/utils/FMCIBRunner.py b/models/fmcib_radiomics/utils/FMCIBRunner.py index 0c28386f..f3273156 100644 --- a/models/fmcib_radiomics/utils/FMCIBRunner.py +++ b/models/fmcib_radiomics/utils/FMCIBRunner.py @@ -1,12 +1,11 @@ """ --------------------------------------------------------- -Author: Suraj Pia -Email: bspai@bwh.harvard.edu +Author: Suraj Pai, Leonard Nürnberg +Email: bspai@bwh.harvard.edu, lnuernberg@bwh.harvard.edu +Date: 06.03.2024 --------------------------------------------------------- """ - import json, jsonschema, os -from fmcib.models import fmcib_model import SimpleITK as sitk from mhubio.core import Instance, InstanceData, IO, Module from enum import Enum @@ -71,9 +70,10 @@ def fmcib(input_dict: dict, json_output_file_path: str): """ # model dependency imports import torch + from fmcib.models import 
fmcib_model from fmcib.preprocessing import preprocess - # initialize model + # initialize the ResNet50 model with pretrained weights model = fmcib_model() # run model preroecessing @@ -96,12 +96,12 @@ class FMCIBRunner(Module): @IO.Instance() @IO.Input('in_data', 'nrrd:mod=ct', the='Input NRRD file') - @IO.Input('coordinates_json', 'json:type=fmcibcoordinates', the='The coordinates of the 3D seed point in the input image') + @IO.Input('centroids_json', "json:type=fmcibcoordinates", the='JSON file containing 3D coordinates of the centroid of the input mask.') @IO.Output('feature_json', 'features.json', "json:type=fmcibfeatures", bundle='model', the='Features extracted from the input image at the specified seed point.') - def task(self, instance: Instance, in_data: InstanceData, coordinates_json: InstanceData, feature_json: InstanceData) -> None: + def task(self, instance: Instance, in_data: InstanceData, centroids_json: InstanceData, feature_json: InstanceData) -> None: # read centroids from json file - coordinates = get_coordinates(coordinates_json.abspath) + coordinates = get_coordinates(centroids_json.abspath) # define input dictionary input_dict = { @@ -109,5 +109,6 @@ def task(self, instance: Instance, in_data: InstanceData, coordinates_json: Inst **coordinates } + # run model fmcib(input_dict, feature_json.abspath) \ No newline at end of file From a50b34521400cfd03c76aa802a7876a288a65dd6 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Thu, 7 Mar 2024 08:26:05 +0100 Subject: [PATCH 117/125] remove outdated imports --- models/fmcib_radiomics/utils/FMCIBRunner.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/models/fmcib_radiomics/utils/FMCIBRunner.py b/models/fmcib_radiomics/utils/FMCIBRunner.py index 0c28386f..c02ca429 100644 --- a/models/fmcib_radiomics/utils/FMCIBRunner.py +++ b/models/fmcib_radiomics/utils/FMCIBRunner.py @@ -7,10 +7,7 @@ import json, jsonschema, os from fmcib.models import fmcib_model -import SimpleITK as sitk from mhubio.core import 
Instance, InstanceData, IO, Module -from enum import Enum -from typing import Optional COORDS_SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "coords.schema.json") SLICERMARKUP_SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "slicermarkup.schema.json") From 21206b4540845b4abce62a2c1f269296322397df Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Thu, 7 Mar 2024 08:30:12 +0100 Subject: [PATCH 118/125] remove outdated fileglobal imports --- models/fmcib_radiomics/utils/FMCIBRunner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/models/fmcib_radiomics/utils/FMCIBRunner.py b/models/fmcib_radiomics/utils/FMCIBRunner.py index 64409369..a930c04c 100644 --- a/models/fmcib_radiomics/utils/FMCIBRunner.py +++ b/models/fmcib_radiomics/utils/FMCIBRunner.py @@ -6,7 +6,6 @@ --------------------------------------------------------- """ import json, jsonschema, os -from fmcib.models import fmcib_model from mhubio.core import Instance, InstanceData, IO, Module COORDS_SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "coords.schema.json") From 4e1525cf0d6abafd35985e15b843736be70487e7 Mon Sep 17 00:00:00 2001 From: Suraj Pai Date: Thu, 7 Mar 2024 02:43:47 -0500 Subject: [PATCH 119/125] Test slicer config --- models/fmcib_radiomics/config/from_slicer.yml | 20 +++++++++++++++++++ models/fmcib_radiomics/dockerfiles/Dockerfile | 5 +---- models/fmcib_radiomics/utils/FMCIBRunner.py | 8 ++++++-- 3 files changed, 27 insertions(+), 6 deletions(-) create mode 100644 models/fmcib_radiomics/config/from_slicer.yml diff --git a/models/fmcib_radiomics/config/from_slicer.yml b/models/fmcib_radiomics/config/from_slicer.yml new file mode 100644 index 00000000..1c5682a9 --- /dev/null +++ b/models/fmcib_radiomics/config/from_slicer.yml @@ -0,0 +1,20 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: "FMCIB pipeline" + +execute: +- FileStructureImporter +- FMCIBRunner +- DataOrganizer + +modules: + FileStructureImporter: + structures: + - 
$patientID@instance/re:^.*\.nrrd$::@nrrd:mod=ct + - $patientID/re:^.*\.json$::@json:type=fmcibcoordinates + import_id: patientID + + DataOrganizer: + targets: + - json:type=fmcibfeatures-->[i:patientID]/features.json \ No newline at end of file diff --git a/models/fmcib_radiomics/dockerfiles/Dockerfile b/models/fmcib_radiomics/dockerfiles/Dockerfile index 1c9716ed..54059428 100644 --- a/models/fmcib_radiomics/dockerfiles/Dockerfile +++ b/models/fmcib_radiomics/dockerfiles/Dockerfile @@ -17,8 +17,5 @@ RUN pip3 install --upgrade pip && pip3 install --no-cache-dir \ # Install FMCIB package, should install everything else ... RUN pip3 install foundation-cancer-image-biomarker --pre -# Fix for mpmath torch bug: https://github.com/underworldcode/underworld3/issues/167 -RUN pip3 install mpmath==1.3.0 - ENTRYPOINT ["mhub.run"] -CMD ["--workflow", "default", "--print"] +CMD ["--workflow", "default"] diff --git a/models/fmcib_radiomics/utils/FMCIBRunner.py b/models/fmcib_radiomics/utils/FMCIBRunner.py index f3273156..70310e81 100644 --- a/models/fmcib_radiomics/utils/FMCIBRunner.py +++ b/models/fmcib_radiomics/utils/FMCIBRunner.py @@ -45,9 +45,13 @@ def get_coordinates(json_file_path: str) -> dict: if is_valid(json_data, SLICERMARKUP_SCHEMA_PATH): markups = json_data["markups"] - assert markups["coordinateSystem"] == "LPS" + + assert len(markups) == 1, "Currently, only one point per file is supported." 
+ markup = markups[0] + + assert markup["coordinateSystem"] == "LPS" - controlPoints = markups["controlPoints"] + controlPoints = markup["controlPoints"] assert len(controlPoints) == 1 position = controlPoints[0]["position"] From 0c262b6ee880948f18972cfaa7122fcc3904cf6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leonard=20N=C3=BCrnberg?= Date: Tue, 12 Mar 2024 18:56:21 +0100 Subject: [PATCH 120/125] Create LICENSE --- LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..45181a18 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 MHub.ai + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
From 6243b98582ad55ce511f622b04a4e4e9b105b677 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Wed, 13 Mar 2024 18:38:10 +0100 Subject: [PATCH 121/125] minor updates on runner module --- models/fmcib_radiomics/utils/FMCIBRunner.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/models/fmcib_radiomics/utils/FMCIBRunner.py b/models/fmcib_radiomics/utils/FMCIBRunner.py index faf2e23f..0729413d 100644 --- a/models/fmcib_radiomics/utils/FMCIBRunner.py +++ b/models/fmcib_radiomics/utils/FMCIBRunner.py @@ -61,7 +61,6 @@ def get_coordinates(json_file_path: str) -> dict: # raise ValueError("The input json file does not adhere to the expected schema.") - def fmcib(input_dict: dict, json_output_file_path: str): """Run the FCMIB pipeline. @@ -96,7 +95,7 @@ def fmcib(input_dict: dict, json_output_file_path: str): class FMCIBRunner(Module): @IO.Instance() - @IO.Input('in_data', 'nrrd:mod=ct', the='Input NRRD file') + @IO.Input('in_data', 'nrrd|nifti:mod=ct', the='Input nrrd or nifti ct image file') @IO.Input('centroids_json', "json:type=fmcibcoordinates", the='JSON file containing 3D coordinates of the centroid of the input mask.') @IO.Output('feature_json', 'features.json', "json:type=fmcibfeatures", bundle='model', the='Features extracted from the input image at the specified seed point.') def task(self, instance: Instance, in_data: InstanceData, centroids_json: InstanceData, feature_json: InstanceData) -> None: From a184cfe98b676e95261c68dd1c23c011b1ad0c0f Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Wed, 13 Mar 2024 18:38:21 +0100 Subject: [PATCH 122/125] add dicom workflow --- models/fmcib_radiomics/config/dicom.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 models/fmcib_radiomics/config/dicom.yml diff --git a/models/fmcib_radiomics/config/dicom.yml b/models/fmcib_radiomics/config/dicom.yml new file mode 100644 index 00000000..da7dc513 --- /dev/null +++ b/models/fmcib_radiomics/config/dicom.yml @@ -0,0 +1,22 @@ 
+general: + data_base_dir: /app/data + version: 1.0 + description: FMCIB pipeline starting from DICOM files and centroids in json files named by their SeriesInstanceUID + +execute: +- DicomImporter +- FileImporter +- NiftiConverter +- FMCIBRunner +- DataOrganizer + +modules: + + FileImporter: + instance_id: sid + meta: type=fmcibcoordinates + type: json + + DataOrganizer: + targets: + - json:type=fmcibfeatures-->[i:sid]/features.json \ No newline at end of file From 94d46a8eff8045acf8bffb10fc6785d80c3f7864 Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Thu, 14 Mar 2024 10:40:09 +0100 Subject: [PATCH 123/125] reorganize configs --- models/fmcib_radiomics/config/default.yml | 19 ++++++++-------- models/fmcib_radiomics/config/dicom.yml | 22 ------------------- .../fmcib_radiomics/config/from_nrrd_mask.yml | 21 ++++++++++++++++++ 3 files changed, 31 insertions(+), 31 deletions(-) delete mode 100644 models/fmcib_radiomics/config/dicom.yml create mode 100644 models/fmcib_radiomics/config/from_nrrd_mask.yml diff --git a/models/fmcib_radiomics/config/default.yml b/models/fmcib_radiomics/config/default.yml index cc4b1559..297a8a14 100644 --- a/models/fmcib_radiomics/config/default.yml +++ b/models/fmcib_radiomics/config/default.yml @@ -1,21 +1,22 @@ general: data_base_dir: /app/data version: 1.0 - description: "FMCIB pipeline" + description: FMCIB pipeline starting from DICOM files and centroids in json files or slicer exports named by their SeriesInstanceUID execute: -- FileStructureImporter -- CentroidExtractor +- DicomImporter +- FileImporter +- NiftiConverter - FMCIBRunner - DataOrganizer modules: - FileStructureImporter: - structures: - - $patientID/CT.nrrd@instance@nrrd:mod=ct - - $patientID/masks/GTV-1.nrrd@nrrd:mod=seg - import_id: patientID + + FileImporter: + instance_id: sid + meta: type=fmcibcoordinates + type: json DataOrganizer: targets: - - json:type=fmcibfeatures-->[i:patientID]/features.json \ No newline at end of file + - 
json:type=fmcibfeatures-->[i:sid]/features.json \ No newline at end of file diff --git a/models/fmcib_radiomics/config/dicom.yml b/models/fmcib_radiomics/config/dicom.yml deleted file mode 100644 index da7dc513..00000000 --- a/models/fmcib_radiomics/config/dicom.yml +++ /dev/null @@ -1,22 +0,0 @@ -general: - data_base_dir: /app/data - version: 1.0 - description: FMCIB pipeline starting from DICOM files and centroids in json files named by their SeriesInstanceUID - -execute: -- DicomImporter -- FileImporter -- NiftiConverter -- FMCIBRunner -- DataOrganizer - -modules: - - FileImporter: - instance_id: sid - meta: type=fmcibcoordinates - type: json - - DataOrganizer: - targets: - - json:type=fmcibfeatures-->[i:sid]/features.json \ No newline at end of file diff --git a/models/fmcib_radiomics/config/from_nrrd_mask.yml b/models/fmcib_radiomics/config/from_nrrd_mask.yml new file mode 100644 index 00000000..22644ffc --- /dev/null +++ b/models/fmcib_radiomics/config/from_nrrd_mask.yml @@ -0,0 +1,21 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: "FMCIB pipeline starting from a nrrd file image and a nnrd binary mask of the GTV." 
+ +execute: +- FileStructureImporter +- CentroidExtractor +- FMCIBRunner +- DataOrganizer + +modules: + FileStructureImporter: + structures: + - $patientID/CT.nrrd@instance@nrrd:mod=ct + - $patientID/masks/GTV-1.nrrd@nrrd:mod=seg + import_id: patientID + + DataOrganizer: + targets: + - json:type=fmcibfeatures-->[i:patientID]/features.json \ No newline at end of file From de652f33b882f69ab68ad72509b205bc965976de Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Thu, 14 Mar 2024 18:59:41 +0100 Subject: [PATCH 124/125] update test json schema --- .github/schemas/testmodel.schema.json | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/.github/schemas/testmodel.schema.json b/.github/schemas/testmodel.schema.json index 968d5372..cd866684 100644 --- a/.github/schemas/testmodel.schema.json +++ b/.github/schemas/testmodel.schema.json @@ -22,15 +22,32 @@ "type": "string", "pattern": "^s3://[\\w\\-/]+/\\*$" }, + "url": { + "type": "string", + "format": "uri" + }, + "description": { + "type": "string" + }, "path": { "type": "string", "pattern": "^[^\\/][\\w\\-\\/]+$" } }, - "required": [ - "SeriesInstanceUID", - "aws_url", - "path" + "oneOf": [ + { + "required": [ + "SeriesInstanceUID", + "aws_url", + "path" + ] + }, + { + "required": [ + "url", + "path" + ] + } ] } } From 3d894bdc24552102336e9d30dc4b3a52737aa8bf Mon Sep 17 00:00:00 2001 From: LennyN95 Date: Thu, 14 Mar 2024 19:04:21 +0100 Subject: [PATCH 125/125] allow dots in test config data path value --- .github/schemas/testmodel.schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/schemas/testmodel.schema.json b/.github/schemas/testmodel.schema.json index cd866684..3ef4692e 100644 --- a/.github/schemas/testmodel.schema.json +++ b/.github/schemas/testmodel.schema.json @@ -31,7 +31,7 @@ }, "path": { "type": "string", - "pattern": "^[^\\/][\\w\\-\\/]+$" + "pattern": "^[^\\/][\\w\\-\\.\\/]+$" } }, "oneOf": [