diff --git a/models/gc_picai_baseline/config/default.yml b/models/gc_picai_baseline/config/default.yml new file mode 100644 index 00000000..bb5d5deb --- /dev/null +++ b/models/gc_picai_baseline/config/default.yml @@ -0,0 +1,34 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: Prostate MRI classification default (dicom to json) + +execute: +- FileStructureImporter +- MhaConverter +- PicaiBaselineRunner +- ReportExporter +- DataOrganizer + +modules: + FileStructureImporter: + input_dir: input_data + structures: + - $sid@instance/$type@dicom:mod=mr + import_id: sid + + MhaConverter: + engine: panimg + allow_multi_input: true + + ReportExporter: + format: compact + includes: + - data: prostate_cancer_likelihood + label: prostate_cancer_likelihood + value: value + + DataOrganizer: + targets: + - json:mod=report-->[i:sid]/cspca-case-level-likelihood.json + - mha:mod=dm-->[i:sid]/cspca-detection-map.mha diff --git a/models/gc_picai_baseline/config/mha-pipeline.yml b/models/gc_picai_baseline/config/mha-pipeline.yml new file mode 100644 index 00000000..f20d5abc --- /dev/null +++ b/models/gc_picai_baseline/config/mha-pipeline.yml @@ -0,0 +1,31 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: Prostate MRI classification MHA pipeline (mha to json) + +execute: +- FileStructureImporter +- PicaiBaselineRunner +- ReportExporter +- DataOrganizer + +modules: + FileStructureImporter: + input_dir: input_data + structures: + - $sid@instance/images/transverse-adc-prostate-mri/adc.mha@mha:mod=mr:type=adc + - $sid/images/transverse-t2-prostate-mri/t2w.mha@mha:mod=mr:type=t2w + - $sid/images/transverse-hbv-prostate-mri/hbv.mha@mha:mod=mr:type=hbv + import_id: sid + + ReportExporter: + format: compact + includes: + - data: prostate_cancer_likelihood + label: prostate_cancer_likelihood + value: value + + DataOrganizer: + targets: + - json:mod=report-->[i:sid]/cspca-case-level-likelihood.json + - mha:mod=dm-->[i:sid]/cspca-detection-map.mha diff --git 
a/models/gc_picai_baseline/dockerfiles/Dockerfile b/models/gc_picai_baseline/dockerfiles/Dockerfile new file mode 100644 index 00000000..7c2af162 --- /dev/null +++ b/models/gc_picai_baseline/dockerfiles/Dockerfile @@ -0,0 +1,55 @@ +FROM mhubai/base:latest + +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# Install PyTorch 2.0.1 (CUDA enabled) +RUN pip3 install --no-cache-dir torch==2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html + +# Install git-lfs (required for unpacking model weights) +RUN apt update && \ + apt install -y --no-install-recommends git-lfs && \ + rm -rf /var/lib/apt/lists/* + +# Install PICAI baseline algorithm and model weights +# - Git clone the algorithm repository for v2.1.2 (fixed to v2.1.2 tag) +# - We remove unnecessary files for a more compact docker layer +# - Subsequently we remove the .git directory to produce a more compact docker layer +RUN git clone --depth 1 --branch v2.1.2 https://github.com/DIAGNijmegen/picai_nnunet_semi_supervised_gc_algorithm.git /opt/algorithm && \ + rm -rf /opt/algorithm/test && \ + rm -rf /opt/algorithm/.git + +# Set this environment variable as a shortcut to avoid nnunet==1.7.0 crashing the build +# by pulling sklearn instead of scikit-learn +# N.B. 
this is a known issue: +# https://github.com/MIC-DKFZ/nnUNet/issues/1281 +# https://github.com/MIC-DKFZ/nnUNet/pull/1209 +ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True + +# Install additional PICAI requirements +RUN pip3 install --no-cache-dir -r /opt/algorithm/requirements.txt + +# Extend the nnUNet installation with custom trainers +RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \ + mv /opt/algorithm/nnUNetTrainerV2_focalLoss.py "$SITE_PKG/nnunet/training/network_training/nnUNet_variants/loss_function/nnUNetTrainerV2_focalLoss.py" +RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \ + mv /opt/algorithm/nnUNetTrainerV2_Loss_CE_checkpoints.py "$SITE_PKG/nnunet/training/network_training/nnUNetTrainerV2_Loss_CE_checkpoints.py" +RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \ + mv /opt/algorithm/nnUNetTrainerV2_Loss_FL_and_CE.py "$SITE_PKG/nnunet/training/network_training/nnUNetTrainerV2_Loss_FL_and_CE.py" + +# Two code edits to the __init__ method of the algorithm class in process.py to prevent some of its default behavior +# 1. Skip forced error caused by using different input locations than expected (we don't use the GC dirs) +# 2. 
Prevent unnecessary folder creation before input directories have been set (we will set the correct directory later) +RUN sed -i "s|file_paths = list(Path(folder).glob(scan_glob_format))|return|g" /opt/algorithm/process.py && \ + sed -i "s|self.cspca_detection_map_path.parent.mkdir(exist_ok=True, parents=True)||g" /opt/algorithm/process.py + +# Import the MHub model definition +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh gc_picai_baseline ${MHUB_MODELS_REPO} + +# Add PICAI baseline algorithm code base to python path +ENV PYTHONPATH="/app:/opt/algorithm" + +# Default entrypoint +ENTRYPOINT ["python3", "-m", "mhubio.run"] +CMD ["--config", "/app/models/gc_picai_baseline/config/default.yml"] diff --git a/models/gc_picai_baseline/meta.json b/models/gc_picai_baseline/meta.json new file mode 100644 index 00000000..1dbab230 --- /dev/null +++ b/models/gc_picai_baseline/meta.json @@ -0,0 +1,179 @@ +{ + "id": "c5f886fb-9f54-4555-a954-da02b22d6d3f", + "name": "gc_picai_baseline", + "title": "PI-CAI challenge baseline", + "summary": { + "description": "This algorithm predicts a detection map for the likelihood of clinically significant prostate cancer (csPCa) using biparametric MRI (bpMRI). The algorithm ensembles 5-fold cross-validation models that were trained on the PI-CAI: Public Training and Development Dataset v2.0. The detection map is at the same spatial resolution and physical dimensions as the input axial T2-weighted image. 
This algorithm was used as a baseline for the PI-CAI challenge hosted on Grand Challenge.", + "inputs": [ + { + "label": "Transverse T2-weighted prostate biparametric MRI", + "description": "Transverse T2-weighted prostate biparametric MRI exam.", + "format": "DICOM", + "modality": "MR", + "bodypartexamined": "Prostate", + "slicethickness": "0.5 x 0.5 x 3.0 mm", + "non-contrast": false, + "contrast": false + }, + { + "label": "Transverse high b-value diffusion-weighted maps of the prostate", + "description": "Transverse high b-value diffusion-weighted (DWI) maps, with b-value of 1400 or 2000, either acquired or vendor-calculated.", + "format": "DICOM", + "modality": "MR", + "bodypartexamined": "Prostate", + "slicethickness": "0.5 x 0.5 x 3.0 mm", + "non-contrast": false, + "contrast": false + }, + { + "label": "Transverse apparent diffusion coefficient map of the prostate", + "description": "Transverse apparent diffusion coefficient (ADC) prostate MRI map.", + "format": "DICOM", + "modality": "MR", + "bodypartexamined": "Prostate", + "slicethickness": "0.5 x 0.5 x 3.0 mm", + "non-contrast": false, + "contrast": false + } + ], + "outputs": [ + { + "type": "Prediction", + "valueType": "Likelihood", + "label": "Prostate cancer likelihood", + "description": "Case-level likelihood of harboring clinically significant prostate cancer, in range [0,1].", + "classes": [] + }, + { + "type": "Prediction", + "valueType": "Likelihood map", + "label": "Transverse cancer detection map", + "description": "Detection map of clinically significant prostate cancer lesions in 3D, where each voxel holds a floating-point likelihood in range [0,1]. 
This map is at the same spatial resolution and physical dimensions as the input transversal T2-weighted image.", + "classes": [] + } + ], + "model": { + "architecture": "3d fullres nnUNet", + "training": "semi-supervised", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 1500 + }, + "evaluation": { + "vol_samples": 1000 + }, + "public": false, + "external": false + } + }, + "details": { + "name": "PI-CAI challenge baseline", + "version": "v2.1.1", + "devteam": "Diagnostic Image Analysis Group, Radboud University Medical Center, Nijmegen, The Netherlands", + "type": "Prediction", + "date": { + "weights": "2022-06-22", + "code": "2022-09-05", + "pub": "" + }, + "cite": "J. S. Bosma, A. Saha, M. Hosseinzadeh, I. Slootweg, M. de Rooij, and H. Huisman, \"Semisupervised Learning with Report-guided Pseudo Labels for Deep Learning–based Prostate Cancer Detection Using Biparametric MRI\", Radiology: Artificial Intelligence, 230031, 2023. DOI: 10.1148/ryai.230031", + "license": { + "code": "Apache 2.0", + "weights": "CC-BY-NC-SA-4.0" + }, + "publications": [ + { + "uri": "https://doi.org/10.5281/zenodo.6667655", + "title": "Artificial Intelligence and Radiologists at Prostate Cancer Detection in MRI: The PI-CAI Challenge (Study Protocol)" + }, + { + "uri": "https://pubs.rsna.org/doi/10.1148/ryai.230031", + "title": "Semisupervised Learning with Report-guided Pseudo Labels for Deep Learning–based Prostate Cancer Detection Using Biparametric MRI" + } + ], + "github": "https://github.com/DIAGNijmegen/picai_nnunet_semi_supervised_gc_algorithm", + "zenodo": "", + "colab": "", + "slicer": false + }, + "info": { + "use": { + "title": "Intended use", + "text": "This algorithm is a deep learning-based detection/diagnosis model, which ensembles 5 independent nnU-Net models (5-fold cross-validation). 
To predict the likelihood of harboring clinically significant prostate cancer (csPCa), the transversal T2-weighted, apparent diffusion coefficient (ADC) and high b-value diffusion weighted maps are required. The input sequences should be co-registered or aligned reasonably well and the prostate gland should be localized within a volume of 460 cm³ from the centre coordinate. The nnU-Net framework will internally resample all input scans to 0.5 x 0.5 x 3.0 mm. Per case the input data should be put into the following folder structure: `case1/adc`, `case1/hbv`, `case1/t2w`, corresponding respectively with the ADC, high b-value DWI, and the T2 weighted MR inputs for a case called `case1`.", + "references": [ + { + "label": "PI-CAI baseline algorithm on grand-challenge", + "uri": "https://grand-challenge.org/algorithms/pi-cai-baseline-nnu-net-semi-supervised/" + } + ], + "tables": [] + }, + "analyses": { + "title": "Evaluation", + "text": "Patient-level diagnosis performance is evaluated using the Area Under Receiver Operating Characteristic (AUROC) metric. 
Lesion-level detection performance is evaluated using the Average Precision (AP) metric.", + "references": [ + { + "label": "PI-CAI AI challenge details", + "uri": "https://pi-cai.grand-challenge.org/AI/" + }, + { + "label": "PI-CAI baseline algorithm evaluation results on grand-challenge.", + "uri": "https://pi-cai.grand-challenge.org/evaluation/fe187cdb-cb61-4cbb-ab63-2de483a52d60/" + } + ], + "tables": [ + { + "label": "Evaluation results on the PI-CAI testing cohort of 1000 cases.", + "entries": { + "AUROC": "0.865", + "AP": "0.576" + } + } + ] + }, + "evaluation": { + "title": "Evaluation data", + "text": "The PI-CAI Hidden Testing Cohort (1000 cases) includes internal testing data (unseen cases from seen centers) and external testing data (unseen cases from an unseen center).", + "references": [ + { + "label": "PI-CAI data section", + "uri": "https://pi-cai.grand-challenge.org/DATA/" + } + ], + "tables": [] + }, + "training": { + "title": "Training data", + "text": "The publicly available PI-CAI training and development dataset of 1500 biparametric MRI exams was used for training [1]. AI-derived annotations were created for cases without manual annotations [2]. This model was trained using a custom preprocessing step followed by the standard nnU-Net pipeline. 
The default nnU-Net loss-function was changed to Cross-Entropy + Focal loss [3].", + "references": [ + { + "label": "PI-CAI publicly available training and development dataset", + "uri": "https://zenodo.org/record/6624726" + }, + { + "label": "Method to obtain AI-derived annotations", + "uri": "https://fastmri.eu/research/bosma22a.html" + }, + { + "label": "Detailed description of training method", + "uri": "https://github.com/DIAGNijmegen/picai_baseline/blob/main/nnunet_baseline.md" + } + ], + "tables": [] + }, + "ethics": { + "title": "", + "text": "", + "references": [], + "tables": [] + }, + "limitations": { + "title": "Limitations", + "text": "This algorithm was developed for research purposes only.", + "references": [], + "tables": [] + } + } +} \ No newline at end of file diff --git a/models/gc_picai_baseline/utils/PicaiBaselineRunner.py b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py new file mode 100644 index 00000000..84dc1474 --- /dev/null +++ b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py @@ -0,0 +1,65 @@ +""" +--------------------------------------------------------- +Mhub / DIAG - Run Module for the PICAI baseline Algorithm +--------------------------------------------------------- + +--------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +--------------------------------------------------------- +""" + +import json +import sys +from pathlib import Path + +from mhubio.core import Instance, InstanceData, IO, Module, ValueOutput, ClassOutput, Meta + + +CLI_PATH = Path(__file__).parent / "cli.py" + + +@ValueOutput.Name('prostate_cancer_likelihood') +@ValueOutput.Label('ProstateCancerLikelihood') +@ValueOutput.Type(float) +@ValueOutput.Description('Likelihood of case-level prostate cancer.') +class ProstateCancerLikelihood(ValueOutput): + pass + + +class PicaiBaselineRunner(Module): + + @IO.Instance() + @IO.Input('in_data_t2', 'mha:mod=mr:type=t2w', the='input T2 
weighted prostate MR image') + @IO.Input('in_data_adc', 'mha:mod=mr:type=adc', the='input ADC prostate MR image') + @IO.Input('in_data_hbv', 'mha:mod=mr:type=hbv', the='input HBV prostate MR image') + @IO.Output('cancer_likelihood_json', 'cspca-case-level-likelihood.json', "json", bundle='model', the='output JSON file with PICAI baseline prostate cancer likelihood') + @IO.Output('cancer_lesion_detection_map', 'cspca-detection-map.mha', "mha:mod=dm", bundle='model', the='output detection map of clinically significant prostate cancer lesions in 3D, where each voxel represents a floating point in range [0,1]') + @IO.OutputData('cancer_likelihood', ProstateCancerLikelihood, the='PICAI baseline prostate cancer likelihood') + def task(self, instance: Instance, in_data_t2: InstanceData, in_data_adc: InstanceData, in_data_hbv: InstanceData, cancer_likelihood_json: InstanceData, cancer_lesion_detection_map: InstanceData, cancer_likelihood: ProstateCancerLikelihood) -> None: + # build command (order matters!) 
+ cmd = [ + sys.executable, + str(CLI_PATH), + in_data_t2.abspath, + in_data_adc.abspath, + in_data_hbv.abspath, + cancer_likelihood_json.abspath, + cancer_lesion_detection_map.abspath, + ] + + # run the command as subprocess + self.subprocess(cmd, text=True) + + # Extract cancer likelihood value from cancer_likelihood_file + if not Path(cancer_likelihood_json.abspath).is_file(): + raise FileNotFoundError(f"Output file {cancer_likelihood_json.abspath} could not be found!") + + with open(cancer_likelihood_json.abspath, "r") as f: + cancer_lh = float(json.load(f)) + + if not (isinstance(cancer_lh, (float, int)) and (0.0 <= cancer_lh <= 1.0)): + raise ValueError(f"Cancer likelihood value should be between 0 and 1, found: {cancer_lh}") + + # Output the predicted values + cancer_likelihood.value = cancer_lh diff --git a/models/gc_picai_baseline/utils/__init__.py b/models/gc_picai_baseline/utils/__init__.py new file mode 100644 index 00000000..a0ec22bc --- /dev/null +++ b/models/gc_picai_baseline/utils/__init__.py @@ -0,0 +1 @@ +from .PicaiBaselineRunner import * diff --git a/models/gc_picai_baseline/utils/cli.py b/models/gc_picai_baseline/utils/cli.py new file mode 100644 index 00000000..deaf9ecf --- /dev/null +++ b/models/gc_picai_baseline/utils/cli.py @@ -0,0 +1,54 @@ +""" +-------------------------------------------------- +Mhub / DIAG - CLI for the PICAI baseline Algorithm +-------------------------------------------------- + +-------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +-------------------------------------------------- +""" + +import argparse +from pathlib import Path +from process import csPCaAlgorithm as PicaiClassifier + + +def run_classifier(t2: Path, adc: Path, hbv: Path, cancer_likelihood_json: Path, cancer_lesion_detection_map: Path): + # Initialize classifier object + classifier = PicaiClassifier() + + # Specify input files (the order is important!) 
+ classifier.scan_paths = [ + t2, + adc, + hbv, + ] + + # Specify output files + classifier.cspca_detection_map_path = cancer_lesion_detection_map + classifier.case_confidence_path = cancer_likelihood_json + + # Run the classifier on the input images + classifier.process() + + +def run_classifier_cli(): + parser = argparse.ArgumentParser("CLI to run the PICAI baseline classifier") + parser.add_argument("input_t2", type=str, help="input T2 weighted prostate MR image (MHA)") + parser.add_argument("input_adc", type=str, help="input ADC prostate MR image (MHA") + parser.add_argument("input_hbv", type=str, help="input HBV prostate MR image (MHA)") + parser.add_argument("output_cancer_likelihood_json", type=str, help="output JSON file with PICAI baseline prostate cancer likelihood (JSON)") + parser.add_argument("output_cancer_lesion_detection_map", type=str, help="output detection map of clinically significant prostate cancer lesions in 3D (MHA)") + args = parser.parse_args() + run_classifier( + t2=Path(args.input_t2), + adc=Path(args.input_adc), + hbv=Path(args.input_hbv), + cancer_likelihood_json=Path(args.output_cancer_likelihood_json), + cancer_lesion_detection_map=Path(args.output_cancer_lesion_detection_map), + ) + + +if __name__ == "__main__": + run_classifier_cli()