From 395140a9fc5ea9f9973519158a48b35542baa91a Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 22 Jun 2023 17:32:00 +0200 Subject: [PATCH 01/19] add initial implementation grt123 model --- models/grt123_lung_cancer/__init__.py | 1 + models/grt123_lung_cancer/config/config.yml | 14 ++ .../dockerfiles/cuda11.4/Dockerfile | 55 ++++++++ .../dockerfiles/cuda12.0/Dockerfile | 55 ++++++++ .../dockerfiles/nocuda/Dockerfile | 55 ++++++++ models/grt123_lung_cancer/scripts/run.py | 42 ++++++ .../utils/LungCancerClassifierRunner.py | 59 +++++++++ .../utils/PanImgConverters.py | 122 ++++++++++++++++++ models/grt123_lung_cancer/utils/__init__.py | 1 + 9 files changed, 404 insertions(+) create mode 100644 models/grt123_lung_cancer/__init__.py create mode 100644 models/grt123_lung_cancer/config/config.yml create mode 100644 models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile create mode 100644 models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile create mode 100644 models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile create mode 100644 models/grt123_lung_cancer/scripts/run.py create mode 100644 models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py create mode 100644 models/grt123_lung_cancer/utils/PanImgConverters.py create mode 100644 models/grt123_lung_cancer/utils/__init__.py diff --git a/models/grt123_lung_cancer/__init__.py b/models/grt123_lung_cancer/__init__.py new file mode 100644 index 00000000..90f60fdd --- /dev/null +++ b/models/grt123_lung_cancer/__init__.py @@ -0,0 +1 @@ +from .utils import * \ No newline at end of file diff --git a/models/grt123_lung_cancer/config/config.yml b/models/grt123_lung_cancer/config/config.yml new file mode 100644 index 00000000..bad14ddc --- /dev/null +++ b/models/grt123_lung_cancer/config/config.yml @@ -0,0 +1,14 @@ +general: + data_base_dir: /app/data + +modules: + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: True + meta: + mod: ct + + LungCancerClassifierRunner: + tmp_path: /app/tmp + n_preprocessing_workers: 8 diff --git a/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile new file mode 100644 index 00000000..036d4118 --- /dev/null +++ b/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile @@ -0,0 +1,55 @@ +# syntax=docker/dockerfile:experimental + +# Specify the base image for the environment +FROM mhubai/base:cuda11.4 + +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# Install panimg to test conversion integration TODO should later be installed with MHub/mhubio +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3-openslide \ + && rm -rf /var/lib/apt/lists/* +RUN pip3 install panimg + +# install required dependencies for grt123 algorithm (CPU-only) +RUN pip3 install --no-cache-dir \ + torch==1.10.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html + + +# TODO remove later ==== Temporariy SSH fix as long as repo is private ======= +RUN apt-get update && apt-get install -y --no-install-recommends \ + openssh-client \ + && rm -rf /var/lib/apt/lists/* +# Add github public key to known_hosts for SSH +RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts +# TODO remove later =============================== + + +# TODO make public and remove ssh extras... 
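# Note: the RUN --mount=type=ssh clone below only takes effect when BuildKit forwards an
# SSH agent into the build. A typical invocation for such a build looks roughly like the
# following (the image tag is only an example; assumes a running ssh-agent with access
# to the private repository):
#
#   DOCKER_BUILDKIT=1 docker build --ssh default \
#       -t grt123_lung_cancer:cuda11.4 \
#       -f models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile .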
+# Install grt123 algorithm and model weights +# - We use a shallow git clone for reduced bandwidth usage +# - We remove unnecessary files for a compacter docker layer +# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file +RUN --mount=type=ssh git clone --branch 44-port-to-python3 --depth 1 git@github.com:DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ + rm -rf /grt123_lung_cancer/tests && \ + rm -rf /grt123_lung_cancer/training && \ + rm -rf /grt123_lung_cancer/processor && \ + rm -rf /grt123_lung_cancer/images && \ + rm /grt123_lung_cancer/README.md && \ + rm /grt123_lung_cancer/solution-grt123-team.pdf && \ + echo "$(git log --format="%H" -n 1)" > /grt123_lung_cancer/HEAD && \ + rm -rf /grt123_lung_cancer/.git/* && \ + mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git + +# Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) +#RUN git init \ +# && git sparse-checkout set "models/grt123_lung_cancer" \ +# && git fetch https://github.com/MHubAI/models.git m-grt123-lung-cancer \ +# && git merge TODO + +# Add lobe segmentation code base to python path +ENV PYTHONPATH="/grt123_lung_cancer:/app" + +# Default run script +CMD ["python3", "/app/models/grt123_lung_cancer/scripts/run.py"] diff --git a/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile new file mode 100644 index 00000000..84a71111 --- /dev/null +++ b/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile @@ -0,0 +1,55 @@ +# syntax=docker/dockerfile:experimental + +# Specify the base image for the environment +FROM mhubai/base:cuda12.0 + +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# Install panimg to test conversion integration TODO should later be installed with MHub/mhubio +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3-openslide \ + && rm -rf /var/lib/apt/lists/* +RUN pip3 install panimg + +# install required dependencies for grt123 algorithm (CPU-only) +RUN pip3 install --no-cache-dir \ + torch===2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html + + +# TODO remove later ==== Temporariy SSH fix as long as repo is private ======= +RUN apt-get update && apt-get install -y --no-install-recommends \ + openssh-client \ + && rm -rf /var/lib/apt/lists/* +# Add github public key to known_hosts for SSH +RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts +# TODO remove later =============================== + + +# TODO make public and remove ssh extras... 
+# Install grt123 algorithm and model weights +# - We use a shallow git clone for reduced bandwidth usage +# - We remove unnecessary files for a compacter docker layer +# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file +RUN --mount=type=ssh git clone --branch 44-port-to-python3 --depth 1 git@github.com:DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ + rm -rf /grt123_lung_cancer/tests && \ + rm -rf /grt123_lung_cancer/training && \ + rm -rf /grt123_lung_cancer/processor && \ + rm -rf /grt123_lung_cancer/images && \ + rm /grt123_lung_cancer/README.md && \ + rm /grt123_lung_cancer/solution-grt123-team.pdf && \ + echo "$(git log --format="%H" -n 1)" > /grt123_lung_cancer/HEAD && \ + rm -rf /grt123_lung_cancer/.git/* && \ + mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git + +# Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) +#RUN git init \ +# && git sparse-checkout set "models/grt123_lung_cancer" \ +# && git fetch https://github.com/MHubAI/models.git m-grt123-lung-cancer \ +# && git merge TODO + +# Add lobe segmentation code base to python path +ENV PYTHONPATH="/grt123_lung_cancer:/app" + +# Default run script +CMD ["python3", "/app/models/grt123_lung_cancer/scripts/run.py"] diff --git a/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile b/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile new file mode 100644 index 00000000..7a9d968d --- /dev/null +++ b/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile @@ -0,0 +1,55 @@ +# syntax=docker/dockerfile:experimental + +# Specify the base image for the environment +FROM mhubai/base:nocuda + +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# Install panimg to test conversion integration TODO should later be installed with MHub/mhubio +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3-openslide \ + && rm -rf /var/lib/apt/lists/* +RUN pip3 install panimg + +# install required dependencies for grt123 algorithm (CPU-only) +RUN pip3 install --no-cache-dir \ + torch===2.0.1+cpu -f https://download.pytorch.org/whl/torch_stable.html + + +# TODO remove later ==== Temporariy SSH fix as long as repo is private ======= +RUN apt-get update && apt-get install -y --no-install-recommends \ + openssh-client \ + && rm -rf /var/lib/apt/lists/* +# Add github public key to known_hosts for SSH +RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts +# TODO remove later =============================== + + +# TODO make public and remove ssh extras... 
+# Install grt123 algorithm and model weights +# - We use a shallow git clone for reduced bandwidth usage +# - We remove unnecessary files for a compacter docker layer +# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file +RUN --mount=type=ssh git clone --branch 44-port-to-python3 --depth 1 git@github.com:DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ + rm -rf /grt123_lung_cancer/tests && \ + rm -rf /grt123_lung_cancer/training && \ + rm -rf /grt123_lung_cancer/processor && \ + rm -rf /grt123_lung_cancer/images && \ + rm /grt123_lung_cancer/README.md && \ + rm /grt123_lung_cancer/solution-grt123-team.pdf && \ + echo "$(git log --format="%H" -n 1)" > /grt123_lung_cancer/HEAD && \ + rm -rf /grt123_lung_cancer/.git/* && \ + mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git + +# Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) +#RUN git init \ +# && git sparse-checkout set "models/grt123_lung_cancer" \ +# && git fetch https://github.com/MHubAI/models.git m-grt123-lung-cancer \ +# && git merge TODO + +# Add lobe segmentation code base to python path +ENV PYTHONPATH="/grt123_lung_cancer:/app" + +# Default run script +CMD ["python3", "/app/models/grt123_lung_cancer/scripts/run.py"] diff --git a/models/grt123_lung_cancer/scripts/run.py b/models/grt123_lung_cancer/scripts/run.py new file mode 100644 index 00000000..f6294777 --- /dev/null +++ b/models/grt123_lung_cancer/scripts/run.py @@ -0,0 +1,42 @@ +""" +------------------------------------------------------- +MHub / DIAG - Run grt123 Lung Cancer Classifier locally +------------------------------------------------------- + +------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +------------------------------------------------------- +""" + +import sys +sys.path.append('.') + +from mhubio.core import Config, DataType, FileType +from mhubio.modules.importer.DicomImporter import DicomImporter +from mhubio.modules.organizer.DataOrganizer import DataOrganizer +from models.grt123_lung_cancer.utils.LungCancerClassifierRunner import LungCancerClassifierRunner +from models.grt123_lung_cancer.utils.PanImgConverters import MhaPanImgConverter + +# clean-up +import shutil +shutil.rmtree("/app/data/sorted_data", ignore_errors=True) +shutil.rmtree("/app/tmp", ignore_errors=True) +shutil.rmtree("/app/data/output_data", ignore_errors=True) + +# config +config = Config('/app/models/grt123_lung_cancer/config/config.yml') + +# import (ct:dicom) +DicomImporter(config).execute() + +# convert (ct:dicom -> ct:mha) +MhaPanImgConverter(config).execute() + +# execute model (nnunet) +LungCancerClassifierRunner(config).execute() + +# organize data into output folder +organizer = DataOrganizer(config, set_file_permissions=sys.platform.startswith('linux')) +organizer.setTarget(DataType(FileType.JSON), "/app/data/output_data/[i:sid]/grt123_lung_cancer_findings.json") +organizer.execute() diff --git a/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py new file mode 100644 index 00000000..f4ddba6f --- /dev/null +++ b/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -0,0 +1,59 @@ +""" +---------------------------------------------------------- +Mhub / DIAG - Run Module for grt123 Lung Cancer Classifier +---------------------------------------------------------- + 
+---------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +---------------------------------------------------------- +""" +import torch.cuda +from mhubio.core import Instance, InstanceData, IO, Module + +from pathlib import Path +import numpy as np +import SimpleITK as sitk + +import torch + +import main + + +@IO.Config('n_preprocessing_workers', int, 6, the="number of preprocessing workers to use for the grt123 lung mask preprocessor") +@IO.Config('tmp_path', str, "/app/tmp", the="the path to write intermediate grt123 files to") +class LungCancerClassifierRunner(Module): + + n_preprocessing_workers: int + tmp_path: str + + @IO.Instance() + @IO.Input('in_data', 'mha:mod=ct', the='input ct scan') + @IO.Output('out_data', 'grt123_lung_cancer_findings.json', 'json:model=grt123LungCancerClassification', 'in_data', the='predicted nodules and lung cancer findings of the lung lobe') + def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: + + tmp_path = Path(self.tmp_path) + tmp_output_bbox_dir = tmp_path / "bbox" + tmp_output_prep_dir = tmp_path / "prep" + tmp_output_bbox_dir.mkdir(exist_ok=True, parents=True) + tmp_output_prep_dir.mkdir(exist_ok=True, parents=True) + + n_gpu = 1 if torch.cuda.is_available() else 0 + + # apply grt123 algorithm + results = main.main( + skip_detect=False, + skip_preprocessing=False, + datapath=str(Path(in_data.abspath).parent), + outputdir=str(tmp_path), + output_bbox_dir=str(tmp_output_bbox_dir), + output_prep_dir=str(tmp_output_prep_dir), + n_gpu=n_gpu, + n_worker_preprocessing=self.n_preprocessing_workers, + data_filter=r".*.mha" + ) + + # store classification results + self.v(f"Writing classification results to {out_data.abspath}") + assert len(results) > 0, "LungCancerClassifierRunner - Always expects at least one output report" + results[0].to_file(out_data.abspath) diff --git a/models/grt123_lung_cancer/utils/PanImgConverters.py b/models/grt123_lung_cancer/utils/PanImgConverters.py new file mode 100644 index 00000000..25dd618e --- /dev/null +++ b/models/grt123_lung_cancer/utils/PanImgConverters.py @@ -0,0 +1,122 @@ +""" +------------------------------------------------------------- +MHub - PanImg Conversion Modules Dicom2Mha and WSI-Dicom2Tiff +------------------------------------------------------------- + +------------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +------------------------------------------------------------- +""" + + +from typing import Optional + +from mhubio.modules.convert.DataConverter import DataConverter +from mhubio.core import Instance, InstanceData, DataType, FileType + +import os +from pathlib import Path +import shutil + +from panimg.exceptions import UnconsumedFilesException +from panimg.image_builders.dicom import image_builder_dicom +from panimg.image_builders.tiff import image_builder_tiff +from panimg.image_builders.metaio_nrrd import image_builder_nrrd + +import SimpleITK + + +class MhaPanImgConverter(DataConverter): + """ + Conversion module. + Convert instance data from dicom or nrrd to mha. 
+ """ + + def convert(self, instance: Instance) -> Optional[InstanceData]: + + # create a converted instance + has_instance_dicom = instance.hasType(DataType(FileType.DICOM)) + has_instance_nrrd = instance.hasType(DataType(FileType.NRRD)) + + assert has_instance_dicom or has_instance_nrrd, f"CONVERT ERROR: required datatype (dicom or nrrd) not available in instance {str(instance)}." + + # select input data, dicom has priority over nrrd + input_data = instance.data.filter(DataType(FileType.DICOM) if has_instance_dicom else DataType(FileType.NRRD)).first() + + # out data + mha_data = InstanceData("image.mha", DataType(FileType.MHA, input_data.type.meta)) + mha_data.instance = instance + + # paths + inp_data_dir = Path(input_data.abspath) + out_mha_file = Path(mha_data.abspath) + + # sanity check + assert(inp_data_dir.is_dir()) + + # DICOM CT to MHA conversion (if the file doesn't exist yet) + if out_mha_file.is_file(): + print("CONVERT ERROR: File already exists: ", out_mha_file) + return None + else: + # run conversion using panimg + input_files = {f for f in inp_data_dir.glob(["*.nrrd", "*.dcm"][has_instance_dicom]) if f.is_file()} + img_builder = image_builder_dicom if has_instance_dicom else image_builder_nrrd + try: + for result in img_builder(files=input_files): + sitk_image = result.image # SimpleITK image + SimpleITK.WriteImage(sitk_image, str(out_mha_file)) + except UnconsumedFilesException as e: + # e.errors is keyed with a Path to a file that could not be consumed, + # with a list of all the errors found with loading it, + # the user can then choose what to do with that information + print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.errors) + return None + + return mha_data + + +class TiffPanImgConverter(DataConverter): + """ + Conversion module. + Convert instance data from WSI-dicom to tiff. + """ + + def convert(self, instance: Instance) -> Optional[InstanceData]: + + # create a converted instance + assert instance.hasType(DataType(FileType.DICOM)), f"CONVERT ERROR: required datatype (dicom) not available in instance {str(instance)}." 
+ dicom_data = instance.data.filter(DataType(FileType.DICOM)).first() + + # out data + tiff_data = InstanceData("image.tiff", DataType(FileType.TIFF, dicom_data.type.meta)) + tiff_data.instance = instance + + # paths + inp_dicom_dir = Path(dicom_data.abspath) + out_tiff_file = Path(tiff_data.abspath) + + # sanity check + assert(inp_dicom_dir.is_dir()) + + # WSI-DICOM to TIFF conversion (if the file doesn't exist yet) + if out_tiff_file.is_file(): + print("CONVERT ERROR: File already exists: ", out_tiff_file) + return None + else: + # run conversion using panimg + dcm_input_files = {f for f in inp_dicom_dir.glob("*.dcm") if f.is_file()} + + try: + for result in image_builder_tiff(files=dcm_input_files): + tiff_image = result.file # Path to the tiff file + shutil.move(str(tiff_image), str(out_tiff_file)) + except UnconsumedFilesException as e: + # e.errors is keyed with a Path to a file that could not be consumed, + # with a list of all the errors found with loading it, + # the user can then choose what to do with that information + print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.errors) + return None + + return tiff_data diff --git a/models/grt123_lung_cancer/utils/__init__.py b/models/grt123_lung_cancer/utils/__init__.py new file mode 100644 index 00000000..d9f025f9 --- /dev/null +++ b/models/grt123_lung_cancer/utils/__init__.py @@ -0,0 +1 @@ +from .LungCancerClassifierRunner import * \ No newline at end of file From e1b4fd04978b7b3fd073b0a86838db42e3a6b058 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 4 Jul 2023 13:09:06 +0200 Subject: [PATCH 02/19] made Dockerfiles install publicly available v2.0.0 of grt123 --- .../dockerfiles/cuda11.4/Dockerfile | 17 ++--------------- .../dockerfiles/cuda12.0/Dockerfile | 15 +-------------- .../dockerfiles/nocuda/Dockerfile | 15 +-------------- 3 files changed, 4 insertions(+), 43 deletions(-) diff --git a/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile index 036d4118..0b356621 100644 --- a/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile +++ b/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile @@ -1,5 +1,3 @@ -# syntax=docker/dockerfile:experimental - # Specify the base image for the environment FROM mhubai/base:cuda11.4 @@ -16,22 +14,11 @@ RUN pip3 install panimg RUN pip3 install --no-cache-dir \ torch==1.10.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html - -# TODO remove later ==== Temporariy SSH fix as long as repo is private ======= -RUN apt-get update && apt-get install -y --no-install-recommends \ - openssh-client \ - && rm -rf /var/lib/apt/lists/* -# Add github public key to known_hosts for SSH -RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts -# TODO remove later =============================== - - -# TODO make public and remove ssh extras... 
# Install grt123 algorithm and model weights # - We use a shallow git clone for reduced bandwidth usage # - We remove unnecessary files for a compacter docker layer # - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file -RUN --mount=type=ssh git clone --branch 44-port-to-python3 --depth 1 git@github.com:DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ +RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ rm -rf /grt123_lung_cancer/tests && \ rm -rf /grt123_lung_cancer/training && \ rm -rf /grt123_lung_cancer/processor && \ @@ -40,7 +27,7 @@ RUN --mount=type=ssh git clone --branch 44-port-to-python3 --depth 1 git@github. rm /grt123_lung_cancer/solution-grt123-team.pdf && \ echo "$(git log --format="%H" -n 1)" > /grt123_lung_cancer/HEAD && \ rm -rf /grt123_lung_cancer/.git/* && \ - mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git + mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git \ # Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) #RUN git init \ diff --git a/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile index 84a71111..2e9c8145 100644 --- a/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile +++ b/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile @@ -1,5 +1,3 @@ -# syntax=docker/dockerfile:experimental - # Specify the base image for the environment FROM mhubai/base:cuda12.0 @@ -16,22 +14,11 @@ RUN pip3 install panimg RUN pip3 install --no-cache-dir \ torch===2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html - -# TODO remove later ==== Temporariy SSH fix as long as repo is private ======= -RUN apt-get update && apt-get install -y --no-install-recommends \ - openssh-client \ - && rm -rf /var/lib/apt/lists/* -# Add github public key to known_hosts for SSH -RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts -# TODO remove later =============================== - - -# TODO make public and remove ssh extras... 
# Install grt123 algorithm and model weights # - We use a shallow git clone for reduced bandwidth usage # - We remove unnecessary files for a compacter docker layer # - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file -RUN --mount=type=ssh git clone --branch 44-port-to-python3 --depth 1 git@github.com:DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ +RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ rm -rf /grt123_lung_cancer/tests && \ rm -rf /grt123_lung_cancer/training && \ rm -rf /grt123_lung_cancer/processor && \ diff --git a/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile b/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile index 7a9d968d..a63c2d61 100644 --- a/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile +++ b/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile @@ -1,5 +1,3 @@ -# syntax=docker/dockerfile:experimental - # Specify the base image for the environment FROM mhubai/base:nocuda @@ -16,22 +14,11 @@ RUN pip3 install panimg RUN pip3 install --no-cache-dir \ torch===2.0.1+cpu -f https://download.pytorch.org/whl/torch_stable.html - -# TODO remove later ==== Temporariy SSH fix as long as repo is private ======= -RUN apt-get update && apt-get install -y --no-install-recommends \ - openssh-client \ - && rm -rf /var/lib/apt/lists/* -# Add github public key to known_hosts for SSH -RUN mkdir -p -m 0600 ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts -# TODO remove later =============================== - - -# TODO make public and remove ssh extras... # Install grt123 algorithm and model weights # - We use a shallow git clone for reduced bandwidth usage # - We remove unnecessary files for a compacter docker layer # - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file -RUN --mount=type=ssh git clone --branch 44-port-to-python3 --depth 1 git@github.com:DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ +RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ rm -rf /grt123_lung_cancer/tests && \ rm -rf /grt123_lung_cancer/training && \ rm -rf /grt123_lung_cancer/processor && \ From a09b573628842f9a6000fa925f8f98633579ecce Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 4 Jul 2023 14:37:25 +0200 Subject: [PATCH 03/19] cleanup output JSON report --- .../utils/LungCancerClassifierRunner.py | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py index f4ddba6f..9e91144c 100644 --- a/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py +++ b/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -11,6 +11,8 @@ import torch.cuda from mhubio.core import Instance, InstanceData, IO, Module +from typing import Dict +import json from pathlib import Path import numpy as np import SimpleITK as sitk @@ -20,6 +22,24 @@ import main +def cleanup_json_report(data: Dict): + for key in ["trainingset1", "trainingset2"]: + del data["lungcad"][key] + for key in ["patientuid", "studyuid"]: + del data["imageinfo"][key] + data["findings"] = [ + dict( + id=f["id"], + x=f["x"], + y=f["y"], + z=f["z"], + probability=f["probability"], + cancerprobability=f["cancerprobability"] + ) + 
for f in data["findings"] + ] + + @IO.Config('n_preprocessing_workers', int, 6, the="number of preprocessing workers to use for the grt123 lung mask preprocessor") @IO.Config('tmp_path', str, "/app/tmp", the="the path to write intermediate grt123 files to") class LungCancerClassifierRunner(Module): @@ -56,4 +76,8 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData # store classification results self.v(f"Writing classification results to {out_data.abspath}") assert len(results) > 0, "LungCancerClassifierRunner - Always expects at least one output report" - results[0].to_file(out_data.abspath) + results_json = results[0].to_json() + cleanup_json_report(results_json) + with open(out_data.abspath, "w") as f: + json.dump(results_json, f, indent=4) + From ec5f1463d8f584eb949e9cdfd442740670dd2017 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 4 Jul 2023 14:47:26 +0200 Subject: [PATCH 04/19] Move git HEAD file in Dockerfiles to retain proper hash content --- models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile | 4 ++-- models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile | 2 +- models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile index 0b356621..b24c6f1d 100644 --- a/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile +++ b/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile @@ -25,9 +25,9 @@ RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-d rm -rf /grt123_lung_cancer/images && \ rm /grt123_lung_cancer/README.md && \ rm /grt123_lung_cancer/solution-grt123-team.pdf && \ - echo "$(git log --format="%H" -n 1)" > /grt123_lung_cancer/HEAD && \ + mv /grt123_lung_cancer/.git/HEAD /grt123_lung_cancer && \ rm -rf /grt123_lung_cancer/.git/* && \ - mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git \ + mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git # Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) #RUN git init \ diff --git a/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile index 2e9c8145..0a9724e6 100644 --- a/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile +++ b/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile @@ -25,7 +25,7 @@ RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-d rm -rf /grt123_lung_cancer/images && \ rm /grt123_lung_cancer/README.md && \ rm /grt123_lung_cancer/solution-grt123-team.pdf && \ - echo "$(git log --format="%H" -n 1)" > /grt123_lung_cancer/HEAD && \ + mv /grt123_lung_cancer/.git/HEAD /grt123_lung_cancer && \ rm -rf /grt123_lung_cancer/.git/* && \ mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git diff --git a/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile b/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile index a63c2d61..862a1dc7 100644 --- a/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile +++ b/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile @@ -25,7 +25,7 @@ RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-d rm -rf /grt123_lung_cancer/images && \ rm /grt123_lung_cancer/README.md && \ rm /grt123_lung_cancer/solution-grt123-team.pdf && \ - echo "$(git log --format="%H" -n 1)" > /grt123_lung_cancer/HEAD && \ + mv /grt123_lung_cancer/.git/HEAD /grt123_lung_cancer && \ rm -rf 
/grt123_lung_cancer/.git/* && \ mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git From 20729a9b65d4a83b0eb7762f0a3144a2ae2403e4 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 4 Jul 2023 15:17:09 +0200 Subject: [PATCH 05/19] change MHub/DIAG -> MHub/GC in comments --- models/grt123_lung_cancer/scripts/run.py | 10 +++++----- .../utils/LungCancerClassifierRunner.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/models/grt123_lung_cancer/scripts/run.py b/models/grt123_lung_cancer/scripts/run.py index f6294777..78971e1a 100644 --- a/models/grt123_lung_cancer/scripts/run.py +++ b/models/grt123_lung_cancer/scripts/run.py @@ -1,12 +1,12 @@ """ -------------------------------------------------------- -MHub / DIAG - Run grt123 Lung Cancer Classifier locally -------------------------------------------------------- +------------------------------------------------------ +MHub / GC - Run grt123 Lung Cancer Classifier locally +----------------------------------------------------- -------------------------------------------------------- +----------------------------------------------------- Author: Sil van de Leemput Email: sil.vandeleemput@radboudumc.nl -------------------------------------------------------- +----------------------------------------------------- """ import sys diff --git a/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py index 9e91144c..80bc2485 100644 --- a/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py +++ b/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -1,12 +1,12 @@ """ ----------------------------------------------------------- -Mhub / DIAG - Run Module for grt123 Lung Cancer Classifier ----------------------------------------------------------- +-------------------------------------------------------- +Mhub / GC - Run Module for grt123 Lung Cancer Classifier +-------------------------------------------------------- ----------------------------------------------------------- +-------------------------------------------------------- Author: Sil van de Leemput Email: sil.vandeleemput@radboudumc.nl ----------------------------------------------------------- +-------------------------------------------------------- """ import torch.cuda from mhubio.core import Instance, InstanceData, IO, Module From 2942e373e3270f6be2cecfb49fd0e7a1806b81ca Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Tue, 1 Aug 2023 13:43:51 +0200 Subject: [PATCH 06/19] Updated for new base image (single Dockerfile), updated config.yml, run.py, and others... 
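With the consolidated Dockerfile and the new default config, a minimal usage sketch looks as follows (the image tag and host paths are placeholders, and --gpus all can be omitted to fall back to the runner's CPU path):

    docker build -t gc_grt123_lung_cancer -f models/gc_grt123_lung_cancer/dockerfiles/Dockerfile .
    docker run --rm --gpus all \
        -v /path/to/dicom_ct:/app/data/input_data:ro \
        -v /path/to/output:/app/data/output_data \
        gc_grt123_lung_cancer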
--- .../__init__.py | 0 .../gc_grt123_lung_cancer/config/config.yml | 26 ++++++++++++ .../dockerfiles/Dockerfile | 36 ++++++++++++++++ .../scripts/run.py | 10 ++--- .../utils/LungCancerClassifierRunner.py | 7 +++- .../utils/PanImgConverters.py | 8 ++-- .../utils/__init__.py | 0 models/grt123_lung_cancer/config/config.yml | 14 ------- .../dockerfiles/cuda11.4/Dockerfile | 42 ------------------- .../dockerfiles/cuda12.0/Dockerfile | 42 ------------------- .../dockerfiles/nocuda/Dockerfile | 42 ------------------- 11 files changed, 76 insertions(+), 151 deletions(-) rename models/{grt123_lung_cancer => gc_grt123_lung_cancer}/__init__.py (100%) create mode 100644 models/gc_grt123_lung_cancer/config/config.yml create mode 100644 models/gc_grt123_lung_cancer/dockerfiles/Dockerfile rename models/{grt123_lung_cancer => gc_grt123_lung_cancer}/scripts/run.py (67%) rename models/{grt123_lung_cancer => gc_grt123_lung_cancer}/utils/LungCancerClassifierRunner.py (93%) rename models/{grt123_lung_cancer => gc_grt123_lung_cancer}/utils/PanImgConverters.py (94%) rename models/{grt123_lung_cancer => gc_grt123_lung_cancer}/utils/__init__.py (100%) delete mode 100644 models/grt123_lung_cancer/config/config.yml delete mode 100644 models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile delete mode 100644 models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile delete mode 100644 models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile diff --git a/models/grt123_lung_cancer/__init__.py b/models/gc_grt123_lung_cancer/__init__.py similarity index 100% rename from models/grt123_lung_cancer/__init__.py rename to models/gc_grt123_lung_cancer/__init__.py diff --git a/models/gc_grt123_lung_cancer/config/config.yml b/models/gc_grt123_lung_cancer/config/config.yml new file mode 100644 index 00000000..c6947ac6 --- /dev/null +++ b/models/gc_grt123_lung_cancer/config/config.yml @@ -0,0 +1,26 @@ +general: + data_base_dir: /app/data + +execute: + - DicomImporter + - MhaConverter + - LungCancerClassifierRunner + - DataOrganizer + +modules: + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: True + meta: + mod: ct + + LungCancerClassifierRunner: + tmp_path: /app/tmp + n_preprocessing_workers: 8 + + DataOrganizer: + target_dir: output_data + require_data_confirmation: true + targets: + - json-->[i:sid]/gc_grt123_lung_cancer_findings.json diff --git a/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile b/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile new file mode 100644 index 00000000..03ed18e8 --- /dev/null +++ b/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile @@ -0,0 +1,36 @@ +# Specify the base image for the environment +FROM mhubai/base:latest + +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# install required dependencies for grt123 algorithm including GPU support +RUN pip3 install --no-cache-dir \ + torch===2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html + +# Install grt123 algorithm and model weights +# - We use a shallow git clone for reduced bandwidth usage +# - We remove unnecessary files for a compacter docker layer +# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file +RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /gc_grt123_lung_cancer && \ + rm -rf /gc_grt123_lung_cancer/tests && \ + rm -rf /gc_grt123_lung_cancer/training && \ + rm -rf /gc_grt123_lung_cancer/processor && \ + rm -rf 
/gc_grt123_lung_cancer/images && \ + rm /gc_grt123_lung_cancer/README.md && \ + rm /gc_grt123_lung_cancer/solution-grt123-team.pdf && \ + mv /gc_grt123_lung_cancer/.git/HEAD /gc_grt123_lung_cancer && \ + rm -rf /gc_grt123_lung_cancer/.git/* && \ + mv /gc_grt123_lung_cancer/HEAD /gc_grt123_lung_cancer/.git + +# Clone MHub model (m-gc-grt123-lung-cancer branch, fixed to commit TODO) +#RUN git init \ +# && git sparse-checkout set "models/grt123_lung_cancer" \ +# && git fetch https://github.com/MHubAI/models.git m-gc-grt123-lung-cancer \ +# && git merge TODO + +# Add lobe segmentation code base to python path +ENV PYTHONPATH="/gc_grt123_lung_cancer:/app" + +# Default run script # TODO should be direct call to config.yml waiting for MhaConverter with panimg backend +CMD ["python3", "/app/models/gc_grt123_lung_cancer/scripts/run.py"] diff --git a/models/grt123_lung_cancer/scripts/run.py b/models/gc_grt123_lung_cancer/scripts/run.py similarity index 67% rename from models/grt123_lung_cancer/scripts/run.py rename to models/gc_grt123_lung_cancer/scripts/run.py index 78971e1a..e53880cd 100644 --- a/models/grt123_lung_cancer/scripts/run.py +++ b/models/gc_grt123_lung_cancer/scripts/run.py @@ -15,8 +15,8 @@ from mhubio.core import Config, DataType, FileType from mhubio.modules.importer.DicomImporter import DicomImporter from mhubio.modules.organizer.DataOrganizer import DataOrganizer -from models.grt123_lung_cancer.utils.LungCancerClassifierRunner import LungCancerClassifierRunner -from models.grt123_lung_cancer.utils.PanImgConverters import MhaPanImgConverter +from models.gc_grt123_lung_cancer.utils.LungCancerClassifierRunner import LungCancerClassifierRunner +from models.gc_grt123_lung_cancer.utils.PanImgConverters import MhaPanImgConverter # clean-up import shutil @@ -25,7 +25,7 @@ shutil.rmtree("/app/data/output_data", ignore_errors=True) # config -config = Config('/app/models/grt123_lung_cancer/config/config.yml') +config = Config('/app/models/gc_grt123_lung_cancer/config/config.yml') # import (ct:dicom) DicomImporter(config).execute() @@ -37,6 +37,4 @@ LungCancerClassifierRunner(config).execute() # organize data into output folder -organizer = DataOrganizer(config, set_file_permissions=sys.platform.startswith('linux')) -organizer.setTarget(DataType(FileType.JSON), "/app/data/output_data/[i:sid]/grt123_lung_cancer_findings.json") -organizer.execute() +DataOrganizer(config, set_file_permissions=sys.platform.startswith('linux')).execute() diff --git a/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py similarity index 93% rename from models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py rename to models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py index 80bc2485..d1b6643c 100644 --- a/models/grt123_lung_cancer/utils/LungCancerClassifierRunner.py +++ b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -58,7 +58,12 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData tmp_output_bbox_dir.mkdir(exist_ok=True, parents=True) tmp_output_prep_dir.mkdir(exist_ok=True, parents=True) - n_gpu = 1 if torch.cuda.is_available() else 0 + if torch.cuda.is_available(): + self.v("Running with a GPU") + n_gpu = 1 + else: + self.v("Running on the CPU, might be slow...") + n_gpu = 0 # apply grt123 algorithm results = main.main( diff --git a/models/grt123_lung_cancer/utils/PanImgConverters.py b/models/gc_grt123_lung_cancer/utils/PanImgConverters.py similarity index 94% rename 
from models/grt123_lung_cancer/utils/PanImgConverters.py rename to models/gc_grt123_lung_cancer/utils/PanImgConverters.py index 25dd618e..824d20f4 100644 --- a/models/grt123_lung_cancer/utils/PanImgConverters.py +++ b/models/gc_grt123_lung_cancer/utils/PanImgConverters.py @@ -68,10 +68,10 @@ def convert(self, instance: Instance) -> Optional[InstanceData]: sitk_image = result.image # SimpleITK image SimpleITK.WriteImage(sitk_image, str(out_mha_file)) except UnconsumedFilesException as e: - # e.errors is keyed with a Path to a file that could not be consumed, + # e.file_errors is keyed with a Path to a file that could not be consumed, # with a list of all the errors found with loading it, # the user can then choose what to do with that information - print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.errors) + print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) return None return mha_data @@ -113,10 +113,10 @@ def convert(self, instance: Instance) -> Optional[InstanceData]: tiff_image = result.file # Path to the tiff file shutil.move(str(tiff_image), str(out_tiff_file)) except UnconsumedFilesException as e: - # e.errors is keyed with a Path to a file that could not be consumed, + # e.file_errors is keyed with a Path to a file that could not be consumed, # with a list of all the errors found with loading it, # the user can then choose what to do with that information - print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.errors) + print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) return None return tiff_data diff --git a/models/grt123_lung_cancer/utils/__init__.py b/models/gc_grt123_lung_cancer/utils/__init__.py similarity index 100% rename from models/grt123_lung_cancer/utils/__init__.py rename to models/gc_grt123_lung_cancer/utils/__init__.py diff --git a/models/grt123_lung_cancer/config/config.yml b/models/grt123_lung_cancer/config/config.yml deleted file mode 100644 index bad14ddc..00000000 --- a/models/grt123_lung_cancer/config/config.yml +++ /dev/null @@ -1,14 +0,0 @@ -general: - data_base_dir: /app/data - -modules: - DicomImporter: - source_dir: input_data - import_dir: sorted_data - sort_data: True - meta: - mod: ct - - LungCancerClassifierRunner: - tmp_path: /app/tmp - n_preprocessing_workers: 8 diff --git a/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile deleted file mode 100644 index b24c6f1d..00000000 --- a/models/grt123_lung_cancer/dockerfiles/cuda11.4/Dockerfile +++ /dev/null @@ -1,42 +0,0 @@ -# Specify the base image for the environment -FROM mhubai/base:cuda11.4 - -# Specify/override authors label -LABEL authors="sil.vandeleemput@radboudumc.nl" - -# Install panimg to test conversion integration TODO should later be installed with MHub/mhubio -RUN apt-get update && apt-get install -y --no-install-recommends \ - python3-openslide \ - && rm -rf /var/lib/apt/lists/* -RUN pip3 install panimg - -# install required dependencies for grt123 algorithm (CPU-only) -RUN pip3 install --no-cache-dir \ - torch==1.10.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html - -# Install grt123 algorithm and model weights -# - We use a shallow git clone for reduced bandwidth usage -# - We remove unnecessary files for a compacter docker layer -# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file -RUN git clone 
--depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ - rm -rf /grt123_lung_cancer/tests && \ - rm -rf /grt123_lung_cancer/training && \ - rm -rf /grt123_lung_cancer/processor && \ - rm -rf /grt123_lung_cancer/images && \ - rm /grt123_lung_cancer/README.md && \ - rm /grt123_lung_cancer/solution-grt123-team.pdf && \ - mv /grt123_lung_cancer/.git/HEAD /grt123_lung_cancer && \ - rm -rf /grt123_lung_cancer/.git/* && \ - mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git - -# Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) -#RUN git init \ -# && git sparse-checkout set "models/grt123_lung_cancer" \ -# && git fetch https://github.com/MHubAI/models.git m-grt123-lung-cancer \ -# && git merge TODO - -# Add lobe segmentation code base to python path -ENV PYTHONPATH="/grt123_lung_cancer:/app" - -# Default run script -CMD ["python3", "/app/models/grt123_lung_cancer/scripts/run.py"] diff --git a/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile b/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile deleted file mode 100644 index 0a9724e6..00000000 --- a/models/grt123_lung_cancer/dockerfiles/cuda12.0/Dockerfile +++ /dev/null @@ -1,42 +0,0 @@ -# Specify the base image for the environment -FROM mhubai/base:cuda12.0 - -# Specify/override authors label -LABEL authors="sil.vandeleemput@radboudumc.nl" - -# Install panimg to test conversion integration TODO should later be installed with MHub/mhubio -RUN apt-get update && apt-get install -y --no-install-recommends \ - python3-openslide \ - && rm -rf /var/lib/apt/lists/* -RUN pip3 install panimg - -# install required dependencies for grt123 algorithm (CPU-only) -RUN pip3 install --no-cache-dir \ - torch===2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html - -# Install grt123 algorithm and model weights -# - We use a shallow git clone for reduced bandwidth usage -# - We remove unnecessary files for a compacter docker layer -# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file -RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ - rm -rf /grt123_lung_cancer/tests && \ - rm -rf /grt123_lung_cancer/training && \ - rm -rf /grt123_lung_cancer/processor && \ - rm -rf /grt123_lung_cancer/images && \ - rm /grt123_lung_cancer/README.md && \ - rm /grt123_lung_cancer/solution-grt123-team.pdf && \ - mv /grt123_lung_cancer/.git/HEAD /grt123_lung_cancer && \ - rm -rf /grt123_lung_cancer/.git/* && \ - mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git - -# Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) -#RUN git init \ -# && git sparse-checkout set "models/grt123_lung_cancer" \ -# && git fetch https://github.com/MHubAI/models.git m-grt123-lung-cancer \ -# && git merge TODO - -# Add lobe segmentation code base to python path -ENV PYTHONPATH="/grt123_lung_cancer:/app" - -# Default run script -CMD ["python3", "/app/models/grt123_lung_cancer/scripts/run.py"] diff --git a/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile b/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile deleted file mode 100644 index 862a1dc7..00000000 --- a/models/grt123_lung_cancer/dockerfiles/nocuda/Dockerfile +++ /dev/null @@ -1,42 +0,0 @@ -# Specify the base image for the environment -FROM mhubai/base:nocuda - -# Specify/override authors label -LABEL authors="sil.vandeleemput@radboudumc.nl" - -# Install panimg to test 
conversion integration TODO should later be installed with MHub/mhubio -RUN apt-get update && apt-get install -y --no-install-recommends \ - python3-openslide \ - && rm -rf /var/lib/apt/lists/* -RUN pip3 install panimg - -# install required dependencies for grt123 algorithm (CPU-only) -RUN pip3 install --no-cache-dir \ - torch===2.0.1+cpu -f https://download.pytorch.org/whl/torch_stable.html - -# Install grt123 algorithm and model weights -# - We use a shallow git clone for reduced bandwidth usage -# - We remove unnecessary files for a compacter docker layer -# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file -RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /grt123_lung_cancer && \ - rm -rf /grt123_lung_cancer/tests && \ - rm -rf /grt123_lung_cancer/training && \ - rm -rf /grt123_lung_cancer/processor && \ - rm -rf /grt123_lung_cancer/images && \ - rm /grt123_lung_cancer/README.md && \ - rm /grt123_lung_cancer/solution-grt123-team.pdf && \ - mv /grt123_lung_cancer/.git/HEAD /grt123_lung_cancer && \ - rm -rf /grt123_lung_cancer/.git/* && \ - mv /grt123_lung_cancer/HEAD /grt123_lung_cancer/.git - -# Clone MHub model (m-grt123-lung-cancer branch, fixed to commit TODO) -#RUN git init \ -# && git sparse-checkout set "models/grt123_lung_cancer" \ -# && git fetch https://github.com/MHubAI/models.git m-grt123-lung-cancer \ -# && git merge TODO - -# Add lobe segmentation code base to python path -ENV PYTHONPATH="/grt123_lung_cancer:/app" - -# Default run script -CMD ["python3", "/app/models/grt123_lung_cancer/scripts/run.py"] From 4f05036be8d2fca5f9ebc32e17ea7bb1eaa0875a Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Wed, 13 Sep 2023 12:32:22 +0200 Subject: [PATCH 07/19] Updated the grt123 mhub model files * default.yml * renamed from config.yml * added version and description * updated pipeline with panimg mhaconverter * Dockerfile * added fixed commit hash for grt123 repo git clone * updated entrypoint * LungCancerClassifierRunner.py * removed tmp_path config option * added requestTempDir for tmp_path * added more comments * Removed script files and custom PanImgConverter --- .../config/{config.yml => default.yml} | 6 +- .../dockerfiles/Dockerfile | 10 +- models/gc_grt123_lung_cancer/scripts/run.py | 40 ------ .../utils/LungCancerClassifierRunner.py | 10 +- .../utils/PanImgConverters.py | 122 ------------------ 5 files changed, 16 insertions(+), 172 deletions(-) rename models/gc_grt123_lung_cancer/config/{config.yml => default.yml} (77%) delete mode 100644 models/gc_grt123_lung_cancer/scripts/run.py delete mode 100644 models/gc_grt123_lung_cancer/utils/PanImgConverters.py diff --git a/models/gc_grt123_lung_cancer/config/config.yml b/models/gc_grt123_lung_cancer/config/default.yml similarity index 77% rename from models/gc_grt123_lung_cancer/config/config.yml rename to models/gc_grt123_lung_cancer/config/default.yml index c6947ac6..a068585b 100644 --- a/models/gc_grt123_lung_cancer/config/config.yml +++ b/models/gc_grt123_lung_cancer/config/default.yml @@ -1,5 +1,7 @@ general: data_base_dir: /app/data + version: 1.0 + description: grt123 lung nodule and lung cancer classifier default (dicom to json) execute: - DicomImporter @@ -15,8 +17,10 @@ modules: meta: mod: ct + MhaConverter: + engine: panimg + LungCancerClassifierRunner: - tmp_path: /app/tmp n_preprocessing_workers: 8 DataOrganizer: diff --git a/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile 
b/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile index 03ed18e8..ea2416cc 100644 --- a/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile +++ b/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile @@ -9,10 +9,11 @@ RUN pip3 install --no-cache-dir \ torch===2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html # Install grt123 algorithm and model weights -# - We use a shallow git clone for reduced bandwidth usage +# - Git clone the algorithm repository for v2.0.0 (fixed to v2.0.0 tag commit on 2023/09/13) # - We remove unnecessary files for a compacter docker layer # - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file -RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /gc_grt123_lung_cancer && \ +RUN git clone --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /gc_grt123_lung_cancer && \ + cd /gc_grt123_lung_cancer && git reset --hard 9a4ca0415c7fc1d3023a16650bf1cdce86f8bb59 && \ rm -rf /gc_grt123_lung_cancer/tests && \ rm -rf /gc_grt123_lung_cancer/training && \ rm -rf /gc_grt123_lung_cancer/processor && \ @@ -32,5 +33,6 @@ RUN git clone --depth 1 --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-d # Add lobe segmentation code base to python path ENV PYTHONPATH="/gc_grt123_lung_cancer:/app" -# Default run script # TODO should be direct call to config.yml waiting for MhaConverter with panimg backend -CMD ["python3", "/app/models/gc_grt123_lung_cancer/scripts/run.py"] +# Default entrypoint +ENTRYPOINT ["python3", "-m", "mhubio.run"] +CMD ["--config", "/app/models/gc_grt123_lung_cancer/config/default.yml"] diff --git a/models/gc_grt123_lung_cancer/scripts/run.py b/models/gc_grt123_lung_cancer/scripts/run.py deleted file mode 100644 index e53880cd..00000000 --- a/models/gc_grt123_lung_cancer/scripts/run.py +++ /dev/null @@ -1,40 +0,0 @@ -""" ------------------------------------------------------- -MHub / GC - Run grt123 Lung Cancer Classifier locally ------------------------------------------------------ - ------------------------------------------------------ -Author: Sil van de Leemput -Email: sil.vandeleemput@radboudumc.nl ------------------------------------------------------ -""" - -import sys -sys.path.append('.') - -from mhubio.core import Config, DataType, FileType -from mhubio.modules.importer.DicomImporter import DicomImporter -from mhubio.modules.organizer.DataOrganizer import DataOrganizer -from models.gc_grt123_lung_cancer.utils.LungCancerClassifierRunner import LungCancerClassifierRunner -from models.gc_grt123_lung_cancer.utils.PanImgConverters import MhaPanImgConverter - -# clean-up -import shutil -shutil.rmtree("/app/data/sorted_data", ignore_errors=True) -shutil.rmtree("/app/tmp", ignore_errors=True) -shutil.rmtree("/app/data/output_data", ignore_errors=True) - -# config -config = Config('/app/models/gc_grt123_lung_cancer/config/config.yml') - -# import (ct:dicom) -DicomImporter(config).execute() - -# convert (ct:dicom -> ct:mha) -MhaPanImgConverter(config).execute() - -# execute model (nnunet) -LungCancerClassifierRunner(config).execute() - -# organize data into output folder -DataOrganizer(config, set_file_permissions=sys.platform.startswith('linux')).execute() diff --git a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py index d1b6643c..eb170d67 100644 --- a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py 
+++ b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -19,9 +19,10 @@ import torch +# Import the main module for the grt123 algorithm, which must be used for running the classification import main - +# This method cleans the raw results from the grt123 algorithm output and only keeps the relevant details def cleanup_json_report(data: Dict): for key in ["trainingset1", "trainingset2"]: del data["lungcad"][key] @@ -41,7 +42,6 @@ def cleanup_json_report(data: Dict): @IO.Config('n_preprocessing_workers', int, 6, the="number of preprocessing workers to use for the grt123 lung mask preprocessor") -@IO.Config('tmp_path', str, "/app/tmp", the="the path to write intermediate grt123 files to") class LungCancerClassifierRunner(Module): n_preprocessing_workers: int @@ -51,13 +51,14 @@ class LungCancerClassifierRunner(Module): @IO.Input('in_data', 'mha:mod=ct', the='input ct scan') @IO.Output('out_data', 'grt123_lung_cancer_findings.json', 'json:model=grt123LungCancerClassification', 'in_data', the='predicted nodules and lung cancer findings of the lung lobe') def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: - - tmp_path = Path(self.tmp_path) + # create temporary directories for the preprocessed data and the cropped bounding boxes + tmp_path = Path(self.config.data.requestTempDir('grt123')) tmp_output_bbox_dir = tmp_path / "bbox" tmp_output_prep_dir = tmp_path / "prep" tmp_output_bbox_dir.mkdir(exist_ok=True, parents=True) tmp_output_prep_dir.mkdir(exist_ok=True, parents=True) + # determine the number of GPUs we can use if torch.cuda.is_available(): self.v("Running with a GPU") n_gpu = 1 @@ -85,4 +86,3 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData cleanup_json_report(results_json) with open(out_data.abspath, "w") as f: json.dump(results_json, f, indent=4) - diff --git a/models/gc_grt123_lung_cancer/utils/PanImgConverters.py b/models/gc_grt123_lung_cancer/utils/PanImgConverters.py deleted file mode 100644 index 824d20f4..00000000 --- a/models/gc_grt123_lung_cancer/utils/PanImgConverters.py +++ /dev/null @@ -1,122 +0,0 @@ -""" -------------------------------------------------------------- -MHub - PanImg Conversion Modules Dicom2Mha and WSI-Dicom2Tiff -------------------------------------------------------------- - -------------------------------------------------------------- -Author: Sil van de Leemput -Email: sil.vandeleemput@radboudumc.nl -------------------------------------------------------------- -""" - - -from typing import Optional - -from mhubio.modules.convert.DataConverter import DataConverter -from mhubio.core import Instance, InstanceData, DataType, FileType - -import os -from pathlib import Path -import shutil - -from panimg.exceptions import UnconsumedFilesException -from panimg.image_builders.dicom import image_builder_dicom -from panimg.image_builders.tiff import image_builder_tiff -from panimg.image_builders.metaio_nrrd import image_builder_nrrd - -import SimpleITK - - -class MhaPanImgConverter(DataConverter): - """ - Conversion module. - Convert instance data from dicom or nrrd to mha. - """ - - def convert(self, instance: Instance) -> Optional[InstanceData]: - - # create a converted instance - has_instance_dicom = instance.hasType(DataType(FileType.DICOM)) - has_instance_nrrd = instance.hasType(DataType(FileType.NRRD)) - - assert has_instance_dicom or has_instance_nrrd, f"CONVERT ERROR: required datatype (dicom or nrrd) not available in instance {str(instance)}." 
- - # select input data, dicom has priority over nrrd - input_data = instance.data.filter(DataType(FileType.DICOM) if has_instance_dicom else DataType(FileType.NRRD)).first() - - # out data - mha_data = InstanceData("image.mha", DataType(FileType.MHA, input_data.type.meta)) - mha_data.instance = instance - - # paths - inp_data_dir = Path(input_data.abspath) - out_mha_file = Path(mha_data.abspath) - - # sanity check - assert(inp_data_dir.is_dir()) - - # DICOM CT to MHA conversion (if the file doesn't exist yet) - if out_mha_file.is_file(): - print("CONVERT ERROR: File already exists: ", out_mha_file) - return None - else: - # run conversion using panimg - input_files = {f for f in inp_data_dir.glob(["*.nrrd", "*.dcm"][has_instance_dicom]) if f.is_file()} - img_builder = image_builder_dicom if has_instance_dicom else image_builder_nrrd - try: - for result in img_builder(files=input_files): - sitk_image = result.image # SimpleITK image - SimpleITK.WriteImage(sitk_image, str(out_mha_file)) - except UnconsumedFilesException as e: - # e.file_errors is keyed with a Path to a file that could not be consumed, - # with a list of all the errors found with loading it, - # the user can then choose what to do with that information - print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) - return None - - return mha_data - - -class TiffPanImgConverter(DataConverter): - """ - Conversion module. - Convert instance data from WSI-dicom to tiff. - """ - - def convert(self, instance: Instance) -> Optional[InstanceData]: - - # create a converted instance - assert instance.hasType(DataType(FileType.DICOM)), f"CONVERT ERROR: required datatype (dicom) not available in instance {str(instance)}." - dicom_data = instance.data.filter(DataType(FileType.DICOM)).first() - - # out data - tiff_data = InstanceData("image.tiff", DataType(FileType.TIFF, dicom_data.type.meta)) - tiff_data.instance = instance - - # paths - inp_dicom_dir = Path(dicom_data.abspath) - out_tiff_file = Path(tiff_data.abspath) - - # sanity check - assert(inp_dicom_dir.is_dir()) - - # WSI-DICOM to TIFF conversion (if the file doesn't exist yet) - if out_tiff_file.is_file(): - print("CONVERT ERROR: File already exists: ", out_tiff_file) - return None - else: - # run conversion using panimg - dcm_input_files = {f for f in inp_dicom_dir.glob("*.dcm") if f.is_file()} - - try: - for result in image_builder_tiff(files=dcm_input_files): - tiff_image = result.file # Path to the tiff file - shutil.move(str(tiff_image), str(out_tiff_file)) - except UnconsumedFilesException as e: - # e.file_errors is keyed with a Path to a file that could not be consumed, - # with a list of all the errors found with loading it, - # the user can then choose what to do with that information - print("CONVERT ERROR: UnconsumedFilesException during PanImg conversion: ", e.file_errors) - return None - - return tiff_data From 364831a2e525bba94d1157399d5702dfc1304d02 Mon Sep 17 00:00:00 2001 From: Sil van de Leemput Date: Tue, 3 Oct 2023 12:55:18 +0200 Subject: [PATCH 08/19] Create meta.json --- models/gc_grt123_lung_cancer/meta.json | 121 +++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 models/gc_grt123_lung_cancer/meta.json diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json new file mode 100644 index 00000000..000cd2ce --- /dev/null +++ b/models/gc_grt123_lung_cancer/meta.json @@ -0,0 +1,121 @@ +{ + "id": "2e67a3cc-4680-4058-bf4e-f965cf50f06f", + "name": 
"lung_cancer_risk_estimation", + "title": "Lung cancer risk estimation on thorax CT scans", + "summary": { + "description": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. ", + "inputs": [ + { + "label": "CT", + "description": "Chest CT", + "format": "DICOM", + "modality": "CT", + "bodypartexamined": "Chest", + "slicethickness": "2.5mm", + "non-contrast": true, + "contrast": false + } + ], + "outputs": [ + { + "type": "Prediction", + "valueType": "number", + "label": "Cancer probability score", + "description": "Probability that the scan contains cancer nodules", + "classes": [] + } + ], + "model": { + "architecture": "3D convolutional neural network", + "training": "supervised", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 2285 + }, + "evaluation": { + "vol_samples": 506 + }, + "public": true, + "external": false + } + }, + "details": { + "name": " bodyct-dsb2017-grt123", + "version": "", + "devteam": "DIAGNijmegen (Diagnostic Image Analysis Group, Radboud UMC, The Netherlands)", + "type": "3D Deep Leaky Noisy-or Network", + "date": { + "weights": "", + "code": "2023-07-04", + "pub": "2017-11-22" + }, + "cite": "Liao F, Liang M, Li Z, Hu X, Song S. Evaluate the Malignancy of Pulmonary Nodules Using the 3-D Deep Leaky Noisy-OR Network. IEEE Trans Neural Netw Learning Syst. 2019;30(11):3484-3495.", + "license": { + "code": "MIT", + "weights": "" + }, + "publications": [ + { + "title": "JOURNAL OF LATEX CLASS FILES, VOL. 14, NO. 8, AUGUST 2015 1 Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network", + "uri": "https://arxiv.org/pdf/1711.08324.pdf" + } + ], + "github": "https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123", + "zenodo": "", + "colab": "", + "slicer": false + }, + "info": { + "use": { + "title": "Intended use", + "text": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. ", + "references": [], + "tables": [] + }, + "analyses": { + "title": "", + "text": "", + "references": [], + "tables": [] + }, + "evaluation": { + "title": "", + "text": "", + "references": [], + "tables": [] + }, + "training": { + "title": "Training data", + "text": "The Luna dataset includes the images from the LIDC/IDRI dataset in a different format, together with additional annotations. 
The LIDC/IDRI dataset is available at https://wiki.cancerimagingarchive.net/display/Public/LIDC-IDRI under a Creative Commons Attribution 3.0 Unported License.", + "references": [ + { + "label": "LUng Nodule Analysis 2016 dataset part 1", + "uri": "https://zenodo.org/record/3723295" + }, + { + "label": "LUng Nodule Analysis 2016 dataset part 2", + "uri": "https://zenodo.org/record/4121926" + }, + { + "label": "Data Science Bowl 2017 dataset", + "uri": "https://www.kaggle.com/competitions/data-science-bowl-2017/data" + } + ], + "tables": [] + }, + "ethics": { + "title": "", + "text": "", + "references": [], + "tables": [] + }, + "limitations": { + "title": "", + "text": "", + "references": [], + "tables": [] + } + } +} From ecdd16601a6f21ecfb6999daa973c0e91e7db65f Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Fri, 24 Nov 2023 22:37:34 +0100 Subject: [PATCH 09/19] added mhub model definition and removed first comment line Dockerfile --- models/gc_grt123_lung_cancer/dockerfiles/Dockerfile | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile b/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile index ea2416cc..d70f0ab3 100644 --- a/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile +++ b/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile @@ -1,4 +1,3 @@ -# Specify the base image for the environment FROM mhubai/base:latest # Specify/override authors label @@ -24,11 +23,9 @@ RUN git clone --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt rm -rf /gc_grt123_lung_cancer/.git/* && \ mv /gc_grt123_lung_cancer/HEAD /gc_grt123_lung_cancer/.git -# Clone MHub model (m-gc-grt123-lung-cancer branch, fixed to commit TODO) -#RUN git init \ -# && git sparse-checkout set "models/grt123_lung_cancer" \ -# && git fetch https://github.com/MHubAI/models.git m-gc-grt123-lung-cancer \ -# && git merge TODO +# Import the MHub model definition +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh gc_grt123_lung_cancer ${MHUB_MODELS_REPO} # Add lobe segmentation code base to python path ENV PYTHONPATH="/gc_grt123_lung_cancer:/app" From 5ab7f20c6ba894fcd8a5e30cdd47c70c54edc102 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Fri, 24 Nov 2023 22:50:19 +0100 Subject: [PATCH 10/19] cleanup runner imports, add new style logging --- .../utils/LungCancerClassifierRunner.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py index eb170d67..1ae1e129 100644 --- a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py +++ b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -14,8 +14,6 @@ from typing import Dict import json from pathlib import Path -import numpy as np -import SimpleITK as sitk import torch @@ -45,7 +43,6 @@ def cleanup_json_report(data: Dict): class LungCancerClassifierRunner(Module): n_preprocessing_workers: int - tmp_path: str @IO.Instance() @IO.Input('in_data', 'mha:mod=ct', the='input ct scan') @@ -60,10 +57,10 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData # determine the number of GPUs we can use if torch.cuda.is_available(): - self.v("Running with a GPU") + self.log("Running with a GPU", "NOTICE") n_gpu = 1 else: - self.v("Running on the CPU, might be slow...") + self.log("Running on the CPU, might be slow...", "NOTICE") n_gpu = 0 # apply grt123 algorithm @@ -79,8 +76,8 @@ def 
task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData data_filter=r".*.mha" ) - # store classification results - self.v(f"Writing classification results to {out_data.abspath}") + # store classification results (original json file) + self.log(f"Writing classification results to {out_data.abspath}", "NOTICE") assert len(results) > 0, "LungCancerClassifierRunner - Always expects at least one output report" results_json = results[0].to_json() cleanup_json_report(results_json) From 2d4365ad91a33be3bf86f666a1a4768d8e9dbc81 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Mon, 27 Nov 2023 10:17:52 +0100 Subject: [PATCH 11/19] added value output for overall score and added dynamic value output for all findings --- .../utils/LungCancerClassifierRunner.py | 50 ++++++++++++++++--- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py index 1ae1e129..56181b72 100644 --- a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py +++ b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -9,7 +9,7 @@ -------------------------------------------------------- """ import torch.cuda -from mhubio.core import Instance, InstanceData, IO, Module +from mhubio.core import Instance, InstanceData, IO, Module, ValueOutput, Meta from typing import Dict import json @@ -20,6 +20,25 @@ # Import the main module for the grt123 algorithm, which must be used for running the classification import main + +@ValueOutput.Name('lncancerprob') +@ValueOutput.Meta(Meta(min=0.0, max=1.0, type="probability")) +@ValueOutput.Label('Lung Nodule cancer probability score.') +@ValueOutput.Type(float) +@ValueOutput.Description('The predicted cancer probability score for a single lung nodule detected by the algorithm') +class LNCancerProb(ValueOutput): + pass + + +@ValueOutput.Name('clcancerprob') +@ValueOutput.Meta(Meta(min=0.0, max=1.0, type="probability")) +@ValueOutput.Label('Case level cancer probability score.') +@ValueOutput.Type(float) +@ValueOutput.Description('The predicted cancer probability score for the whole case') +class CLCancerProb(ValueOutput): + pass + + # This method cleans the raw results from the grt123 algorithm output and only keeps the relevant details def cleanup_json_report(data: Dict): for key in ["trainingset1", "trainingset2"]: @@ -46,8 +65,10 @@ class LungCancerClassifierRunner(Module): @IO.Instance() @IO.Input('in_data', 'mha:mod=ct', the='input ct scan') - @IO.Output('out_data', 'grt123_lung_cancer_findings.json', 'json:model=grt123LungCancerClassification', 'in_data', the='predicted nodules and lung cancer findings of the lung lobe') - def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None: + @IO.Output('out_data', 'grt123_lung_cancer_findings.json', 'json:model=grt123LungCancerClassification', data='in_data', the='predicted nodules and lung cancer findings of the lung lobe') + @IO.OutputData('clcancerprob', CLCancerProb, the='Case level probability score') + @IO.OutputDatas('lncancerprobs', LNCancerProb, the='Individual lung nodule probability scores') + def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData, clcancerprob: CLCancerProb, lncancerprobs: LNCancerProb) -> None: # create temporary directories for the preprocessed data and the cropped bounding boxes tmp_path = Path(self.config.data.requestTempDir('grt123')) tmp_output_bbox_dir = tmp_path / "bbox" @@ 
-76,10 +97,23 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData data_filter=r".*.mha" ) - # store classification results (original json file) - self.log(f"Writing classification results to {out_data.abspath}", "NOTICE") + # retrieve classification results assert len(results) > 0, "LungCancerClassifierRunner - Always expects at least one output report" - results_json = results[0].to_json() - cleanup_json_report(results_json) + results_dict = results[0].to_json() + cleanup_json_report(results_dict) + + # export to JSON (original json file) + self.log(f"Writing classification results to {out_data.abspath}", "NOTICE") with open(out_data.abspath, "w") as f: - json.dump(results_json, f, indent=4) + json.dump(results_dict, f, indent=4) + + # set output value for case level cancer probability + clcancerprob.value = results_dict["cancerinfo"]["casecancerprobability"] + + # set output values for nodule level cancer probabilities + for finding in results_dict["findings"]: + nodule_cancer_prob = LNCancerProb() + nodule_cancer_prob.meta = Meta(id=finding['id'], x=finding['x'], y=finding['y'], z=finding['z'], ) + nodule_cancer_prob.description += f" (for nodule {finding['id']} at location ({finding['x']}, {finding['y']}, {finding['z']}))" + nodule_cancer_prob.value = finding["cancerprobability"] + lncancerprobs.add(nodule_cancer_prob) From 49cdd7a0359c553039b1eac1cbcdb4488f66ca55 Mon Sep 17 00:00:00 2001 From: Miriam Groeneveld Date: Wed, 3 Jan 2024 10:51:38 +0100 Subject: [PATCH 12/19] PR comments on meta.json --- models/gc_grt123_lung_cancer/meta.json | 32 +++++++++++++++++++------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json index 000cd2ce..0abc7b87 100644 --- a/models/gc_grt123_lung_cancer/meta.json +++ b/models/gc_grt123_lung_cancer/meta.json @@ -3,7 +3,7 @@ "name": "lung_cancer_risk_estimation", "title": "Lung cancer risk estimation on thorax CT scans", "summary": { - "description": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. ", + "description": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. The model consists of two modules. The first one is a 3D region proposal network for nodule detection, which outputs all suspicious nodules for a subject. The second one selects the top five nodules based on the detection confidence, evaluates their cancer probabilities and combines them with a leaky noisy-or gate to obtain the probability of lung cancer for the subject", "inputs": [ { "label": "CT", @@ -71,19 +71,35 @@ "use": { "title": "Intended use", "text": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. ", - "references": [], + "references": [ + { + "label": "Lung cancer risk estimation algorithm on grand-challenge", + "url": "https://grand-challenge.org/algorithms/dsb2017-grt123/" + } + ], "tables": [] }, "analyses": { - "title": "", - "text": "", + "title": "Evaluation", + "text": "The nodule detection was evaluated on the validation set of DSB. It contains data from 198 cases and there are 71 (7 nodules smaller than 6 mm are ruled out) nodules in total. The Free Response Operating Characteristic (FROC) is used to evaluate the performance and can be viewed in the publication. 
The Case classification is evaluated using the Area Under the Curve (AUC) metric.", "references": [], - "tables": [] + "tables": [{ + "label": "AUC for the Case classification", + "entries": { + "AUC training set": 0.90, + "AUC test set": 0.87 + } + }] }, "evaluation": { - "title": "", - "text": "", - "references": [], + "title": "Evaluation data", + "text": "The model was evaluated against a private dataset of 300 low-dose CT images. 150 patient scans were from the competition set and 150 were from an independent dataset. Both test datasets contained 50 cancer-positive scans and 100 cancer-negative scans.", + "references": [ + { + "label": "Evaluation paper", + "uri": "https://pubmed.ncbi.nlm.nih.gov/34870218/" + } + ], "tables": [] }, "training": { From 0b4ac0adb63241a45bc46a58eb0dd795bcaefd77 Mon Sep 17 00:00:00 2001 From: Miriam Groeneveld Date: Wed, 3 Jan 2024 13:34:20 +0100 Subject: [PATCH 13/19] PR comments on meta.json --- models/gc_grt123_lung_cancer/meta.json | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json index 0abc7b87..669de299 100644 --- a/models/gc_grt123_lung_cancer/meta.json +++ b/models/gc_grt123_lung_cancer/meta.json @@ -60,6 +60,10 @@ { "title": "JOURNAL OF LATEX CLASS FILES, VOL. 14, NO. 8, AUGUST 2015 1 Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network", "uri": "https://arxiv.org/pdf/1711.08324.pdf" + }, + { + "title": "Deep Learning for Lung Cancer Detection on Screening CT Scans: Results of a Large-Scale Public Competition and an Observer Study with 11 Radiologists", + "uri": "https://pubmed.ncbi.nlm.nih.gov/34870218/" + } ], "github": "https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123", @@ -70,11 +74,11 @@ "info": { "use": { "title": "Intended use", - "text": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. 
", + "text": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk.", "references": [ { "label": "Lung cancer risk estimation algorithm on grand-challenge", - "url": "https://grand-challenge.org/algorithms/dsb2017-grt123/" + "uri": "https://grand-challenge.org/algorithms/dsb2017-grt123/" } ], "tables": [] From c0d9076f8f5d7bb9fae1ec09fa6d3d23604fab51 Mon Sep 17 00:00:00 2001 From: Miriam Groeneveld Date: Wed, 3 Jan 2024 13:40:28 +0100 Subject: [PATCH 14/19] PR comments on mata.json --- models/gc_grt123_lung_cancer/meta.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json index 669de299..413557fc 100644 --- a/models/gc_grt123_lung_cancer/meta.json +++ b/models/gc_grt123_lung_cancer/meta.json @@ -90,8 +90,8 @@ "tables": [{ "label": "AUC for the Case classification", "entries": { - "AUC training set": 0.90, - "AUC test set": 0.87 + "AUC training set": "0.90", + "AUC test set": "0.87" } }] }, From f2532293eabd977455423475a9a91f73db21b215 Mon Sep 17 00:00:00 2001 From: Miriam Groeneveld Date: Wed, 3 Jan 2024 14:30:10 +0100 Subject: [PATCH 15/19] DSB and evaluation dataset --- models/gc_grt123_lung_cancer/meta.json | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json index 413557fc..b523b9e7 100644 --- a/models/gc_grt123_lung_cancer/meta.json +++ b/models/gc_grt123_lung_cancer/meta.json @@ -85,8 +85,11 @@ }, "analyses": { "title": "Evaluation", - "text": "The nodule detection was evaluated on the validation set of DSB. It contains data from 198 cases and there are 71 (7 nodules smaller than 6 mm are ruled out) nodules in total. The Free Response Operating Characteristic (FROC) is used to evaluate the performance and can be viewed in the publication. The Case classification is evaluated using the Area Under the Curve (AUC) metric.", - "references": [], + "text": "The nodule detection was evaluated on the validation set of Data Science Bowl 2017 challenge. It contains data from 198 cases and there are 71 (7 nodules smaller than 6 mm are ruled out) nodules in total. The Free Response Operating Characteristic (FROC) is used to evaluate the performance and can be viewed in the publication. The Case classification is evaluated using the Area Under the Curve (AUC) metric.", + "references": [{ + "label": "Data Science Bowl 2017 challenge", + "uri": "https://www.kaggle.com/c/data-science-bowl-2017" + }], "tables": [{ "label": "AUC for the Case classification", "entries": { @@ -97,7 +100,7 @@ }, "evaluation": { "title": "Evaluation data", - "text": "The model was evaluated against a private dataset of 300 low-dose CT images. 150 patient scans were from the competition set and 150 were from an independent dataset. 
Both test datasets contained 50 cancer-positive scans and 100 cancer-negative scans.", + "text": "The model was evaluated against a private dataset of 300 low-dose CT images, containing 100 cancer-positive scans and 200 cancer-negative scans.", "references": [ { "label": "Evaluation paper", From fb5f2c939d1c1be317936c61ede13f578874d860 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 11 Jan 2024 15:48:22 +0100 Subject: [PATCH 16/19] meta.json - update links and data details #27 --- models/gc_grt123_lung_cancer/meta.json | 75 +++++++++++++++++--------- 1 file changed, 51 insertions(+), 24 deletions(-) diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json index b523b9e7..07024e69 100644 --- a/models/gc_grt123_lung_cancer/meta.json +++ b/models/gc_grt123_lung_cancer/meta.json @@ -3,7 +3,7 @@ "name": "lung_cancer_risk_estimation", "title": "Lung cancer risk estimation on thorax CT scans", "summary": { - "description": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. The model consists of two modules. The first one is a 3D region proposal network for nodule detection, which outputs all suspicious nodules for a subject. The second one selects the top five nodules based on the detection confidence, evaluates their cancer probabilities and combines them with a leaky noisy-or gate to obtain the probability of lung cancer for the subject", + "description": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. The model consists of two modules. The first one is a 3D region proposal network for nodule detection, which outputs all suspicious nodules for a subject. The second one selects the top five nodules based on the detection confidence, evaluates their cancer probabilities and combines them with a leaky noisy-or gate to obtain the probability of lung cancer for the subject. This model was the winner of the Data Science Bowl 2017 competition hosted on Kaggle.", "inputs": [ { "label": "CT", @@ -32,7 +32,7 @@ }, "data": { "training": { - "vol_samples": 2285 + "vol_samples": 2483 }, "evaluation": { "vol_samples": 506 @@ -51,15 +51,15 @@ "code": "2023-07-04", "pub": "2017-11-22" }, - "cite": "Liao F, Liang M, Li Z, Hu X, Song S. Evaluate the Malignancy of Pulmonary Nodules Using the 3-D Deep Leaky Noisy-OR Network. IEEE Trans Neural Netw Learning Syst. 2019;30(11):3484-3495.", + "cite": "F. Liao, M. Liang, Z. Li, X. Hu and S. Song, 'Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network', in IEEE Transactions on Neural Networks and Learning Systems, vol. 30, no. 11, pp. 3484-3495, Nov. 2019, doi: 10.1109/TNNLS.2019.2892409.", "license": { "code": "MIT", - "weights": "" + "weights": "MIT" }, "publications": [ { - "title": "JOURNAL OF LATEX CLASS FILES, VOL. 14, NO. 
8, AUGUST 2015 1 Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network", - "uri": "https://arxiv.org/pdf/1711.08324.pdf" + "title": "Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network", + "uri": "https://ieeexplore.ieee.org/abstract/document/8642524" }, { "title": "Deep Learning for Lung Cancer Detection on Screening CT Scans: Results of a Large-Scale Public Competition and an Observer Study with 11 Radiologists", "uri": "https://pubmed.ncbi.nlm.nih.gov/34870218/" @@ -74,44 +74,67 @@ "info": { "use": { "title": "Intended use", - "text": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk.", + "text": "This algorithm analyzes non-contrast CT scans of the thorax: first it segments the lungs, subsequently it detects lung nodules within the lungs, and finally it predicts the lung cancer risk for the individual nodules and the scan as a whole. The algorithm is also hosted on Grand Challenge [1] and was the winner of the Data Science Bowl 2017 challenge on Kaggle [2]. ", "references": [ { "label": "Lung cancer risk estimation algorithm on grand-challenge", "uri": "https://grand-challenge.org/algorithms/dsb2017-grt123/" + }, + { + "label": "Data Science Bowl 2017 challenge", + "uri": "https://www.kaggle.com/c/data-science-bowl-2017" } ], "tables": [] }, "analyses": { "title": "Evaluation", - "text": "The nodule detection was evaluated on the validation set of Data Science Bowl 2017 challenge. It contains data from 198 cases and there are 71 (7 nodules smaller than 6 mm are ruled out) nodules in total. The Free Response Operating Characteristic (FROC) is used to evaluate the performance and can be viewed in the publication. The Case classification is evaluated using the Area Under the Curve (AUC) metric.", - "references": [{ - "label": "Data Science Bowl 2017 challenge", - "uri": "https://www.kaggle.com/c/data-science-bowl-2017" - }], - "tables": [{ - "label": "AUC for the Case classification", - "entries": { - "AUC training set": "0.90", - "AUC test set": "0.87" + "text": "The evaluation of the model was done on the Data Science Bowl 2017 (DSB) dataset hosted on Kaggle [1] (this is no longer publicly available). The nodule detection was evaluated on the validation set of the DSB dataset, which contained data from 198 cases and there were 71 (7 nodules smaller than 6 mm are ruled out) nodules in total. The Free Response Operating Characteristic (FROC) is used to evaluate the performance of the nodule detection. The case cancer classification was evaluated using the Area Under the Curve (AUC) metric on the training set and the testing set of respectively 1397 and 506 patient cases. The AUC and FROC graphs can be viewed in the publication [2]. For the final evaluation on the Data Science Bowl 2017 challenge, the model's performance was evaluated using the logistic loss on a private external dataset of 300 low-dose CT images [3], containing 100 cancer-positive scans and 200 cancer-negative scans. 
See tables for a summary of the results.", "references": [ { "label": "Data Science Bowl 2017 challenge", "uri": "https://www.kaggle.com/c/data-science-bowl-2017" }, { "label": "Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network", "uri": "https://ieeexplore.ieee.org/abstract/document/8642524" }, { "label": "Evaluation paper external dataset Data Science Bowl 2017", "uri": "https://pubmed.ncbi.nlm.nih.gov/34870218/" } ], "tables": [ { "label": "Case cancer classification results on the DSB 2017 dataset", "entries": { "AUC on training set": "0.90", "AUC on test set": "0.87", "Logistic loss on test set": "0.39975" } }, { "label": "Case cancer classification results on private external evaluation dataset.", "entries": { "AUC on all scans": "0.877 (95% CI: 0.842, 0.910)" } } ] }, "evaluation": { "title": "Evaluation data", - "text": "The model was evaluated against a private dataset of 300 low-dose CT images, containing 100 cancer-positive scans and 200 cancer-negative scans.", + "text": "The model was evaluated on the testing set of 506 patient cases of the Data Science Bowl 2017 (DSB) challenge hosted on Kaggle [1] (this is no longer publicly available). ", "references": [ { - "label": "Evaluation paper", - "uri": "https://pubmed.ncbi.nlm.nih.gov/34870218/" + "label": "Data Science Bowl 2017 challenge", + "uri": "https://www.kaggle.com/c/data-science-bowl-2017" } ], "tables": [] }, "training": { "title": "Training data", - "text": "The Luna dataset includes the images from the LIDC/IDRI dataset in a different format, together with additional annotations. The LIDC/IDRI dataset is available at https://wiki.cancerimagingarchive.net/display/Public/LIDC-IDRI under a Creative Commons Attribution 3.0 Unported License.", + "text": "Two lung scan datasets were used to train the model: the LUng Nodule Analysis 2016 (LUNA16) dataset [1] [2] and the training set of the Data Science Bowl 2017 (DSB) challenge hosted on Kaggle [3] (this is no longer publicly available). Nodules smaller than 6 mm were removed from the LUNA16 annotations before training. The LUNA16 dataset includes 1186 nodule labels in 888 patient cases annotated by radiologists. The DSB dataset includes 1397 and 198 patient cases in its training and validation sets respectively. 
The LUNA16 dataset is a subset of the images from the LIDC/IDRI dataset [4] that is available under a Creative Commons Attribution 3.0 Unported License.", "references": [ { "label": "LUng Nodule Analysis 2016 dataset part 1", @@ -122,8 +145,12 @@ "uri": "https://zenodo.org/record/4121926" }, { - "label": "Data Science Bowl 2017 dataset", - "uri": "https://www.kaggle.com/competitions/data-science-bowl-2017/data" + "label": "Data Science Bowl 2017 challenge", + "uri": "https://www.kaggle.com/c/data-science-bowl-2017" + }, + { + "label": "The LIDC/IDRI dataset", + "uri": "https://www.cancerimagingarchive.net/collection/lidc-idri/" } ], "tables": [] From 15123ca88f9a1c07d0a7fb632d2e96d6abe9f225 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Fri, 12 Jan 2024 23:55:52 +0100 Subject: [PATCH 17/19] move main import inside the task method of the runner to squelch import print statement #27 --- .../utils/LungCancerClassifierRunner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py index 56181b72..803a8126 100644 --- a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py +++ b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -17,9 +17,6 @@ import torch -# Import the main module for the grt123 algorithm, which must be used for running the classification -import main - @ValueOutput.Name('lncancerprob') @ValueOutput.Meta(Meta(min=0.0, max=1.0, type="probability")) @@ -84,6 +81,9 @@ def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData self.log("Running on the CPU, might be slow...", "NOTICE") n_gpu = 0 + # Import the main module for the grt123 algorithm, which must be used for running the classification + import main + # apply grt123 algorithm results = main.main( skip_detect=False, From 62871f2b24515bdf5289fced3a333cdd5b22bc72 Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Thu, 18 Jan 2024 10:32:06 +0100 Subject: [PATCH 18/19] meta.json - matched model name, updated output label and description --- models/gc_grt123_lung_cancer/meta.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json index 07024e69..27ee2088 100644 --- a/models/gc_grt123_lung_cancer/meta.json +++ b/models/gc_grt123_lung_cancer/meta.json @@ -1,6 +1,6 @@ { "id": "2e67a3cc-4680-4058-bf4e-f965cf50f06f", - "name": "lung_cancer_risk_estimation", + "name": "gc_grt123_lung_cancer", "title": "Lung cancer risk estimation on thorax CT scans", "summary": { "description": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. The model consists of two modules. The first one is a 3D region proposal network for nodule detection, which outputs all suspicious nodules for a subject. The second one selects the top five nodules based on the detection confidence, evaluates their cancer probabilities and combines them with a leaky noisy-or gate to obtain the probability of lung cancer for the subject. 
This model was the winner of the Data Science Bowl 2017 competition hosted on Kaggle.", @@ -20,8 +20,8 @@ { "type": "Prediction", "valueType": "number", - "label": "Cancer probability score", - "description": "Probability that the scan contains cancer nodules", + "label": "Lung thorax cancer nodule probability score", + "description": "The likelihood of the presence of cancer nodules in the lungs.", "classes": [] } ], From 2f6a999336845fa14f2c78906bffd070240776df Mon Sep 17 00:00:00 2001 From: silvandeleemput Date: Mon, 26 Feb 2024 11:51:29 +0100 Subject: [PATCH 19/19] meta.json - add version 2.0.0 to details --- models/gc_grt123_lung_cancer/meta.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json index 27ee2088..b3d24de0 100644 --- a/models/gc_grt123_lung_cancer/meta.json +++ b/models/gc_grt123_lung_cancer/meta.json @@ -43,7 +43,7 @@ }, "details": { "name": " bodyct-dsb2017-grt123", - "version": "", + "version": "2.0.0", "devteam": "DIAGNijmegen (Diagnostic Image Analysis Group, Radboud UMC, The Netherlands)", "type": "3D Deep Leaky Noisy-or Network", "date": {