MHubAI · LennyN95 · Mar 29, 2024 · Aug 24, 2023 · Aug 25, 2023 · Aug 25, 2023
diff --git a/models/bamf_nnunet_ct_kidney/config/default.yml b/models/bamf_nnunet_ct_kidney/config/default.yml
@@ -0,0 +1,35 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: default configuration for Bamf NNUnet Kidney segmentation (dicom to dicom)
+
+execute:  # module names; presumably executed in the listed order - TODO confirm
+- DicomImporter
+- NiftiConverter
+- NNUnetRunner
+- BamfProcessorRunner  # has no entry under `modules` below, so no options are set for it here
+- DsegConverter
+- DataOrganizer
+
+modules:
+  DicomImporter:
+    source_dir: input_data
+    import_dir: sorted_data
+    sort_data: true
+    meta: 
+      mod: '%Modality'  # presumably fills the `mod` tag from the DICOM Modality attribute - TODO confirm
+
+  NNUnetRunner:
+    in_data: nifti:mod=ct
+    nnunet_task: Task779_Kidneys_KIRC
+    nnunet_model: 3d_fullres
+    roi: KIDNEY,KIDNEY+NEOPLASM_MALIGNANT_PRIMARY,KIDNEY+CYST  # three comma-separated class names
+
+  DsegConverter:
+    source_segs: nifti:mod=seg  # NOTE(review): BamfProcessorRunner declares its output as nrrd:mod=seg:processor=bamf - confirm this selector picks up the post-processed file
+    model_name: Bamf NNUnet CT Kidney
+    skip_empty_slices: True
+
+  DataOrganizer:
+    targets:
+    - dicomseg-->[i:sid]/bamf_nnunet_ct_kidney.seg.dcm
diff --git a/models/bamf_nnunet_ct_kidney/config/slicer.yml b/models/bamf_nnunet_ct_kidney/config/slicer.yml
@@ -0,0 +1,34 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: configuration for Bamf NNUnet Kidney segmentation in 3D Slicer (nrrd to nifti)
+
+execute:
+- NrrdImporter
+- NiftiConverter
+- NNUnetRunner
+- BamfProcessorRunner
+- JsonSegExporter
+- DataOrganizer
+
+modules:
+  NrrdImporter:
+    input_dir: input_data
+    input_file_name: image.nrrd
+
+  JsonSegExporter:
+    segment_id_meta_key: roi
+    targets:
+    - nifti:mod=seg-->[basename]
+
+  NNUnetRunner:
+    nnunet_task: Task779_Kidneys_KIRC
+    nnunet_model: 3d_fullres
+    roi: KIDNEY,KIDNEY+NEOPLASM_MALIGNANT_PRIMARY,KIDNEY+CYST  # all three model classes, spelled as in default.yml
+
+  BamfProcessorRunner:  # no options are overridden here
+
+  DataOrganizer:
+    targets:
+    - nifti:mod=seg-->[basename]
+    - json:mod=seg-->segdef.json
diff --git a/models/bamf_nnunet_ct_kidney/dockerfiles/Dockerfile b/models/bamf_nnunet_ct_kidney/dockerfiles/Dockerfile
@@ -0,0 +1,32 @@
+FROM mhubai/base:latest
+
+# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build
+# by pulling sklearn instead of scikit-learn
+# N.B. this is a known issue:
+# https://github.com/MIC-DKFZ/nnUNet/issues/1281
+# https://github.com/MIC-DKFZ/nnUNet/pull/1209
+ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True
+
+# Install nnunet
+RUN pip3 install --no-cache-dir \
+  nnunet
+
+# Clone the main branch of MHubAI/models
+ARG MHUB_MODELS_REPO
+RUN buildutils/import_mhub_model.sh bamf_nnunet_ct_kidney ${MHUB_MODELS_REPO}
+
+# Pull weights in a single RUN so the zip archive never persists in an image layer
+ENV WEIGHTS_DIR=/root/.nnunet/nnUNet_models/nnUNet/
+ENV WEIGHTS_FN=Task779_Kidney.zip
+ENV WEIGHTS_URL=https://zenodo.org/record/8277846/files/$WEIGHTS_FN
+RUN mkdir -p ${WEIGHTS_DIR} && \
+    wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL} && \
+    unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR} && \
+    rm ${WEIGHTS_DIR}${WEIGHTS_FN}
+
+# specify nnunet specific environment variables
+ENV WEIGHTS_FOLDER=$WEIGHTS_DIR
+
+# Default run script
+ENTRYPOINT ["mhub.run"]
+CMD ["--config", "/app/models/bamf_nnunet_ct_kidney/config/default.yml"]
diff --git a/models/bamf_nnunet_ct_kidney/meta.json b/models/bamf_nnunet_ct_kidney/meta.json
@@ -0,0 +1,139 @@
+{
+    "id": "",
+    "name": "bamf_nnunet_ct_kidney",
+    "title": "AIMI CT Kidney",
+    "summary": {
+        "description": "An nnU-Net based model to segment kidney from CT scans",
+        "inputs": [
+            {
+                "label": "Input Image",
+                "description": "The CT scan of a patient.",
+                "format": "DICOM",
+                "modality": "CT",
+                "bodypartexamined": "Kidney",
+                "slicethickness": "2.5mm",
+                "non-contrast": true,
+                "contrast": false
+            }
+        ],
+        "outputs": [
+            {
+                "label": "Segmentation",
+                "type": "Segmentation",
+                "description": "Segmentation kidney",
+                "classes": [
+                    "KIDNEY",
+                    "KIDNEY+NEOPLASM_MALIGNANT_PRIMARY",
+                    "KIDNEY+CYST"
+                ]
+            }
+        ],
+        "model": {
+            "architecture": "U-net",
+            "training": "supervised",
+            "cmpapproach": "3D"
+        },
+        "data": {
+            "training": {
+                "vol_samples": 534
+            },
+            "evaluation": {
+                "vol_samples": 156
+            },
+            "public": true,
+            "external": true
+        }
+    },
+    "details": {
+        "name": "AIMI CT Kidney",
+        "version": "1.0.0",
+        "devteam": "BAMF Health",
+        "authors": [
+            "Soni, Rahul",
+            "McCrumb, Diana",
+            "Murugesan, Gowtham Krishnan",
+            "Van Oss, Jeff"
+        ],
+        "type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)",
+        "date": {
+            "code": "17.10.2023",
+            "weights": "28.08.2023",
+            "pub": "23.10.2023"
+        },
+        "cite": "Murugesan, Gowtham Krishnan, Diana McCrumb, Mariam Aboian, Tej Verma, Rahul Soni, Fatima Memon, and Jeff Van Oss. The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections. arXiv preprint arXiv:2310.14897 (2023).",
+        "license": {
+            "code": "MIT",
+            "weights": "CC BY-NC 4.0"
+        },
+        "publications": [
+            {
+                "title": "The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections",
+                "uri": "https://arxiv.org/abs/2310.14897"
+            }
+        ],
+        "github": "https://github.com/bamf-health/aimi-kidney-ct"
+    },
+    "info": {
+        "use": {
+            "title": "Intended Use",
+            "text": "This model is intended to perform kidney segmentation in CT scans. The model has been trained and tested on scans acquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown. For detailed information on the training set design, please refer to the references in the training section."
+        },
+        "analyses": {
+            "title": "Quantitative Analyses",
+            "text": "The model's performance was assessed using the Dice Coefficient and Normalized Surface Distance (NSD) with tolerance 7mm, as specified in the CT Kidney segmentation task in the Medical Segmentation Decathlon challenge. The model was used to segment cases from the IDC collection TCGA-KIRC [1]. Seven of those cases were reviewed and corrected by a board-certified radiologist and a non-expert. The analysis is published here [2]",
+            "tables": [
+                {
+                    "label": "Label-wise metrics (mean (standard deviation)) between AI derived and manually corrected CT kidney annotations",
+                    "entries": {
+                        "Dice: Radiologist": "0.93 (0.22)",
+                        "NSD: Radiologist": "0.91 (0.23)",
+                        "Dice: Non-expert": "0.99 (0.06)",
+                        "NSD: Non-expert": "0.98 (0.09)"
+                    }
+                }
+            ],
+            "references": [
+                {
+                    "label": "TCGA-KIRC",
+                    "uri": "https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=5800386"
+                },
+                {
+                    "label": "The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections",
+                    "uri": "https://arxiv.org/abs/2310.14897"
+                }
+            ]
+        },
+        "evaluation": {
+            "title": "Evaluation Data",
+            "text": "The model was used to segment cases from the IDC [1] collection TCGA-KIRC [2]. Approximately 20% of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist. The model predictions and radiologist corrections are published on Zenodo [3].",
+            "references": [
+                {
+                    "label": "Imaging Data Commons (IDC)",
+                    "uri": "https://datacommons.cancer.gov/repository/imaging-data-commons"
+                },
+                {
+                    "label": "TCGA-KIRC",
+                    "uri": "https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=5800386"
+                },
+                {
+                    "label": "Image segmentations produced by the AIMI Annotations initiative",
+                    "uri": "https://zenodo.org/records/10009368"
+                }
+            ]
+        },
+        "training": {
+            "title": "Training Data",
+            "text": "The training dataset consists of 534 CT kidney annotations taken from the KiTS 2023 dataset [1] (N=489) and the TCGA-KIRC dataset [2] (N=45). The dataset was collected by reviewing patients who underwent partial or radical nephrectomy for suspicion of renal cancer by a physician in the urology service of University of Minnesota Health between 2010 and mid-2018. 544 patients met these initial criteria. Patients for whom imaging in the late arterial phase was not available were excluded from the training set. Patients with tumor thrombus were also excluded, since in these cases the tumor extends out beyond what was considered to be the primary site.",
+            "references": [
+                {
+                    "label": "KiTS 2023 dataset",
+                    "uri": "https://github.com/neheller/kits23"
+                },
+                {
+                    "label": "TCGA-KIRC",
+                    "uri": "https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=5800386"
+                }
+            ]
+        }
+    }
+}
diff --git a/models/bamf_nnunet_ct_kidney/utils/BamfProcessorRunner.py b/models/bamf_nnunet_ct_kidney/utils/BamfProcessorRunner.py
@@ -0,0 +1,66 @@
+"""
+-------------------------------------------------
+MHub - Run Module for post-processing nnUNet segmentations.
+-------------------------------------------------
+-------------------------------------------------
+Author: Rahul Soni
+Email:  [email protected]
+-------------------------------------------------
+"""
+
+from mhubio.core import Instance, InstanceData
+from mhubio.core import Module, IO
+import numpy as np
+import SimpleITK as sitk
+from skimage import measure
+import numpy as np
+
+
+
+class BamfProcessorRunner(Module):
+
+    @IO.Instance
+    @IO.Input('in_data', 'nifti:mod=ct|mr', the='input data to run nnunet on')
+    @IO.Output('out_data', 'bamf_processed.nrrd', 'nrrd:mod=seg:processor=bamf', data='in_data', the="keep the two largest connected components of the segmentation and remove all other ones")
+    def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:
+
+        # Log bamf runner info
+        self.log("Running BamfProcessor on....")
+        self.log(f" > input data:  {in_data.abspath}")
+        self.log(f" > output data: {out_data.abspath}")
+
+        # read image
+        self.log(f"Reading image from {in_data.abspath}")
+        img_itk = sitk.ReadImage(in_data.abspath)
+        img_np = sitk.GetArrayFromImage(img_itk)
+
+        # apply post-processing
+        img_bamf_processed = self.n_connected(img_np)
+
+        # store image temporarily
+        self.log(f"Writing tmp image to {out_data.abspath}")
+        img_bamf_processed_itk = sitk.GetImageFromArray(img_bamf_processed)
+        img_bamf_processed_itk.CopyInformation(img_itk)
+        sitk.WriteImage(img_bamf_processed_itk, out_data.abspath)
+
+
+    def n_connected(self, img_data):
+        img_data_mask = np.zeros(img_data.shape)
+        img_data_mask[img_data > 0] = 1
+        img_filtered = np.zeros(img_data_mask.shape)
+        blobs_labels = measure.label(img_data_mask, background=0)
+        lbl, counts = np.unique(blobs_labels, return_counts=True)
+        lbl_dict = {}
+        for i, j in zip(lbl, counts):
+            lbl_dict[i] = j
+        sorted_dict = dict(sorted(lbl_dict.items(), key=lambda x: x[1], reverse=True))
+        count = 0
+
+        for key, value in sorted_dict.items():
+            if count >= 1:
+                print(key, value)
+                img_filtered[blobs_labels == key] = 1
+            count += 1
+
+        img_data[img_filtered != 1] = 0
+        return img_data
diff --git a/models/bamf_nnunet_ct_kidney/utils/__init__.py b/models/bamf_nnunet_ct_kidney/utils/__init__.py
@@ -0,0 +1 @@
+from .BamfProcessorRunner import *