MHubAI · jithenece · May 1, 2024 · May 1, 2024 · May 1, 2024 · May 2, 2024
diff --git a/models/bamf_nnunet_pet_ct_breast/config/default.yml b/models/bamf_nnunet_pet_ct_breast/config/default.yml
@@ -0,0 +1,49 @@
+# Default MHub workflow configuration for this model (see `description`).
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: default configuration for Bamf NNUnet Breast FDG-avid lesions segmentation (dicom to dicom)
+
+# Modules listed for execution. Settings for each one are given under
+# `modules` below; Registration has no entry there.
+execute:
+  - FileStructureImporter
+  - NiftiConverter
+  - Registration
+  - NNUnetPETCTRunner
+  - TotalSegmentatorMLRunner
+  - BreastPostProcessor
+  - DsegConverter
+  - DataOrganizer
+
+modules:
+  # One CT and one PT DICOM series are imported per patientID folder.
+  FileStructureImporter:
+    input_dir: input_data
+    structures:
+      - '$patientID@instance/CT@dicom:mod=ct'
+      - '$patientID/PT@dicom:mod=pt'
+    import_id: patientID
+
+  # Both modalities are selected, hence multiple inputs are allowed.
+  NiftiConverter:
+    in_datas: 'dicom:mod=pt|ct'
+    allow_multi_input: true
+
+  NNUnetPETCTRunner:
+    in_ct_data: 'nifti:mod=ct:registered=true'
+    nnunet_task: Task762_PET_CT_Breast
+    nnunet_model: 3d_fullres
+    roi: LIVER,KIDNEY,URINARY_BLADDER,SPLEEN,LUNG,BRAIN,HEART,STOMACH,BREAST+FDG_AVID_TUMOR
+
+  TotalSegmentatorMLRunner:
+    in_data: 'nifti:mod=ct:registered=true'
+    use_fast_mode: true
+
+  BreastPostProcessor:
+    in_ct_data: 'nifti:mod=ct:registered=true'
+
+  # Only the post-processed segmentation (processor=bamf) is converted.
+  DsegConverter:
+    source_segs: 'nifti:mod=seg:processor=bamf'
+    model_name: BAMF Breast FDG PET CT
+    target_dicom: 'dicom:mod=ct'
+    skip_empty_slices: true
+
+  DataOrganizer:
+    targets:
+      - 'dicomseg-->[i:patientID]/bamf_nnunet_pet_ct_breast.seg.dcm'
diff --git a/models/bamf_nnunet_pet_ct_breast/dockerfiles/Dockerfile b/models/bamf_nnunet_pet_ct_breast/dockerfiles/Dockerfile
@@ -0,0 +1,36 @@
+FROM mhubai/base:latest
+
+# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build
+# by pulling sklearn instead of scikit-learn
+# N.B. this is a known issue:
+# https://github.com/MIC-DKFZ/nnUNet/issues/1281
+# https://github.com/MIC-DKFZ/nnUNet/pull/1209
+ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True
+
+# Install nnunet
+RUN pip3 install --no-cache-dir nnunet
+
+# Install TotalSegmentator (pinned version)
+RUN pip3 install --no-cache-dir totalsegmentator==1.5.6
+
+# Clone the main branch of MHubAI/models
+ARG MHUB_MODELS_REPO
+RUN buildutils/import_mhub_model.sh bamf_nnunet_pet_ct_breast ${MHUB_MODELS_REPO}
+
+# Pull nnUNet model weights into the container for Task762_PET_CT_Breast
+ENV WEIGHTS_DIR=/root/.nnunet/nnUNet_models/nnUNet/
+ENV TASK_NAME=Task762_PET_CT_Breast
+ENV WEIGHTS_FN=$TASK_NAME.zip
+ENV WEIGHTS_URL=https://zenodo.org/record/8290055/files/$WEIGHTS_FN
+
+# Download, extract and delete the archive within a single layer so that the
+# zip file is not retained in an intermediate image layer.
+# NOTE(review): --no-check-certificate disables TLS verification of the
+# download. It is kept here as in the original build; consider removing it.
+RUN mkdir -p ${WEIGHTS_DIR} \
+  && wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL} --no-check-certificate \
+  && unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR} \
+  && rm ${WEIGHTS_DIR}${WEIGHTS_FN}
+
+# specify nnunet specific environment variables
+ENV WEIGHTS_FOLDER=$WEIGHTS_DIR
+
+# Default run script
+ENTRYPOINT ["mhub.run"]
+CMD ["--config", "/app/models/bamf_nnunet_pet_ct_breast/config/default.yml"]
diff --git a/models/bamf_nnunet_pet_ct_breast/meta.json b/models/bamf_nnunet_pet_ct_breast/meta.json
@@ -0,0 +1,148 @@
+{
+    "id": "",
+    "name": "bamf_nnunet_pet_ct_breast",
+    "title": "Bamf NNunet PET CT Breast",
+    "summary": {
+        "description": "This model detects FDG-avid lesions in the breast from FDG PET/CT scans",
+        "inputs": [
+            {
+                "label": "Input Image",
+                "description": "The CT scan of a patient.",
+                "format": "DICOM",
+                "modality": "CT",
+                "bodypartexamined": "Lung",
+                "slicethickness": "5mm",
+                "non-contrast": true,
+                "contrast": false
+            },
+            {
+                "label": "Input Image",
+                "description": "The PET scan of a patient.",
+                "format": "DICOM",
+                "modality": "PT",
+                "bodypartexamined": "Lung",
+                "slicethickness": "3.38mm",
+                "non-contrast": false,
+                "contrast": false
+            }
+        ],
+        "outputs": [
+            {
+                "label": "Segmentation",
+                "type": "Segmentation",
+                "description": "FDG-avid Breast tumor",
+                "classes": [
+                    "FDG_AVID_TUMOR"
+                ]
+            }
+        ],
+        "model": {
+            "architecture": "U-net",
+            "training": "supervised",
+            "cmpapproach": "3D"
+        },
+        "data": {
+            "training": {
+                "vol_samples": 1014
+            },
+            "evaluation": {
+                "vol_samples": 11
+            },
+            "public": true,
+            "external": true
+        }        
+    },
+    "details": {
+        "name": "AIMI PET CT BREAST",
+        "version": "1.0.0",
+        "devteam": "BAMF Health",
+        "type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)",
+        "date": {
+            "code": "17.10.2023",
+            "weights": "28.08.2023",
+            "pub": "23.10.2023"
+        },
+        "cite": "Murugesan, Gowtham Krishnan, Diana McCrumb, Mariam Aboian, Tej Verma, Rahul Soni, Fatima Memon, and Jeff Van Oss. The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections. arXiv preprint arXiv:2310.14897 (2023).",
+        "license": {
+            "code": "MIT",
+            "weights": "CC BY-NC 4.0"
+        },
+        "publications": [
+            {
+                "title": "The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections",
+                "uri": "https://arxiv.org/abs/2310.14897"
+            }
+        ],
+        "github": "https://github.com/bamf-health/aimi-breast-pet-ct"
+    },
+    "info": {
+        "use": {
+            "title": "Intended Use",
+            "text": "This model is intended to perform segmentation of FDG-avid breast tumor regions in FDG PET/CT scans. The model has been trained and tested on scans acquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown."
+        },
+        "analyses": {
+            "title": "Quantitative Analyses",
+            "text": "Label-wise metrics (mean (standard deviation)) between AI derived and expert corrected FDG PET/CT breast lesion annotations.",
+            "tables": [
+                {
+                  "label": "Segmentation Metric Expert",
+                  "entries": {
+                    "Tumor DSC": "0.80 (0.33)",
+                    "Tumor 95% Hausdorff (mm)": "29.70 (33.43)"
+                  }
+                },
+                {
+                  "label": "Segmentation Metric Tumor - Non-Expert",
+                  "entries": {
+                    "Tumor DSC": "0.94 (0.10)",
+                    "Tumor 95% Hausdorff (mm)": "13.53 (20.00)"
+                  }
+                },
+                {
+                  "label": "Detection Accuracy",
+                  "entries": {
+                    "Sensitivity": "0.43",
+                    "False negative rate": "0.57",
+                    "F1 score": "0.52"
+                  }
+                }
+            ],
+            "references": [
+                {
+                    "label": "The AIMI Initiative: AI-Generated Annotations for Imaging Data Commons Collections",
+                    "uri": "https://arxiv.org/abs/2310.14897"
+                }
+            ]
+        },
+        "evaluation": {
+            "title": "Evaluation Data",
+            "text": "77 validation cases were rated by a radiologist and a non-expert.",
+            "references": [
+                {
+                    "label": "Imaging Data Commons (IDC)",
+                    "uri": "https://datacommons.cancer.gov/repository/imaging-data-commons"
+                },
+                {
+                    "label": "Image segmentations produced by the AIMI Annotations initiative",
+                    "uri": "https://zenodo.org/records/10009368"
+                }
+            ]
+        },
+        "training": {
+            "title": "Training Data",
+            "text": "The AutoPET Challenge 2023 dataset comprises whole-body FDG-PET/CT data from 900 patients, encompassing 1014 studies with tumor annotations. This dataset was augmented by adding labels for the brain, bladder, kidneys, liver, stomach, spleen, lungs, and heart generated by the TotalSegmentator model. A multi-task AI model was trained using the augmented dataset.",
+            "references": [
+                {
+                    "label": "AutoPET Challenge 2023 dataset",
+                    "uri": "https://doi.org/10.7937/gkr0-xv29"
+                },
+                {
+                    "label": "Total Segmentator",
+                    "uri": "https://doi.org/10.48550/arXiv.2208.05868"
+                }
+            ]
+        }
+    }
+}
+
+
diff --git a/models/bamf_nnunet_pet_ct_breast/utils/BreastPostProcessor.py b/models/bamf_nnunet_pet_ct_breast/utils/BreastPostProcessor.py
@@ -0,0 +1,134 @@
+import os
+import SimpleITK as sitk
+import numpy as np
+import os, shutil
+import cv2
+from skimage import measure
+from mhubio.core import IO
+from mhubio.core import Module, Instance, InstanceData, InstanceDataCollection
+
+
+class BreastPostProcessor(Module):
+
+    def mask_labels(self, labels, ts):
+        """
+        Create a mask based on given labels.
+
+        Args:
+            labels (list): List of labels to be masked.
+            ts (np.ndarray): Image data.
+
+        Returns:
+            np.ndarray: Masked image data.
+        """
+        lung = np.zeros(ts.shape)
+        for lbl in labels:
+            lung[ts == lbl] = 1
+        return lung
+
+    def bbox2_3D(self, img):
+        r = np.any(img, axis=(1, 2))
+        c = np.any(img, axis=(0, 2))
+        z = np.any(img, axis=(0, 1))
+
+        rmin, rmax = np.where(r)[0][[0, -1]]
+        cmin, cmax = np.where(c)[0][[0, -1]]
+        zmin, zmax = np.where(z)[0][[0, -1]]
+
+        return rmin, rmax, cmin, cmax, zmin, zmax
+
+    def n_connected(self, img_data):
+        """
+        Get the largest connected component in a binary image.
+
+        Args:
+            img_data (np.ndarray): image data.
+
+        Returns:
+            np.ndarray: Processed image with the largest connected component.
+        """
+        img_filtered = np.zeros(img_data.shape)
+        blobs_labels = measure.label(img_data, background=0)
+        lbl, counts = np.unique(blobs_labels, return_counts=True)
+        lbl_dict = {}
+        for i, j in zip(lbl, counts):
+            lbl_dict[i] = j
+        sorted_dict = dict(sorted(lbl_dict.items(), key=lambda x: x[1], reverse=True))
+        count = 0
+
+        for key, value in sorted_dict.items():
+            if count >= 1 and count <= 2 and value > 20:
+                print(key, value)
+                img_filtered[blobs_labels == key] = 1
+            count += 1
+
+        img_data[img_filtered != 1] = 0
+        return img_data
+
+    def arr_2_sitk_img(self, arr, ref):
+        """
+        Convert numpy array to SimpleITK image.
+
+        Args:
+            arr (np.ndarray): Input image data as a numpy array.
+            ref: Reference image for copying information.
+
+        Returns:
+            sitk.Image: Converted SimpleITK image.
+        """
+        op_img = sitk.GetImageFromArray(arr)
+        op_img.CopyInformation(ref)
+        return op_img
+
+    @IO.Instance()
+    @IO.Input('in_ct_data', 'nifti:mod=ct:registered=true', the='input ct data')
+    @IO.Input('in_tumor_data', 'nifti:mod=seg:model=nnunet', the='input tumor segmentation')
+    @IO.Input('in_total_seg_data', 'nifti:mod=seg:model=TotalSegmentator', the='input total segmentation')
+    @IO.Output('out_data', 'bamf_processed.nii.gz', 'nifti:mod=seg:processor=bamf:roi=BREAST+FDG_AVID_TUMOR', data='in_tumor_data',
+               the="FDG-avid lesions in breast")
+    def task(self, instance: Instance, in_ct_data: InstanceData, in_tumor_data: InstanceData,
+             in_total_seg_data: InstanceData, out_data: InstanceData):
+        """
+        Perform postprocessing and writes simpleITK Image
+        """
+        tumor_seg_path = in_tumor_data.abspath
+        total_seg_path = in_total_seg_data.abspath
+
+        ts_data = sitk.GetArrayFromImage(sitk.ReadImage(total_seg_path))
+        ts_abdominal = sitk.GetArrayFromImage(sitk.ReadImage(total_seg_path))
+        ts_data[ts_data > 1] = 1
+        lesions = sitk.GetArrayFromImage(sitk.ReadImage(tumor_seg_path))
+        tumor_label = 9
+        lesions[lesions != tumor_label] = 0
+        lesions[lesions == tumor_label] = 1
+
+        op_data = np.zeros(ts_data.shape)
+        ref = sitk.ReadImage(in_ct_data.abspath)
+        ct_data = sitk.GetArrayFromImage(ref)
+
+        op_data[lesions == 1] = 1
+        th = np.min(ct_data)
+        op_data[ct_data == th] = 0  # removing predicitons where CT not available
+        # Use the coordinates of the bounding box to crop the 3D numpy array.
+        ts_abdominal[ts_abdominal > 4] = 0
+        ts_abdominal[ts_abdominal > 1] = 1
+        if ts_abdominal.max() > 0:
+            x1, x2, y1, y2, z1, z2 = self.bbox2_3D(ts_abdominal)
+        # Create a structuring element with ones in the middle and zeros around it
+        structuring_element = np.ones((3, 3))
+
+        # Dilate the array with the structuring element
+        op_temp = cv2.dilate(ts_data, structuring_element, iterations=5)
+        op_temp = cv2.erode(op_temp, structuring_element, iterations=5)
+        op_data[op_temp == 1] = 0
+        if ts_abdominal.max() > 0:
+            op_data[x1:x2, y1:, :] = 0
+        op_data[0:3, :, :] = 0
+        op_data = self.n_connected(op_data)
+        op_img = sitk.GetImageFromArray(op_data)
+        op_img.CopyInformation(ref)
+        tmp_dir = self.config.data.requestTempDir(label="breast-post-processor")
+        tmp_file = os.path.join(tmp_dir, f'final.nii.gz')
+        sitk.WriteImage(op_img, tmp_file)
+
+        shutil.copyfile(tmp_file, out_data.abspath)