MHubAI · LennyN95 · Mar 15, 2024 · Feb 16, 2024 · Mar 6, 2024 · Mar 6, 2024
diff --git a/models/fmcib_radiomics/config/default.yml b/models/fmcib_radiomics/config/default.yml
@@ -0,0 +1,20 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: "FMCIB pipeline"
+# Modules listed for this workflow (presumably run in this order - confirm against mhubio docs); FMCIBRunner is defined in utils/FMCIBRunner.py.
+execute:
+- FileStructureImporter
+- FMCIBRunner
+- DataOrganizer
+# Per-module settings, keyed by the module names used under `execute`.
+modules:
+  FileStructureImporter:
+    structures:
+      - $patientID/CT.nrrd@instance@nrrd:mod=ct  # tagged nrrd:mod=ct, the type FMCIBRunner's in_data input requests
+      - $patientID/masks/GTV-1.nrrd@nrrd  # tagged nrrd, one of the types FMCIBRunner's in_mask input accepts
+    import_id: patientID
+  # NOTE(review): presumably exports the json output as <patientID>/features.json - confirm against mhubio docs.
+  DataOrganizer:
+    targets:
+      - json-->[i:patientID]/features.json
diff --git a/models/fmcib_radiomics/dockerfiles/Dockerfile b/models/fmcib_radiomics/dockerfiles/Dockerfile
@@ -0,0 +1,20 @@
+FROM mhubai/base:latest
+
+LABEL authors="[email protected]"
+
+ARG MHUB_MODELS_REPO
+# Pull this model's files from the models repo selected by the MHUB_MODELS_REPO build arg.
+RUN buildutils/import_mhub_model.sh fmcib_radiomics ${MHUB_MODELS_REPO}
+# Download the pretrained weights; the URL is quoted so the shell never treats "?" as a glob.
+RUN wget "https://zenodo.org/records/10528450/files/model_weights.torch?download=1" -O /app/model_weights.torch
+
+# -p creates both directory levels in one layer and tolerates an already existing models/.
+RUN mkdir -p models/fmcib
+
+# Install FMCIB package, should install everything else ...
+# --no-cache-dir keeps pip's download cache out of the image layer.
+RUN pip install --no-cache-dir foundation-cancer-image-biomarker --pre
+
+# Start the mhubio runner; CMD only supplies default arguments and can be overridden at run time.
+ENTRYPOINT ["python3", "-m", "mhubio.run"]
+CMD ["--workflow", "default"]
diff --git a/models/fmcib_radiomics/meta.json b/models/fmcib_radiomics/meta.json
@@ -0,0 +1,138 @@
+{
+  "id": "...",
+  "name": "fmcib_radiomics",
+  "title": "Foundation Model for Cancer Imaging Biomarkers",
+  "summary": {
+    "description": "A foundation model for cancer imaging biomarker discovery trained through self-supervised learning using a dataset of 11,467 radiographic lesions. The model features can be used as a data-driven substitute for classical radiomic features.",
+    "inputs": [
+      {
+        "label": "Input CT Image",
+        "description": "CT imaging data containing lesions of interest, such as nodules or tumors",
+        "format": "DICOM",
+        "modality": "CT",
+        "slicethickness": "5mm",
+        "bodypartexamined": "Whole",
+        "non-contrast": true,
+        "contrast": true
+      },
+      {
+        "label": "Center of mass",        
+        "description": "Center of mass of the lesion in the CT image",
+        "format": "JSON",
+        "modality": "JSON",
+        "slicethickness": "5mm",
+        "bodypartexamined": "Whole",
+        "non-contrast": true,
+        "contrast": true
+      }
+    ],
+    "outputs": [
+      {
+        "type": "Prediction",
+        "valueType": "Feature vector",
+        "description": "A set of features extracted from the input CT image",
+        "label": "Features"
+
+      }
+    ],
+    "model": {
+      "architecture": "3D ResNet50",
+      "training": "other",
+      "cmpapproach": "3D"
+    },
+    "data": {
+      "training": {
+        "vol_samples": 11467
+      },
+      "evaluation": {
+        "vol_samples": 1944
+      },
+      "public": true,
+      "external": true
+    }
+  },
+  "details": {
+    "name": "Foundation Model for Cancer Imaging Biomarkers",
+    "version": "0.0.1",
+    "type": "Feature extractor",
+    "devteam": "Researchers from the Artificial Intelligence in Medicine (AIM) Program, Mass General Brigham, Harvard Medical School and other institutions",
+    "date": {
+      "pub": "2023 (preprint)",
+      "code": "n/a",
+      "weights": "18.01.2024"
+    },
+    "cite": "Pai, S., Bontempi, D., Hadzic, I., Prudente, V., et al. Foundation Model for Cancer Imaging Biomarkers. 2023.",
+    "license": {
+      "code": "MIT",
+      "weights": "CC BY-NC 4.0"
+    },
+    "publications": [
+      {
+        "title": "Foundation Model for Cancer Imaging Biomarkers",
+        "uri": "https://www.medrxiv.org/content/10.1101/2023.09.04.23294952v1"
+      }
+    ],
+    "github": "https://github.com/AIM-Harvard/foundation-cancer-image-biomarker",
+    "zenodo": "https://zenodo.org/records/10528450",
+    "colab": "https://colab.research.google.com/drive/1JMtj_4W0uNPzrVnM9EpN1_xpaB-5KC1H?usp=sharing",
+    "slicer": false    
+  },
+  "info": {
+    "use": {
+      "title": "Intended Use",
+      "text": "The foundation model is intended to extract features from several different types of lesions (lung, liver, kidney, mediastinal, abdominal, pelvic, bone and soft tissue). These features can be used for a variety of predictive and clustering tasks as a data-driven substitute for classical radiomic features."
+    },
+    "analyses": {
+      "title": "Quantitative Analyses",
+      "text": "The model's performance was assessed using three different downstream tasks, including malignancy prediction and lung cancer risk prediction. Refer to the publication for more details [1].",
+      "references": [
+          {
+              "label": "Foundation model for cancer image biomarkers",
+              "uri": "https://www.medrxiv.org/content/10.1101/2023.09.04.23294952v1"
+          }
+        ]
+    },
+    "evaluation": {
+        "title": "Evaluation Data",
+        "text": "The evaluation dataset consists of 1,944 lesions, including 1,221 lesions for anatomical site classification, 170 nodules for malignancy prediction, and 553 tumors (420 LUNG1 + 133 RADIO) for prognostication. The dataset was held out from the training data and gathered from several different sources [1, 2, 3, 4].",
+        "tables": [
+            {
+                "label": "Evaluation Tasks & Datasets",
+                "entries": {
+                    "Lesion Anatomical Site Prediction": "DeepLesion (n=1221)",
+                    "Nodule Malignancy Prediction": "LUNA16 (n=170)",
+                    "Tumor Prognostication": "NSCLC-Radiomics (n=420) + NSCLC-Radiogenomics (n=133)"
+                }
+            }
+        ],
+        "references": [
+                {
+                "label": "DeepLesion: automated mining of large-scale lesion annotations and universal lesion detection with deep learning.",
+                "uri": "https://pubmed.ncbi.nlm.nih.gov/30035154/"
+            },
+            {
+                "label": "LUNA16",
+                "uri": "https://www.cancerimagingarchive.net/collection/lidc-idri/"
+            },
+            {
+              "label": "NSCLC-Radiomics",
+              "uri": "https://www.cancerimagingarchive.net/collection/nsclc-radiomics/"
+          },
+          {
+            "label": "NSCLC-Radiogenomics",
+            "uri": "https://www.cancerimagingarchive.net/analysis-result/nsclc-radiogenomics-stanford/"
+          }
+        ]
+    },
+    "training": {
+    "title": "Training Data",
+    "text": "The training dataset consists of 11,467 lesions sourced from 5,513 unique CT scans across 2,312 different patients. This was curated from the DeepLesion dataset [1] in two steps: 1) lesions that did not contain anatomical labels were selected; 2) scans with a spacing of 5 mm or more were removed.",
+    "references": [
+      {
+        "label": "DeepLesion: automated mining of large-scale lesion annotations and universal lesion detection with deep learning.",
+        "uri": "https://pubmed.ncbi.nlm.nih.gov/30035154/"
+    }
+    ]
+    }
+  }
+}
diff --git a/models/fmcib_radiomics/utils/FMCIBRunner.py b/models/fmcib_radiomics/utils/FMCIBRunner.py
@@ -0,0 +1,53 @@
+"""
+---------------------------------------------------------
+Author: Suraj Pai
+Email:  [email protected]
+---------------------------------------------------------
+"""
+
+import json
+import torch
+from fmcib.models import fmcib_model 
+import SimpleITK as sitk
+from mhubio.core import Instance, InstanceData, IO, Module
+from fmcib.preprocessing import preprocess
+
+
+class FMCIBRunner(Module):
+    @IO.Instance()
+    @IO.Input('in_data', 'nrrd:mod=ct', the='Input NRRD file')
+    @IO.Input('in_mask', 'nrrd|json', the='Tumor mask for the input NRRD file')
+    @IO.Output('feature_json', 'features.json', "json", bundle='model', the='output JSON file')
+    def task(self, instance: Instance, in_data: InstanceData, in_mask: InstanceData, feature_json: InstanceData) -> None:
+        mask_path = in_mask.abspath
+        mask = sitk.ReadImage(mask_path)
+
+        # Get the CoM of the mask
+        label_shape_filter = sitk.LabelShapeStatisticsImageFilter()
+        label_shape_filter.Execute(mask)
+        try:
+            centroid = label_shape_filter.GetCentroid(255)
+        except:
+            centroid = label_shape_filter.GetCentroid(1)
+
+        x, y, z = centroid
+
+        input_dict = {
+            "image_path": in_data.abspath,
+            "coordX": x,
+            "coordY": y,
+            "coordZ": z,
+        }
+
+        image = preprocess(input_dict)
+        image = image.unsqueeze(0)
+        model = fmcib_model()
+
+        model.eval()
+        with torch.no_grad():
+            features = model(image)
+
+        feature_dict = {f"feature_{idx}": feature for idx, feature in enumerate(features.flatten().tolist())}
+
+        with open(feature_json.abspath, "w") as f:
+            json.dump(feature_dict, f)
diff --git a/models/fmcib_radiomics/utils/__init__.py b/models/fmcib_radiomics/utils/__init__.py
@@ -0,0 +1 @@
+from .FMCIBRunner import FMCIBRunner