MHubAI · LennyN95 · Mar 15, 2024 · Feb 16, 2024 · Mar 6, 2024 · Mar 6, 2024
diff --git a/models/fmcib_radiomics/config/default.yml b/models/fmcib_radiomics/config/default.yml
@@ -0,0 +1,21 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: "FMCIB pipeline"
+
+execute:  # CentroidExtractor derives the coordinates json that FMCIBRunner consumes from the seg mask
+- FileStructureImporter
+- CentroidExtractor
+- FMCIBRunner
+- DataOrganizer
+
+modules:
+  FileStructureImporter:
+    structures:  # CT image (mod=ct) and tumor mask (mod=seg), both located below a $patientID folder
+      - $patientID/CT.nrrd@instance@nrrd:mod=ct
+      - $patientID/masks/GTV-1.nrrd@nrrd:mod=seg
+    import_id: patientID
+
+  DataOrganizer:
+    targets:  # export the feature json written by FMCIBRunner (type=fmcibfeatures)
+      - json:type=fmcibfeatures-->[i:patientID]/features.json
diff --git a/models/fmcib_radiomics/config/from_centroids.yml b/models/fmcib_radiomics/config/from_centroids.yml
@@ -0,0 +1,20 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: "FMCIB pipeline starting from a coordinate json file"
+
+execute:  # no CentroidExtractor here: the coordinates json is imported instead of derived from a mask
+- FileStructureImporter
+- FMCIBRunner
+- DataOrganizer
+
+modules:
+  FileStructureImporter:
+    structures:  # CT image (mod=ct) and centroids.json (type=fmcibcoordinates), both below a $patientID folder
+      - $patientID/CT.nrrd@instance@nrrd:mod=ct
+      - $patientID/centroids.json@json:type=fmcibcoordinates
+    import_id: patientID
+
+  DataOrganizer:
+    targets:  # export the feature json written by FMCIBRunner (type=fmcibfeatures)
+      - json:type=fmcibfeatures-->[i:patientID]/features.json
diff --git a/models/fmcib_radiomics/config/from_slicer.yml b/models/fmcib_radiomics/config/from_slicer.yml
@@ -0,0 +1,20 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: "FMCIB pipeline starting from a 3D Slicer markup json file"
+
+execute:  # no CentroidExtractor here: the seed point is read from an imported json file
+- FileStructureImporter
+- FMCIBRunner
+- DataOrganizer
+
+modules:
+  FileStructureImporter:
+    structures:  # file names are matched by regex: *.nrrd is imported as CT, *.json as coordinates
+      - $patientID@instance/re:^.*\.nrrd$::@nrrd:mod=ct
+      - $patientID/re:^.*\.json$::@json:type=fmcibcoordinates
+    import_id: patientID
+
+  DataOrganizer:
+    targets:  # export the feature json written by FMCIBRunner (type=fmcibfeatures)
+      - json:type=fmcibfeatures-->[i:patientID]/features.json
diff --git a/models/fmcib_radiomics/dockerfiles/Dockerfile b/models/fmcib_radiomics/dockerfiles/Dockerfile
@@ -0,0 +1,21 @@
+FROM mhubai/base:latest
+
+LABEL authors="[email protected],[email protected]"
+
+# download model weights (URL is quoted so the shell never treats `?` as a glob character)
+RUN wget "https://zenodo.org/records/10528450/files/model_weights.torch?download=1" -O /app/model_weights.torch
+
+# clone mhub implementation
+ARG MHUB_MODELS_REPO
+RUN buildutils/import_mhub_model.sh fmcib_radiomics ${MHUB_MODELS_REPO}
+
+
+# Install additional pip packages
+RUN pip3 install --upgrade pip && pip3 install --no-cache-dir \
+  jsonschema==4.21.1
+
+# Install FMCIB package (pre-release), should install everything else; no pip cache is kept in the image
+RUN pip3 install --no-cache-dir foundation-cancer-image-biomarker --pre
+
+ENTRYPOINT ["mhub.run"]
+CMD ["--workflow", "default"]
diff --git a/models/fmcib_radiomics/meta.json b/models/fmcib_radiomics/meta.json
@@ -0,0 +1,137 @@
+{
+  "id": "26e98e14-b605-4007-bd8b-79d517c935b5",
+  "name": "fmcib_radiomics",
+  "title": "Foundation Model for Cancer Imaging Biomarkers",
+  "summary": {
+    "description": "A foundation model for cancer imaging biomarker discovery trained through self-supervised learning using a dataset of 11,467 radiographic lesions. The model features can be used as a data-driven substitute for classical radiomic features.",
+    "inputs": [
+      {
+        "label": "Input CT Image",
+        "description": "CT imaging data containing lesions of interest, such as nodules or tumors",
+        "format": "DICOM",
+        "modality": "CT",
+        "slicethickness": "5mm",
+        "bodypartexamined": "Whole",
+        "non-contrast": true,
+        "contrast": true
+      },
+      {
+        "label": "Center of mass",        
+        "description": "Center of mass of the lesion in the CT image",
+        "format": "JSON",
+        "modality": "JSON",
+        "slicethickness": "5mm",
+        "bodypartexamined": "Whole",
+        "non-contrast": true,
+        "contrast": true
+      }
+    ],
+    "outputs": [
+      {
+        "type": "Prediction",
+        "valueType": "Feature vector",
+        "description": "A set of features extracted from the input CT image",
+        "label": "Features"
+      }
+    ],
+    "model": {
+      "architecture": "3D ResNet50",
+      "training": "other",
+      "cmpapproach": "3D"
+    },
+    "data": {
+      "training": {
+        "vol_samples": 11467
+      },
+      "evaluation": {
+        "vol_samples": 1944
+      },
+      "public": true,
+      "external": true
+    }
+  },
+  "details": {
+    "name": "Foundation Model for Cancer Imaging Biomarkers",
+    "version": "0.0.1",
+    "type": "Feature extractor",
+    "devteam": "Researchers from the Artificial Intelligence in Medicine (AIM) Program, Mass General Brigham, Harvard Medical School and other institutions",
+    "date": {
+      "pub": "2023 (preprint)",
+      "code": "n/a",
+      "weights": "18.01.2024"
+    },
+    "cite": "Pai, S., Bontempi, D., Hadzic, I., Prudente, V., et al. Foundation Model for Cancer Imaging Biomarkers. 2023.",
+    "license": {
+      "code": "MIT",
+      "weights": "CC BY-NC 4.0"
+    },
+    "publications": [
+      {
+        "title": "Foundation Model for Cancer Imaging Biomarkers",
+        "uri": "https://www.medrxiv.org/content/10.1101/2023.09.04.23294952v1"
+      }
+    ],
+    "github": "https://github.com/AIM-Harvard/foundation-cancer-image-biomarker",
+    "zenodo": "https://zenodo.org/records/10528450",
+    "colab": "https://colab.research.google.com/drive/1JMtj_4W0uNPzrVnM9EpN1_xpaB-5KC1H?usp=sharing",
+    "slicer": false    
+  },
+  "info": {
+    "use": {
+      "title": "Intended Use",
+      "text": "The foundation model is intended to extract features from several different types of lesions (lung, liver, kidney, mediastinal, abdominal, pelvic, bone and soft tissue). These features can be used for a variety of predictive and clustering tasks as a data-driven substitute for classical radiomic features."
+    },
+    "analyses": {
+      "title": "Quantitative Analyses",
+      "text": "The model's performance was assessed using three different downstream tasks, including malignancy prediction and lung cancer risk prediction. Refer to the publication for more details [1].",
+      "references": [
+          {
+              "label": "Foundation model for cancer image biomarkers",
+              "uri": "https://www.medrxiv.org/content/10.1101/2023.09.04.23294952v1"
+          }
+        ]
+    },
+    "evaluation": {
+        "title": "Evaluation Data",
+        "text": "The evaluation dataset consists of 1,944 lesions, including 1,221 lesions for anatomical site classification, 170 nodules for malignancy prediction, and 553 tumors (420 LUNG1 + 133 RADIO) for prognostication. The dataset was held out from the training data and gathered from several different sources [1, 2, 3, 4].",
+        "tables": [
+            {
+                "label": "Evaluation Tasks & Datasets",
+                "entries": {
+                    "Lesion Anatomical Site Prediction": "DeepLesion (n=1221)",
+                    "Nodule Malignancy Prediction": "LUNA16 (n=170)",
+                    "Tumor Prognostication": "NSCLC-Radiomics (n=420) + NSCLC-Radiogenomics (n=133)"
+                }
+            }
+        ],
+        "references": [
+            {
+                "label": "DeepLesion: automated mining of large-scale lesion annotations and universal lesion detection with deep learning.",
+                "uri": "https://pubmed.ncbi.nlm.nih.gov/30035154/"
+            },
+            {
+                "label": "LUNA16",
+                "uri": "https://www.cancerimagingarchive.net/collection/lidc-idri/"
+            },
+            {
+                "label": "NSCLC-Radiomics",
+                "uri": "https://www.cancerimagingarchive.net/collection/nsclc-radiomics/"
+            },
+            {
+                "label": "NSCLC-Radiogenomics",
+                "uri": "https://www.cancerimagingarchive.net/analysis-result/nsclc-radiogenomics-stanford/"
+            }
+        ]
+    },
+    "training": {
+        "title": "Training Data",
+        "text": "The training dataset consists of 11,467 lesions sourced from 5,513 unique CT scans across 2,312 different patients. It was curated from the DeepLesion dataset [1] in two steps: 1) lesions that did not contain anatomical labels were selected, 2) scans with a spacing of 5mm or more were removed.",
+        "references": [
+            {
+                "label": "DeepLesion: automated mining of large-scale lesion annotations and universal lesion detection with deep learning.",
+                "uri": "https://pubmed.ncbi.nlm.nih.gov/30035154/"
+            }
+        ]
+    }
+  }
+}
diff --git a/models/fmcib_radiomics/utils/CentroidExtractor.py b/models/fmcib_radiomics/utils/CentroidExtractor.py
@@ -0,0 +1,43 @@
+"""
+---------------------------------------------------------
+Author: Leonard Nürnberg
+Email:  [email protected]
+Date:   06.03.2024
+---------------------------------------------------------
+"""
+
+import json, jsonschema
+from mhubio.core import Instance, InstanceData, IO, Module
+import SimpleITK as sitk
+
+class CentroidExtractor(Module):
+    """MHub module that stores the centroid of a segmentation mask as a json file."""
+
+    @IO.Instance()
+    @IO.Input('in_mask', 'nrrd:mod=seg', the='Tumor segmentation mask for the input NRRD file.')
+    @IO.Output('centroids_json', 'centroids.json', "json:type=fmcibcoordinates", the='JSON file containing 3D coordinates of the centroid of the input mask.')
+    def task(self, instance: Instance, in_mask: InstanceData, centroids_json: InstanceData) -> None:
+        """Compute the centroid of `in_mask` and write it to `centroids_json`.
+
+        The centroid is taken for label 255 if the mask contains it and for
+        label 1 otherwise. If neither label is present, the error raised by
+        `GetCentroid(1)` propagates to the caller.
+        """
+        # read the input mask
+        mask = sitk.ReadImage(in_mask.abspath)
+
+        # compute the per-label shape statistics of the mask via ITK
+        label_shape_filter = sitk.LabelShapeStatisticsImageFilter()
+        label_shape_filter.Execute(mask)
+
+        # pick the label explicitly instead of retrying after a bare `except`,
+        # so that unrelated errors and interrupts are no longer swallowed
+        label = 255 if label_shape_filter.HasLabel(255) else 1
+        centroid = label_shape_filter.GetCentroid(label)
+
+        # extract x, y, and z coordinates from the centroid
+        x, y, z = centroid
+
+        # write the coordinates to a json file
+        with open(centroids_json.abspath, "w") as f:
+            json.dump({"coordX": x, "coordY": y, "coordZ": z}, f)
diff --git a/models/fmcib_radiomics/utils/FMCIBRunner.py b/models/fmcib_radiomics/utils/FMCIBRunner.py
@@ -0,0 +1,115 @@
+"""
+---------------------------------------------------------
+Author: Suraj Pai, Leonard Nürnberg 
+Email:  [email protected], [email protected]
+Date:   06.03.2024
+---------------------------------------------------------
+"""
+import json, jsonschema, os
+from mhubio.core import Instance, InstanceData, IO, Module
+
+COORDS_SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "coords.schema.json")  # schema file expected next to this module
+SLICERMARKUP_SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "slicermarkup.schema.json")  # schema file expected next to this module
+
+def is_valid(json_data: dict, schema_file_path: str) -> bool:
+    """Check if json data is valid according to a given schema.
+
+    Args:
+        json_data (dict): The json data to be validated.
+        schema_file_path (str): The path to the schema file.
+
+    Returns:
+        bool: True if the json data is valid according to the schema, False otherwise.
+    """
+    with open(schema_file_path) as f:
+        schema = json.load(f)
+
+    try:
+        jsonschema.validate(json_data, schema)
+        return True
+    except jsonschema.ValidationError:  # a broken schema (SchemaError) or any other error still propagates
+        return False
+
+def get_coordinates(json_file_path: str) -> dict:
+    """Load seed point coordinates from a coordinate or Slicer markup json file.
+
+    Args:
+        json_file_path (str): The path to the json file.
+
+    Returns:
+        dict: The parsed file (coordinate schema) or the coordX, coordY and coordZ of the markup point.
+
+    Raises:
+        ValueError: If no schema matches or there is not exactly one LPS markup with one control point.
+    """
+    with open(json_file_path) as f:
+        json_data = json.load(f)
+
+    if is_valid(json_data, COORDS_SCHEMA_PATH):
+        return json_data
+    if not is_valid(json_data, SLICERMARKUP_SCHEMA_PATH):
+        raise ValueError("The input json file does not adhere to the expected schema.")
+
+    # raise instead of assert: asserts are stripped when Python runs with -O
+    markups = json_data["markups"]
+    if len(markups) != 1:
+        raise ValueError("Currently, only one point per file is supported.")
+    if markups[0]["coordinateSystem"] != "LPS":
+        raise ValueError("Only the LPS coordinate system is supported.")
+    if len(markups[0]["controlPoints"]) != 1:
+        raise ValueError("Currently, only one control point per markup is supported.")
+    position = markups[0]["controlPoints"][0]["position"]
+    return {"coordX": position[0], "coordY": position[1], "coordZ": position[2]}
+
+
+def fmcib(input_dict: dict, json_output_file_path: str):
+    """Extract FMCIB features for one seed point and store them as json.
+
+    Args:
+        input_dict (dict): The input dictionary containing the image path and the seed point coordinates.
+        json_output_file_path (str): The path where the features are exported to as a json file.
+    """
+    # model dependencies are imported here, inside the function
+    import torch
+    from fmcib.models import fmcib_model
+    from fmcib.preprocessing import preprocess
+
+    # build the network and switch it to evaluation mode
+    network = fmcib_model()
+    network.eval()
+
+    # preprocess the input and add a leading dimension
+    batch = preprocess(input_dict).unsqueeze(0)
+
+    # forward pass without gradient tracking
+    with torch.no_grad():
+        embedding = network(batch)
+
+    # flatten the output and name each value by its position
+    values = embedding.flatten().tolist()
+    named_features = {f"feature_{position}": value for position, value in enumerate(values)}
+
+    # write the named features to the json output file
+    with open(json_output_file_path, "w") as handle:
+        json.dump(named_features, handle)
+
+class FMCIBRunner(Module):
+    """MHub module that extracts FMCIB features from a CT image at a seed point."""
+
+    @IO.Instance()
+    @IO.Input('in_data', 'nrrd:mod=ct', the='Input NRRD file')
+    @IO.Input('centroids_json', "json:type=fmcibcoordinates", the='JSON file containing 3D coordinates of the centroid of the input mask.')
+    @IO.Output('feature_json', 'features.json', "json:type=fmcibfeatures", bundle='model', the='Features extracted from the input image at the specified seed point.')
+    def task(self, instance: Instance, in_data: InstanceData, centroids_json: InstanceData, feature_json: InstanceData) -> None:
+        """Run the FMCIB feature extraction for a single instance.
+
+        Reads the seed point from `centroids_json`, combines it with the path
+        of `in_data` and writes the extracted features to `feature_json`.
+        """
+        # seed point coordinates parsed from the json file
+        seed_point = get_coordinates(centroids_json.abspath)
+
+        # the image path comes first, the coordinate entries are merged in after it
+        model_input = dict(image_path=in_data.abspath)
+        model_input.update(seed_point)
+        fmcib(model_input, feature_json.abspath)
diff --git a/models/fmcib_radiomics/utils/__init__.py b/models/fmcib_radiomics/utils/__init__.py
@@ -0,0 +1 @@
+from .FMCIBRunner import FMCIBRunner