-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added first working FMCIB model container #73
Changes from 2 commits
a52b616
c864936
351eff8
4cf674d
fcb7f63
0a255ed
df1d8ba
a50b345
f4618e2
21206b4
4e1525c
c073f80
6243b98
a184cfe
94d46a8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
general: | ||
data_base_dir: /app/data | ||
version: 1.0 | ||
description: "FMCIB pipeline" | ||
|
||
execute: | ||
- FileStructureImporter | ||
- FMCIBRunner | ||
- DataOrganizer | ||
|
||
modules: | ||
FileStructureImporter: | ||
structures: | ||
- $patientID/CT.nrrd@instance@nrrd:mod=ct | ||
- $patientID/masks/GTV-1.nrrd@nrrd | ||
import_id: patientID | ||
|
||
DataOrganizer: | ||
targets: | ||
- json-->[i:patientID]/features.json |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
FROM mhubai/base:latest | ||
|
||
LABEL authors="[email protected]" | ||
|
||
ARG MHUB_MODELS_REPO | ||
# Add pull models repo command here after local testing | ||
RUN buildutils/import_mhub_model.sh fmcib_radiomics ${MHUB_MODELS_REPO} | ||
RUN wget https://zenodo.org/records/10528450/files/model_weights.torch?download=1 -O /app/model_weights.torch | ||
|
||
|
||
RUN mkdir models | ||
RUN mkdir models/fmcib | ||
surajpaib marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# Install FMCIB package, should install everything else ... | ||
RUN pip install foundation-cancer-image-biomarker --pre | ||
|
||
|
||
|
||
ENTRYPOINT ["python3", "-m", "mhubio.run"] | ||
CMD ["--workflow", "default"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
{ | ||
"id": "...", | ||
"name": "fmcib_radiomics", | ||
"title": "Foundation Model for Cancer Imaging Biomarkers", | ||
"summary": { | ||
"description": "A foundation model for cancer imaging biomarker discovery trained through self-supervised learning using a dataset of 11,467 radiographic lesions. The model features can be used as a data-driven substitute for classical radiomic features", | ||
"inputs": [ | ||
{ | ||
"label": "Input CT Image", | ||
"description": "CT imaging data containing lesions of interest, such as nodules or tumors", | ||
"format": "DICOM", | ||
"modality": "CT", | ||
"slicethickness": "5mm", | ||
"bodypartexamined": "Whole", | ||
"non-contrast": true, | ||
"contrast": true | ||
}, | ||
{ | ||
"label": "Center of mass", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This doesn't match the current default workflow, but we can address this later and decide which of the two (JSON / mask) we want to keep as the default. I'd choose whichever is the easiest and most standard one. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I provided an alternative workflow that can start from the JSON file. I have some ideas on how we could provide a workflow starting from DICOM (which should then be the default) that I want to discuss later! |
||
"description": "Center of mass of the lesion in the CT image", | ||
"format": "JSON", | ||
"modality": "JSON", | ||
"slicethickness": "5mm", | ||
"bodypartexamined": "Whole", | ||
"non-contrast": true, | ||
"contrast": true | ||
} | ||
], | ||
"outputs": [ | ||
{ | ||
"type": "Prediction", | ||
"valueType": "Feature vector", | ||
"description": "A set of features extracted from the input CT image", | ||
"label": "Features" | ||
|
||
} | ||
], | ||
"model": { | ||
"architecture": "3D ResNet50", | ||
"training": "other", | ||
"cmpapproach": "3D" | ||
}, | ||
"data": { | ||
"training": { | ||
"vol_samples": 11467 | ||
}, | ||
"evaluation": { | ||
"vol_samples": 1944 | ||
}, | ||
"public": true, | ||
"external": true | ||
} | ||
}, | ||
"details": { | ||
"name": "Foundation Model for Cancer Imaging Biomarkers", | ||
"version": "0.0.1", | ||
"type": "Feature extractor", | ||
"devteam": "Researchers from the Artificial Intelligence in Medicine (AIM) Program, Mass General Brigham, Harvard Medical School and other institutions", | ||
"date": { | ||
"pub": "2023 (preprint)", | ||
"code": "n/a", | ||
"weights": "18.01.2024" | ||
}, | ||
"cite": "Pai, S., Bontempi, D., Hadzic, I., Prudente, V., et al. Foundation Model for Cancer Imaging Biomarkers. 2023.", | ||
"license": { | ||
"code": "MIT", | ||
"weights": "CC BY-NC 4.0" | ||
}, | ||
"publications": [ | ||
{ | ||
"title": "Foundation Model for Cancer Imaging Biomarkers", | ||
"uri": "https://www.medrxiv.org/content/10.1101/2023.09.04.23294952v1" | ||
} | ||
], | ||
"github": "https://github.com/AIM-Harvard/foundation-cancer-image-biomarker", | ||
"zenodo": "https://zenodo.org/records/10528450", | ||
"colab": "https://colab.research.google.com/drive/1JMtj_4W0uNPzrVnM9EpN1_xpaB-5KC1H?usp=sharing", | ||
"slicer": false | ||
}, | ||
"info": { | ||
"use": { | ||
"title": "Intended Use", | ||
"text": "The foundation model is intended to extract features from several different types of lesions (lung, liver, kidney, mediastinal, abdominal, pelvic, bone and soft tissue). These features can be used for a variety of predictive and clustering tasks as a data-driven substitute for classical radiomic features." | ||
}, | ||
"analyses": { | ||
"title": "Quantitative Analyses", | ||
"text": "The model's performance was assessed using three different downstream tasks, including malignancy prediction and lung cancer risk prediction. Refer to the publication for more details [1].", | ||
"references": [ | ||
{ | ||
"label": "Foundation model for cancer image biomarkers", | ||
"uri": "https://www.medrxiv.org/content/10.1101/2023.09.04.23294952v1" | ||
} | ||
] | ||
}, | ||
"evaluation": { | ||
"title": "Evaluation Data", | ||
"text": "The evaluation dataset consists of 1,944 lesions, including 1,221 lesions for anatomical site classification, 170 nodules for malignancy prediction, and 553 tumors (420 LUNG1 + 133 RADIO) for prognostication. The dataset was held out from the training data and gathered from several different sources [1, 2, 3, 4].", | ||
"tables": [ | ||
{ | ||
"label": "Evaluation Tasks & Datasets", | ||
"entries": { | ||
"Lesion Anatomical Site Prediction": "DeepLesion (n=1221)", | ||
"Nodule Malignancy Prediction": "LUNA16 (n=170)", | ||
"Tumor Prognostication": "NSCLC-Radiomics (n=420) + NSCLC-Radiogenomics (n=133)" | ||
} | ||
} | ||
], | ||
"references": [ | ||
{ | ||
"label": "DeepLesion: automated mining of large-scale lesion annotations and universal lesion detection with deep learning.", | ||
"uri": "https://pubmed.ncbi.nlm.nih.gov/30035154/" | ||
}, | ||
{ | ||
"label": "LUNA16", | ||
"uri": "https://www.cancerimagingarchive.net/collection/lidc-idri/" | ||
}, | ||
{ | ||
"label": "NSCLC-Radiomics", | ||
"uri": "https://www.cancerimagingarchive.net/collection/nsclc-radiomics/" | ||
}, | ||
{ | ||
"label": "NSCLC-Radiogenomics", | ||
"uri": "https://www.cancerimagingarchive.net/analysis-result/nsclc-radiogenomics-stanford/" | ||
} | ||
] | ||
}, | ||
"training": { | ||
"title": "Training Data", | ||
"text": "The training dataset consists of 11467 lesions sourced from 5,513 unique CT scans across 2,312 different patients. This was curated from the DeepLesion dataset [1] following two steps - 1) Lesions that did not contain anatomical labels were selected, 2) Scans with spacing 5mm or more were removed.", | ||
"references": [ | ||
{ | ||
"label": "DeepLesion: automated mining of large-scale lesion annotations and universal lesion detection with deep learning.", | ||
"uri": "https://pubmed.ncbi.nlm.nih.gov/30035154/" | ||
} | ||
] | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
""" | ||
--------------------------------------------------------- | ||
Author: Suraj Pai | ||
Email: [email protected] | ||
--------------------------------------------------------- | ||
""" | ||
|
||
import json | ||
import torch | ||
from fmcib.models import fmcib_model | ||
import SimpleITK as sitk | ||
from mhubio.core import Instance, InstanceData, IO, Module | ||
from fmcib.preprocessing import preprocess | ||
|
||
|
||
class FMCIBRunner(Module):
    """Extract FMCIB foundation-model features for one lesion and save them as JSON.

    For each instance the module locates the lesion via the centroid of the
    provided mask, hands the CT image path and that centroid to the FMCIB
    preprocessing routine, runs the FMCIB model on the result and writes the
    flattened feature vector to ``features.json``.
    """

    @IO.Instance()
    @IO.Input('in_data', 'nrrd:mod=ct', the='Input NRRD file')
    @IO.Input('in_mask', 'nrrd|json', the='Tumor mask for the input NRRD file')
    @IO.Output('feature_json', 'features.json', "json", bundle='model', the='output JSON file')
    def task(self, instance: Instance, in_data: InstanceData, in_mask: InstanceData, feature_json: InstanceData) -> None:
        """Compute the feature vector for a single instance.

        Args:
            instance: The instance being processed (not used directly here).
            in_data: CT image file; its absolute path is passed to ``preprocess``.
            in_mask: Lesion mask file used to derive the lesion centroid.
            feature_json: Output file that receives the ``feature_<idx>`` mapping.

        Raises:
            RuntimeError: Propagated from SimpleITK if the mask cannot be read
                or contains neither label 255 nor label 1.
        """
        # NOTE(review): the input filter also accepts 'json', but the file is
        # always read as an image below — confirm whether a JSON branch is missing.
        mask = sitk.ReadImage(in_mask.abspath)

        # Get the CoM of the mask
        label_shape_filter = sitk.LabelShapeStatisticsImageFilter()
        label_shape_filter.Execute(mask)

        # Masks are expected to mark the lesion with 255 or, failing that, 1.
        # Checking for the label explicitly avoids a bare `except` that would
        # also hide unrelated errors (and even KeyboardInterrupt).
        lesion_label = 255 if label_shape_filter.HasLabel(255) else 1
        x, y, z = label_shape_filter.GetCentroid(lesion_label)

        input_dict = {
            "image_path": in_data.abspath,
            "coordX": x,
            "coordY": y,
            "coordZ": z,
        }

        image = preprocess(input_dict)

        # Add a leading dimension before feeding the model
        # (presumably the batch axis — confirm against fmcib's preprocess output).
        image = image.unsqueeze(0)
        model = fmcib_model()

        # Inference only: switch to eval mode and disable gradient tracking.
        model.eval()
        with torch.no_grad():
            features = model(image)

        feature_dict = {
            f"feature_{idx}": feature
            for idx, feature in enumerate(features.flatten().tolist())
        }

        with open(feature_json.abspath, "w", encoding="utf-8") as f:
            json.dump(feature_dict, f)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .FMCIBRunner import FMCIBRunner |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we should choose a distinct file name or place the weights inside a folder, so that we keep the option to add successive models later and have all their weights organized. Let's spend a minute thinking about this.