Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MHub / GC - Add the Node21 baseline model #41

Merged
merged 18 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
a0eac94
added working MHub implementation of the node21 baseline
silvandeleemput Aug 2, 2023
890f2e6
minor modification Node21 output JSON description
silvandeleemput Aug 4, 2023
125f251
Merge branch 'MHubAI:main' into m-gc-node21-baseline
silvandeleemput Sep 14, 2023
b283d7d
Update and clean Node21 model related files
silvandeleemput Sep 14, 2023
4940d9a
Merge branch 'MHubAI:main' into m-gc-node21-baseline
silvandeleemput Oct 10, 2023
75cea2b
fix revert to default workdir
silvandeleemput Oct 10, 2023
8e9ea1c
Merge branch 'm-gc-node21-baseline' of github.com:DIAGNijmegen/MHubAI…
silvandeleemput Oct 10, 2023
8e17ccc
add meta.json
silvandeleemput Oct 23, 2023
9e9c1df
add mhub model definition import to Dockerfile
silvandeleemput Nov 23, 2023
3111827
Merge branch 'MHubAI:main' into m-gc-node21-baseline
silvandeleemput Nov 23, 2023
67fc8b5
Merge branch 'MHubAI:main' into m-gc-node21-baseline
miriam-groeneveld Apr 16, 2024
06e1e25
Updated meta.json
miriam-groeneveld Apr 18, 2024
325e8a5
meta.json - updated version number
silvandeleemput Apr 18, 2024
777ff39
meta.json - changed table entries from float into string
silvandeleemput Apr 18, 2024
5cc1190
Dockerfile - fixed source pull to version tag
silvandeleemput Apr 18, 2024
2bafc52
runner.py - add outputdatas for nodule probabilities and bounding boxes
silvandeleemput Apr 18, 2024
fb17009
wrap algorithm in cli to allow proper capture within mhub framework
silvandeleemput Apr 18, 2024
e27ec43
Dockerfile, meta.json - Updated source to v1.1, moved cli to src dir,…
silvandeleemput Apr 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions models/gc_node21_baseline/config/default.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Default MHub workflow configuration for the GC Node21 baseline model.
general:
  data_base_dir: /app/data
  version: 1.0
  description: GC Node21 Baseline nodule prediction from chest X-Rays (dicom to json)

# Modules of the workflow (presumably executed in the listed order — confirm against mhubio docs).
execute:
- DicomImporter
- MhaConverter
- Node21BaselineRunner
- DataOrganizer

# Per-module settings, keyed by module name.
modules:
  DicomImporter:
    source_dir: input_data
    import_dir: sorted_data
    sort_data: True
    meta:
      # The Node21BaselineRunner input filter selects on mod=cr, so imported data must carry this tag.
      mod: cr

  MhaConverter:
    engine: panimg

  DataOrganizer:
    targets:
    # NOTE(review): looks like this exports the JSON output as <sid>/nodules.json per instance — confirm.
    - json-->[i:sid]/nodules.json
41 changes: 41 additions & 0 deletions models/gc_node21_baseline/dockerfiles/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
FROM mhubai/base:latest

# Update authors label
LABEL authors="[email protected]"

# Install PyTorch 2.0.1 (CUDA enabled)
RUN pip3 install --no-cache-dir torch==2.0.1+cu118 torchvision==0.15.2+cu118 -f https://download.pytorch.org/whl/torch_stable.html

# Install git-lfs (required for unpacking model weights)
# - apt-get is used instead of apt because apt does not offer a stable CLI for scripted use
# - the apt lists are removed in the same layer to keep the image small
RUN apt-get update && apt-get install -y --no-install-recommends git-lfs && rm -rf /var/lib/apt/lists/*

# Install node21 baseline algorithm
# - Git clone to tmp directory (main branch, tag v1.1)
# - Extract relevant files to /opt/algorithm/
# - Place the ResNet50 backbone weights under hub/checkpoints so they are found via TORCH_HOME
# - Remove tmp directory
RUN git clone --depth 1 --branch v1.1 https://github.com/node21challenge/node21_detection_baseline.git /tmp/algorithm && \
    mkdir -p /opt/algorithm && \
    mv /tmp/algorithm/process.py /opt/algorithm/process.py && \
    mv /tmp/algorithm/postprocessing.py /opt/algorithm/postprocessing.py && \
    mv /tmp/algorithm/training_utils /opt/algorithm/training_utils && \
    mv /tmp/algorithm/model.pth /opt/algorithm/model.pth && \
    mkdir -p /opt/algorithm/hub/checkpoints && \
    mv /tmp/algorithm/resnet50-19c8e357.pth /opt/algorithm/hub/checkpoints/resnet50-19c8e357.pth && \
    rm -rf /tmp/algorithm

# Install Node21 additional dependencies
RUN pip3 install --no-cache-dir evalutils==0.2.4

# Import the MHub model definition
ARG MHUB_MODELS_REPO
RUN buildutils/import_mhub_model.sh gc_node21_baseline ${MHUB_MODELS_REPO}

# Configure app /opt/algorithm to be the torch hub directory to pick up the model weights correctly
ENV TORCH_HOME=/opt/algorithm

# Add Node21 source path to the PYTHONPATH to be able to import the processor code
ENV PYTHONPATH="/app:/opt/algorithm"

# Specify default entrypoint
ENTRYPOINT ["python3", "-m", "mhubio.run"]
CMD ["--config", "/app/models/gc_node21_baseline/config/default.yml"]
154 changes: 154 additions & 0 deletions models/gc_node21_baseline/meta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
{
"id": "37ec076c-6cca-4601-a7fc-bdbe7eecbdcf",
"name": "gc_node21_baseline",
"title": "NODE21 challenge baseline",
"summary": {
"description": "This model detects the location of nodules in Chest radiographs, and generates bounding boxes around these nodules.",
"inputs": [
{
"label": "Chest radiograph",
"description": "Chest radiograph",
"format": "DICOM",
"modality": "CR",
"bodypartexamined": "Chest",
"slicethickness": "",
"non-contrast": false,
"contrast": false
}
],
"outputs": [
{
"type": "Prediction",
"valueType": "JSON",
"label": "Nodule bounding boxes",
"description": "Multiple bounding boxes for the nodule locations for the input chest radiograph. Each bounding box also has an associated nodule likelihood (probability).",
"classes": []
}
],
"model": {
"architecture": "Faster R-CNN",
"training": "supervised",
"cmpapproach": "2D"
},
"data": {
"training": {
"vol_samples": 4882
},
"evaluation": {
"vol_samples": 579
},
"public": false,
"external": false
}
},
"details": {
"name": "NODE21 baseline",
"version": "1.1",
"devteam": "DIAGNijmegen (Diagnostic Image Analysis Group, Radboud UMC, The Netherlands)",
"type": "Faster R-CNN architecture using ResNet50 as the backbone.",
"date": {
"weights": "2021-11-01",
"code": "2022-02-01",
"pub": ""
},
"cite": "E. Sogancioglu et al., Nodule detection and generation on chest X-rays: NODE21 Challenge, in IEEE Transactions on Medical Imaging, doi: 10.1109/TMI.2024.3382042.",
"license": {
"code": "Apache 2.0",
"weights": "Apache 2.0"
},
"publications": [
{
"title": "Nodule detection and generation on chest X-rays: NODE21 Challenge",
"uri": "https://ieeexplore.ieee.org/document/10479589"
}
],
"github": "https://github.com/node21challenge/node21_detection_baseline",
"zenodo": "",
"colab": "",
"slicer": false
},
"info": {
"use": {
"title": "Intended use",
"text": "The algorithm processes a frontal radiograph of the chest and predicts the location and likelihood of nodules.",
"references": [{
"label": "Node21 baseline algorithm on grand-challenge",
"uri": "https://grand-challenge.org/algorithms/node21_baseline_detector/"
}],
"tables": []
},
"analyses": {
"title": "Evaluation",
"text": "The evaluation of this model was performed in two parts, firstly as part of the Node21 challenge [1], and secondly as part of the experiments for the publication. Specifically the AUC score and the sensitivity at various average false positive rates using FROC (1/4, 1/2, 1) were computed. The final metric used to rank the leaderboard will be calculated as follows: rank_metric = (0.75 * AUC) + (0.25 * Sensitivity at 1/4 FP/image) [2]",
"references": [
{
"label": "NODE21 challenge details",
"uri": "https://node21.grand-challenge.org/Details/"
},
{
"label": "NODE21 baseline algorithm evaluation results on grand-challenge.",
"uri": "https://node21.grand-challenge.org/evaluation/a626f004-1c38-45e1-9e35-89ccfb807e2d/"
}
],
"tables": [
{
"label": "Evaluation results on the first NODE21 testing cohort of 281 cases as reported in the NODE21 challenge.",
"entries": {
"AUC": "0.839",
"sensitivity_5": "0.532",
"sensitivity_25": "0.443",
"sensitivity_125": "0.350",
"final_ranking": "0.740"
}
},
{
"label": "Evaluation results on the second NODE21 testing cohort of 298 cases as described in the publication.",
"entries": {
"AUC": "0.816",
"sensitivity_5": "0.714",
"sensitivity_25": "0.635",
"sensitivity_125": "0.504"
}
}
]
},
"evaluation": {
"title": "Evaluation data",
"text": "The model was evaluated with two separate, private datasets [1]. The first dataset consists of 281 frontal chest X-rays, 166 of which are positive (with nodules). The second dataset used in the experiments described in the publication consists of 298 frontal radiographs with or without nodules. They originate from multiple medical centers and have been acquired with multiple different x-ray machines.",
"references": [
{
"label": "NODE21 data section",
"uri": "https://node21.grand-challenge.org/Data/"
}
],
"tables": []
},
"training": {
"title": "Training data",
"text": "The model was trained on the NODE21 training dataset [1] that was preprocessed with the publicly available OpenCXR library [2]. This dataset consists of 4882 radiographs, of which 1476 contain nodules. In order to tackle the data imbalance issue, images with nodules were oversampled until the number of negative images was reached. The model was trained for 30 epochs, and early stopping was used in case of no improvement in the validation set performance for 5 consecutive epochs.",
"references": [
{
"label": "NODE21 training data",
"uri": "https://zenodo.org/record/5548363"
},
{
"label": "OpenCXR library",
"uri": "https://github.com/DIAGNijmegen/opencxr"
}
],
"tables": []
},
"ethics": {
"title": "",
"text": "",
"references": [],
"tables": []
},
"limitations": {
"title": "Limitations",
"text": "This algorithm was developed for research purposes only.",
"references": [],
"tables": []
}
}
}
61 changes: 61 additions & 0 deletions models/gc_node21_baseline/src/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""
---------------------------------------------------
Mhub / DIAG - CLI for the Node21 baseline Algorithm
The model algorithm was wrapped in a CLI to ensure
the mhub framework is able to properly capture
the stdout generated by the algorithm
---------------------------------------------------

---------------------------------------------------
Author: Sil van de Leemput
Email: [email protected]
---------------------------------------------------
"""

import argparse
from pathlib import Path
import json

import SimpleITK

# Import Node21 baseline nodule detection algorithm from the node21_detection_baseline repo
from process import Noduledetection


def run_classifier(input_cxr: Path, output_json_file: Path):
    """Run the Node21 baseline nodule detection on one image and save the result as JSON.

    Args:
        input_cxr: Path of the input image, read with SimpleITK.
        output_json_file: Path of the JSON file the predictions are written to.
    """
    cxr_image = SimpleITK.ReadImage(str(input_cxr))

    # Both directories handed to the algorithm point to the same location
    scratch_dir = Path("/app/tmp")
    detector = Noduledetection(input_dir=scratch_dir, output_dir=scratch_dir)
    predictions = detector.predict(input_image=cxr_image)

    def box_order(box):
        # Negated probability gives a descending order on probability; the
        # corner positions (ascending) break ties between equal probabilities
        # so the resulting order does not depend on the incoming order.
        return (-box["probability"], *(tuple(corner) for corner in box["corners"]))

    predictions["boxes"] = sorted(predictions["boxes"], key=box_order)

    # Persist the (now deterministically ordered) predictions
    with open(output_json_file, "w") as json_file:
        json.dump(predictions, json_file, indent=4)


def run_classifier_cli():
    """Parse the command-line arguments and run the Node21 baseline classifier.

    Expects two positional arguments: the input CXR image (MHA) and the path of
    the JSON file to which the predicted nodule bounding boxes and
    probabilities are written.
    """
    # The text has to be passed as `description`: the first positional parameter
    # of ArgumentParser is `prog`, which would put this sentence in place of the
    # program name in the usage line.
    parser = argparse.ArgumentParser(description="CLI to run the Node21 baseline classifier")
    parser.add_argument("input_cxr", type=str, help="input CXR image (MHA)")
    parser.add_argument("output_json_file", type=str, help="Output nodule bounding boxes and probabilities predictions (JSON)")
    args = parser.parse_args()
    run_classifier(
        input_cxr=Path(args.input_cxr),
        output_json_file=Path(args.output_json_file)
    )


# Start the CLI only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    run_classifier_cli()
82 changes: 82 additions & 0 deletions models/gc_node21_baseline/utils/Node21BaselineRunner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
"""
-------------------------------------------------------------
Mhub / DIAG - Run Module for the GC Node21 baseline Algorithm
-------------------------------------------------------------

-------------------------------------------------------------
Author: Sil van de Leemput
Email: [email protected]
-------------------------------------------------------------
"""
import json
import sys
from pathlib import Path

from mhubio.core import Instance, InstanceData, IO, Module, Meta, ValueOutput, OutputDataCollection


# Absolute path of the CLI script: <parent of this file's directory>/src/cli.py
CLI_PATH = Path(__file__).parent.parent.absolute() / "src" / "cli.py"


# Value output for the probability of a single detected nodule: a float
# annotated with the range 0.0 - 1.0. Name, label, meta, type and description
# are all supplied through the decorators, so the class body stays empty.
@ValueOutput.Name('noduleprob')
@ValueOutput.Label('Nodule probability score.')
@ValueOutput.Meta(Meta(min=0.0, max=1.0, type="probability"))
@ValueOutput.Type(float)
@ValueOutput.Description('The predicted probability for a single lung nodule detected by the Node21Baseline algorithm.')
class NoduleProbability(ValueOutput):
    pass


# Value output for the 2D bounding box of a single detected nodule: a string
# value whose meta marks its format as JSON. Name, label, meta, type and
# description are all supplied through the decorators, so the class body
# stays empty.
@ValueOutput.Name('nodulebbox')
@ValueOutput.Label('Nodule 2D bounding box.')
@ValueOutput.Meta(Meta(format='json'))
@ValueOutput.Type(str)
@ValueOutput.Description('The predicted 2D bounding box for a single lung nodule detected by the Node21Baseline algorithm.')
class NoduleBoundingBox(ValueOutput):
    pass


class Node21BaselineRunner(Module):
    """Module that runs the Node21 baseline CLI and exposes its predictions."""

    @IO.Instance()
    @IO.Input('in_data', 'mha|nrrd|nifti:mod=cr', the='input chest X-Ray')
    @IO.Output('out_data', 'nodules.json', 'json:model=Node21Baseline', 'in_data', the='Node21 baseline nodule prediction in JSON format')
    @IO.OutputDatas('nodule_probs', NoduleProbability)
    @IO.OutputDatas('nodule_bounding_boxes', NoduleBoundingBox)
    def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData, nodule_probs: OutputDataCollection, nodule_bounding_boxes: OutputDataCollection) -> None:
        """Run the CLI on the input image and publish one probability and one box per nodule.

        Raises:
            FileNotFoundError: If the CLI did not produce the expected output file.
        """
        # The CLI takes positional arguments: input image first, output JSON second
        cli_command = [sys.executable, str(CLI_PATH)]
        cli_command.append(in_data.abspath)
        cli_command.append(out_data.abspath)

        # Execute the CLI in a separate process
        self.subprocess(cli_command, text=True)

        # The CLI is expected to have written the output file; fail loudly otherwise
        if not Path(out_data.abspath).is_file():
            raise FileNotFoundError(f"Node21BaseLineRunner - Could not find the expected "
                                    f"output file: {out_data.abspath}, something went wrong running the CLI.")

        # Load the predictions back from the JSON file written by the CLI
        with open(out_data.abspath, "r") as json_file:
            predictions = json.load(json_file)

        # Publish a probability and a bounding box value for every predicted nodule
        for nodule_idx, box in enumerate(predictions["boxes"]):
            probability = box["probability"]
            corners = box["corners"]

            prob_output = NoduleProbability()
            prob_output.description += f" (for nodule {nodule_idx})"
            prob_output.meta = Meta(id=nodule_idx, min=0.0, max=1.0, type="probability")
            prob_output.value = probability

            # The corners are stored as a JSON-encoded string
            bbox_output = NoduleBoundingBox()
            bbox_output.description += f" (for nodule {nodule_idx})"
            bbox_output.meta = Meta(id=nodule_idx, format="json")
            bbox_output.value = json.dumps(corners)

            nodule_probs.add(prob_output)
            nodule_bounding_boxes.add(bbox_output)
1 change: 1 addition & 0 deletions models/gc_node21_baseline/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .Node21BaselineRunner import *
Loading