diff --git a/.github/schemas/meta.schema.json b/.github/schemas/meta.schema.json index 245ab7ea..47a93327 100644 --- a/.github/schemas/meta.schema.json +++ b/.github/schemas/meta.schema.json @@ -298,6 +298,7 @@ }, "version": { "type": "string", + "pattern": "^\\d+(\\.\\d+)*$", "description": "The version of the model." }, "devteam": { diff --git a/.github/schemas/testmodel.schema.json b/.github/schemas/testmodel.schema.json new file mode 100644 index 00000000..3ef4692e --- /dev/null +++ b/.github/schemas/testmodel.schema.json @@ -0,0 +1,77 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "sample": { + "type": "object", + "properties": { + "idc_version": { + "type": ["string", "number"] + }, + "data": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "properties": { + "SeriesInstanceUID": { + "type": "string", + "pattern": "^[\\d\\.]+$" + }, + "aws_url": { + "type": "string", + "pattern": "^s3://[\\w\\-/]+/\\*$" + }, + "url": { + "type": "string", + "format": "uri" + }, + "description": { + "type": "string" + }, + "path": { + "type": "string", + "pattern": "^[^\\/][\\w\\-\\.\\/]+$" + } + }, + "oneOf": [ + { + "required": [ + "SeriesInstanceUID", + "aws_url", + "path" + ] + }, + { + "required": [ + "url", + "path" + ] + } + ] + } + } + }, + "required": [ + "idc_version", + "data" + ] + }, + "reference": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri" + } + }, + "required": [ + "url" + ] + } + }, + "required": [ + "sample", + "reference" + ] +} \ No newline at end of file diff --git a/.github/scripts/comment_check.py b/.github/scripts/comment_check.py new file mode 100644 index 00000000..3f1fd569 --- /dev/null +++ b/.github/scripts/comment_check.py @@ -0,0 +1,81 @@ +import sys, os, yaml, json, jsonschema + +YAML_TEST_DEFINITION_SCHEMA_FILE = ".github/schemas/testmodel.schema.json" + +def extract_yaml_test_definition(comment: str): + + # find a code block starting with ```yaml and ending with ``` + start = comment.find("```yaml") + end = comment.find("```", start + 1) + if start == -1 or end == -1: + raise Exception("No YAML code block found in comment") + + # extract the code block + yaml_code = comment[start:end] + + # remove the code block markers + yaml_code = yaml_code.replace("```yaml", "").strip() + + return yaml_code + +def validate_yaml_test_definition(yaml_str: str): + + # load yaml into dict + test_definition = yaml.safe_load(yaml_str) + + # load schema + with open(YAML_TEST_DEFINITION_SCHEMA_FILE, "r") as f: + schema = json.load(f) + + # validate + jsonschema.validate(test_definition, schema) + + +def set_action_output(output_name, value) : + """Sets the GitHub Action output. 
+
+    Keyword arguments:
+    output_name - The name of the output
+    value - The value of the output
+    """
+    if "GITHUB_OUTPUT" in os.environ:
+        with open(os.environ["GITHUB_OUTPUT"], "a") as f:
+            print("{0}={1}".format(output_name, value), file=f)
+
+
+if __name__ == "__main__":
+
+    try:
+        # get comment body from first argument
+        comment = sys.argv[1]
+
+        # print comment
+        print("Comment ----------------------")
+        print(comment)
+        print()
+
+        # extract yaml test definition
+        yaml_str = extract_yaml_test_definition(comment)
+
+        # validate yaml test definition
+        validate_yaml_test_definition(yaml_str)
+
+        # print yaml
+        print("Test Definition --------------")
+        print(yaml_str)
+        print()
+
+        # print success message
+        print("YAML test definition is valid")
+
+        # set environment variable for following steps
+        set_action_output("test_report", "passed")
+
+    except Exception as e:
+        # set environment variable for following steps
+        set_action_output("test_report", "failed")
+
+        # print error message
+        print("YAML test definition is invalid")
+        print(e)
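Taken together, the schema and script above implement the round trip for a /test request. Below is a minimal sketch of a hypothetical comment and the same extract-and-validate steps the script performs; all sample values (IDC version, SeriesInstanceUID, bucket path, reference URL) are invented for illustration, and the script is assumed to run from the repository root:

    import json
    import yaml
    import jsonschema

    yaml_body = (
        "sample:\n"
        "  idc_version: 17\n"
        "  data:\n"
        "    - SeriesInstanceUID: 1.2.3.4.5.6.7.8.9\n"
        "      aws_url: s3://example-bucket/example-series/*\n"
        "      path: dicom\n"
        "reference:\n"
        "  url: https://example.com/expected_output.zip\n"
    )

    # a /test comment carries the test definition in a ```yaml ... ``` code block
    comment = "/test\n```yaml\n" + yaml_body + "```"

    # extract and validate, mirroring extract_yaml_test_definition / validate_yaml_test_definition
    start = comment.find("```yaml")
    end = comment.find("```", start + 1)
    yaml_code = comment[start:end].replace("```yaml", "").strip()

    with open(".github/schemas/testmodel.schema.json", "r") as f:
        schema = json.load(f)

    jsonschema.validate(yaml.safe_load(yaml_code), schema)  # raises on an invalid definition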
diff --git a/.github/scripts/mhub_check.py b/.github/scripts/mhub_check.py
index 6a1c0c2e..a53c3c70 100644
--- a/.github/scripts/mhub_check.py
+++ b/.github/scripts/mhub_check.py
@@ -34,9 +34,12 @@
   # check folder structure
   utils.validateModelFolder(base='models', model_name=model_name)
 
-  # check meta.json
+  # check meta.json (schema)
   utils.validateModelMetaJson(model_meta_json_file=os.path.join('models', model_name, 'meta.json'))
 
+  # check additional requirements for meta.json
+  utils.validateModelMetaJson_modelName(model_meta_json_file=os.path.join('models', model_name, 'meta.json'), model_name=model_name)
+
   # validate dockerfile
   utils.validateDockerfile(base='models', model_name=model_name)
 
@@ -52,6 +55,7 @@
   print()
   print("---------------- CHECK FAILED ----------------")
   print("An unexpected error occured during compliance checks.")
+  print(str(e))
   print()
   sys.exit(1)
 
@@ -60,4 +64,4 @@
 print("---------------- CHECK PASSED ----------------")
 print("All compliance checks passed.")
 print("Note: compliance checks are a beta feature. Passing all automated compliance checks does not guarantee that your model is compliant with the MHub standard. We will now perform a manual review of your model. Testing your model on a public dataset is obligatory.")
-print()
\ No newline at end of file
+print()
diff --git a/.github/scripts/utils.py b/.github/scripts/utils.py
index eb0eddb6..6da3e60c 100644
--- a/.github/scripts/utils.py
+++ b/.github/scripts/utils.py
@@ -110,6 +110,16 @@ def validateModelMetaJson(model_meta_json_file: str):
     except jsonschema.ValidationError as e:
         raise MHubComplianceError(f"Model meta json is not compliant with the schema: {e.message}", DocuRef.MODEL_META_JSON)
 
+def validateModelMetaJson_modelName(model_meta_json_file: str, model_name: str):
+
+    # load model meta json
+    with open(model_meta_json_file, "r") as f:
+        model_meta_json = json.load(f)
+
+    # check that the model name is correct
+    if model_meta_json["name"] != model_name:
+        raise MHubComplianceError(f"Model name in meta.json does not match model name in folder structure: {model_meta_json['name']} != {model_name}", DocuRef.MODEL_META_JSON)
+
 def validateDockerfile(base: str, model_name: str):
 
     # get dockerfile path
diff --git a/.github/workflows/submission_review.yml b/.github/workflows/submission_review.yml
new file mode 100644
index 00000000..81496258
--- /dev/null
+++ b/.github/workflows/submission_review.yml
@@ -0,0 +1,77 @@
+name: MHub Contribution Magic Keywords
+
+on:
+  issue_comment:
+    types: [created, edited]
+
+permissions:
+  pull-requests: write
+
+jobs:
+  request_review:
+    if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/review')}}
+    name: Request Review
+    runs-on: [ubuntu-latest]
+
+    steps:
+      - name: Add Request Review Label
+        uses: actions-ecosystem/action-add-labels@v1
+        with:
+          labels: REQUEST REVIEW
+
+  request_test:
+    if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/test') }}
+    name: Request Test
+    runs-on: [ubuntu-latest]
+
+    steps:
+
+      # Checkout the latest code from the repo
+      - name: Checkout repo
+        uses: actions/checkout@v4
+
+      # Setup which version of Python to use
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.8
+
+      # install python dependencies
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install jsonschema PyYAML
+
+      # Display the Python version being used
+      - name: Display Python version
+        run: python -c "import sys; print(sys.version)"
+
+      - name: Run check comment script
+        id: check_comment
+        env:
+          COMMENT: ${{ github.event.comment.body }}
+        run: python .github/scripts/comment_check.py "$COMMENT"
+
+      - name: Add TEST REQUESTED Label
+        uses: actions-ecosystem/action-add-labels@v1
+        if: ${{ steps.check_comment.outputs.test_report == 'passed' }}
+        with:
+          labels: TEST REQUESTED
+
+      - name: Remove INVALID TEST REQUEST Label
+        uses: actions-ecosystem/action-remove-labels@v1
+        if: ${{ steps.check_comment.outputs.test_report == 'passed' }}
+        with:
+          labels: INVALID TEST REQUEST
+
+      - name: Remove TEST REQUESTED Label
+        uses: actions-ecosystem/action-remove-labels@v1
+        if: ${{ steps.check_comment.outputs.test_report == 'failed' }}
+        with:
+          labels: TEST REQUESTED
+
+      - name: Add INVALID TEST REQUEST Label
+        uses: actions-ecosystem/action-add-labels@v1
+        if: ${{ steps.check_comment.outputs.test_report == 'failed' }}
+        with:
+          labels: INVALID TEST REQUEST
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..45181a18
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 MHub.ai
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/base/bin/mhub.version b/base/bin/mhub.version
index 9c161feb..da91b5e5 100755
--- a/base/bin/mhub.version
+++ b/base/bin/mhub.version
@@ -15,7 +15,7 @@ echo -e "segdb==${SEGDB}"
 if [ -d "$MODEL_FOLDER" ]; then
 
   # model repo commit
-  MODEL=$(git -C $MODEL_FOLDER show -s | grep commit | cut -d" " -f 2)
+  { MODEL=$(< /app/buildutils/model_commit_hash.txt); } 2> /dev/null
   echo -e "model==${MODEL}"
 
   echo -e "+++"
@@ -24,8 +24,9 @@ if [ -d "$MODEL_FOLDER" ]; then
   for mdir in /app/models/* ; do
     MVERSION=$(jq -r '.details.version' ${mdir}/meta.json)
     echo -e "mhub.$(basename ${mdir})==${MVERSION}"
-    echo -e "+++"
   done
+
+  echo -e "+++"
 else
   echo -e "+++"
   echo -e " "
@@ -36,4 +37,21 @@ fi
 # pip freeze without segdb and mhubio (already on top of the lists,
 # since for now they are commits). Ideally, this should return only pip versions
 # (although some package might be installed from git by contributors)
-pip freeze | grep -v "segdb" | grep -v "mhubio"
\ No newline at end of file
+pip freeze | grep -v "segdb" | grep -v "mhubio"
+
+# collect additional information on installed system dependencies.
+# to allow contributors to include additional dependencies, we should use an environment variable or a file instead.
+
+# versions of python, pip, plastimatch, jq, git, libopenslide-dev, libvips-dev, dcm2niix, ffmpeg, libsm6, libxext6
+# echo -e "+++"
+# echo -e "python==$(python3 --version 2>&1)"
+# echo -e "pip==$(pip --version 2>&1)"
+# echo -e "plastimatch==$(plastimatch --version 2>&1)"
+# echo -e "jq==$(jq --version 2>&1)"
+# echo -e "git==$(git --version 2>&1)"
+# echo -e "libopenslide-dev==$(dpkg -s libopenslide-dev | grep Version)"
+# echo -e "libvips-dev==$(dpkg -s libvips-dev | grep Version)"
+# echo -e "dcm2niix==$(dcm2niix -h | grep "dcm2niiX version" | cut -d"v" -f3)"
+# echo -e "ffmpeg==$(ffmpeg -version 2>&1 | grep ffmpeg | cut -d" " -f3)"
+# echo -e "libsm6==$(dpkg -s libsm6 | grep Version)"
+# echo -e "libxext6==$(dpkg -s libxext6 | grep Version)"
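For context, mhub.version emits a flat name==version manifest; the change above reads the model commit from a file baked in at build time instead of querying .git, and moves the closing +++ separator out of the per-model loop so it is printed once after all model entries. Purely for illustration (package names, versions, and hash placeholders invented), the output now has roughly this shape:

    segdb==<commit hash>
    model==<commit hash from /app/buildutils/model_commit_hash.txt>
    +++
    mhub.casust==1.0.0
    +++
    <filtered pip freeze output>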
diff --git a/base/buildutils/import_mhub_model.sh b/base/buildutils/import_mhub_model.sh
index 7968b452..2bdd6f5c 100755
--- a/base/buildutils/import_mhub_model.sh
+++ b/base/buildutils/import_mhub_model.sh
@@ -40,4 +40,16 @@ git init
 git fetch ${REPO_URL} ${REPO_BRANCH}
 git merge FETCH_HEAD
 git sparse-checkout set "models/${MODEL_NAME}"
+
+# get the commit hash, store it in a file and print it out
+MODEL_COMMIT_HASH=$(git rev-parse HEAD)
+echo ${MODEL_COMMIT_HASH} > buildutils/model_commit_hash.txt
+
+# print the model commit
+echo
+echo "Imported model definition from MHub models repository."
+echo "└── COMMIT HASH .... ${MODEL_COMMIT_HASH}"
+echo
+
+# remove the .git folder
 rm -r .git
\ No newline at end of file
diff --git a/base/dockerfiles/Dockerfile b/base/dockerfiles/Dockerfile
index ba55e8f2..9b916203 100644
--- a/base/dockerfiles/Dockerfile
+++ b/base/dockerfiles/Dockerfile
@@ -47,6 +47,7 @@ RUN pip3 install --upgrade pip && pip3 install --no-cache-dir \
     panimg \
     pydicom \
     pydicom-seg \
+    highdicom \
     rt_utils \
     PyYAML \
     pyplastimatch \
diff --git a/models/casust/meta.json b/models/casust/meta.json
new file mode 100644
index 00000000..b007c70c
--- /dev/null
+++ b/models/casust/meta.json
@@ -0,0 +1,102 @@
+{
+  "id": "abaa7929-b02c-422f-8c97-7e4217d63487",
+  "name": "casust",
+  "title": "CaSuSt",
+  "summary": {
+    "description": "A deep learning model for cardiac sub-structure delineation on planning CT scans. The model delineates the heart contours and seven cardiac substructures based on individually trained binary models.",
+    "inputs": [ {
+      "label": "Input Image",
+      "description": "The planning chest CT scan of an RT patient.",
+      "format": "DICOM",
+      "modality": "CT",
+      "bodypartexamined": "Chest",
+      "slicethickness": "2.5mm",
+      "non-contrast": true,
+      "contrast": false
+    } ],
+    "outputs": [ {
+      "type": "Segmentation",
+      "classes": [
+        "HEART",
+        "LEFT_VENTRICLE",
+        "RIGHT_VENTRICLE",
+        "LEFT_ATRIUM",
+        "RIGHT_ATRIUM",
+        "CORONARY_ARTERY_LAD",
+        "CORONARY_ARTERY_CFLX",
+        "CORONARY_ARTERY_RIGHT"
+      ]
+    } ],
+    "model": {
+      "architecture": "Seven individual binary U-Net models",
+      "training": "supervised",
+      "cmpapproach": "2D"
+    },
+    "data": {
+      "training": {
+        "vol_samples": 126
+      },
+      "evaluation": {
+        "vol_samples": 22
+      },
+      "public": false,
+      "external": false
+    }
+  },
+  "details": {
+    "name": "Cardiac Substructure Delineation",
+    "version": "1.0.0",
+    "devteam": "Leonard N\u00fcrnberg, MAASTRO Clinic, Clinical Data Science Radiotherapie",
+    "type": "Individual 2D binary U-Net models",
+    "date": {
+      "weights": "22/03/02",
+      "code": "22/03/02",
+      "pub": "22/06/23"
+    },
+    "cite": "N\u00fcrnberg, L, Bontempi, D, De Ruysscher, D, et al. Deep learning segmentation of heart substructures in radiotherapy treatment planning. Physica Medica: European journal of medical physics, 2022",
+    "license": {
+      "code": "Apache 2.0",
+      "weights": "Apache 2.0"
+    },
+    "publications": [
+      {
+        "title": "Deep learning segmentation of heart substructures in radiotherapy treatment planning",
+        "uri": "https:\/\/cris.maastrichtuniversity.nl\/en\/publications\/deep-learning-segmentation-of-heart-substructures-in-radiotherapy"
+      }
+    ],
+    "github": "https:\/\/github.com\/LennyN95\/CaSuSt",
+    "slicer": true
+  },
+  "info": {
+    "use": {
+      "title": "Intended Use",
+      "text": "This model is intended to segment the heart and seven substructures (left ventricle, right ventricle, left atrium, right atrium, coronary artery LAD, coronary artery CFLX, coronary artery right) on planning CT scans of radiotherapy patients. For each substructure, an individual model has been trained. The model is intended to be used in the context of radiotherapy treatment planning, to support the delineation of the heart and its substructures and has been validated by two radiation oncologists."
+    },
+    "analyses": {
+      "title": "Quantitative Analyses",
+      "text": "The model's performance was assessed against test data and compared to human readers using the surface Dice score with a 5mm tolerance for the four larger structures (ventricles and atria) and a 3mm tolerance for the three smaller structures (vessels).",
+      "tables": [
+        {
+          "label": "Mean Surface Dice Score",
+          "entries": {
+            "LEFT_VENTRICLE": "0.88 ± 0.07",
+            "RIGHT_VENTRICLE": "0.83 ± 0.08",
+            "LEFT_ATRIUM": "0.88 ± 0.09",
+            "RIGHT_ATRIUM": "0.87 ± 0.09",
+            "CORONARY_ARTERY_LAD": "0.7 ± 0.16",
+            "CORONARY_ARTERY_CFLX": "0.56 ± 0.27",
+            "CORONARY_ARTERY_RIGHT": "0.48 ± 0.18"
+          }
+        }
+      ]
+    },
+    "evaluation": {
+      "title": "Evaluation Data",
+      "text": "The model was evaluated on a 15% split of the dataset."
+    },
+    "training": {
+      "title": "Training Data",
+      "text": "A dataset was provided by the University Hospital of Turin, Italy. The dataset contains a single pCT scan for 80 lung cancer patients and 80 lymphoma patients, resulting in a total of 160 fully annotated pCT scans in DICOM format. The ground truth delineations were performed by multiple radiation oncologists and residents with different levels of expertise (range 1-12 years). To the best of our knowledge, no standard protocol was followed. The most common spacing, found in 100 of the scans, was 1.171875 x 1.171875 x 3.0 mm. All training samples have been re-sampled to this spacing."
+    }
+  }
+}
diff --git a/models/fmcib_radiomics/config/default.yml b/models/fmcib_radiomics/config/default.yml
new file mode 100644
index 00000000..297a8a14
--- /dev/null
+++ b/models/fmcib_radiomics/config/default.yml
@@ -0,0 +1,22 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: FMCIB pipeline starting from DICOM files and centroids in json files or slicer exports named by their SeriesInstanceUID
+
+execute:
+- DicomImporter
+- FileImporter
+- NiftiConverter
+- FMCIBRunner
+- DataOrganizer
+
+modules:
+
+  FileImporter:
+    instance_id: sid
+    meta: type=fmcibcoordinates
+    type: json
+
+  DataOrganizer:
+    targets:
+    - json:type=fmcibfeatures-->[i:sid]/features.json
\ No newline at end of file
diff --git a/models/fmcib_radiomics/config/from_centroids.yml b/models/fmcib_radiomics/config/from_centroids.yml
new file mode 100644
index 00000000..462fc8b4
--- /dev/null
+++ b/models/fmcib_radiomics/config/from_centroids.yml
@@ -0,0 +1,20 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: "FMCIB pipeline starting from a coordinate json file"
+
+execute:
+- FileStructureImporter
+- FMCIBRunner
+- DataOrganizer
+
+modules:
+  FileStructureImporter:
+    structures:
+    - $patientID/CT.nrrd@instance@nrrd:mod=ct
+    - $patientID/centroids.json@json:type=fmcibcoordinates
+    import_id: patientID
+
+  DataOrganizer:
+    targets:
+    - json:type=fmcibfeatures-->[i:patientID]/features.json
\ No newline at end of file
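For contributors preparing inputs for the from_centroids workflow above, here is a minimal sketch of the per-patient centroids.json it imports. The file must satisfy coords.schema.json (added further down in this diff); the coordinate values are invented for illustration:

    import json

    # hypothetical centroid of a lesion, in physical (LPS) image coordinates,
    # matching the coordX/coordY/coordZ properties required by coords.schema.json
    coordinates = {
        "coordX": -46.8,
        "coordY": 12.3,
        "coordZ": -201.5,
    }

    with open("centroids.json", "w") as f:
        json.dump(coordinates, f)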
diff --git a/models/fmcib_radiomics/config/from_nrrd_mask.yml b/models/fmcib_radiomics/config/from_nrrd_mask.yml
new file mode 100644
index 00000000..22644ffc
--- /dev/null
+++ b/models/fmcib_radiomics/config/from_nrrd_mask.yml
@@ -0,0 +1,21 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: "FMCIB pipeline starting from an nrrd image file and an nrrd binary mask of the GTV."
+
+execute:
+- FileStructureImporter
+- CentroidExtractor
+- FMCIBRunner
+- DataOrganizer
+
+modules:
+  FileStructureImporter:
+    structures:
+    - $patientID/CT.nrrd@instance@nrrd:mod=ct
+    - $patientID/masks/GTV-1.nrrd@nrrd:mod=seg
+    import_id: patientID
+
+  DataOrganizer:
+    targets:
+    - json:type=fmcibfeatures-->[i:patientID]/features.json
\ No newline at end of file
diff --git a/models/fmcib_radiomics/config/from_slicer.yml b/models/fmcib_radiomics/config/from_slicer.yml
new file mode 100644
index 00000000..1c5682a9
--- /dev/null
+++ b/models/fmcib_radiomics/config/from_slicer.yml
@@ -0,0 +1,20 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: "FMCIB pipeline"
+
+execute:
+- FileStructureImporter
+- FMCIBRunner
+- DataOrganizer
+
+modules:
+  FileStructureImporter:
+    structures:
+    - $patientID@instance/re:^.*\.nrrd$::@nrrd:mod=ct
+    - $patientID/re:^.*\.json$::@json:type=fmcibcoordinates
+    import_id: patientID
+
+  DataOrganizer:
+    targets:
+    - json:type=fmcibfeatures-->[i:patientID]/features.json
\ No newline at end of file
diff --git a/models/fmcib_radiomics/dockerfiles/Dockerfile b/models/fmcib_radiomics/dockerfiles/Dockerfile
new file mode 100644
index 00000000..54059428
--- /dev/null
+++ b/models/fmcib_radiomics/dockerfiles/Dockerfile
@@ -0,0 +1,21 @@
+FROM mhubai/base:latest
+
+LABEL authors="bspai@bwh.harvard.edu,lnuernberg@bwh.harvard.edu"
+
+# download model weights
+RUN wget "https://zenodo.org/records/10528450/files/model_weights.torch?download=1" -O /app/model_weights.torch
+
+# clone mhub implementation
+ARG MHUB_MODELS_REPO
+RUN buildutils/import_mhub_model.sh fmcib_radiomics ${MHUB_MODELS_REPO}
+
+# Install additional pip packages
+RUN pip3 install --upgrade pip && pip3 install --no-cache-dir \
+  jsonschema==4.21.1
+
+# Install FMCIB package, which should install all remaining dependencies
+RUN pip3 install foundation-cancer-image-biomarker --pre
+
+ENTRYPOINT ["mhub.run"]
+CMD ["--workflow", "default"]
diff --git a/models/fmcib_radiomics/meta.json b/models/fmcib_radiomics/meta.json
new file mode 100644
index 00000000..ba3f4087
--- /dev/null
+++ b/models/fmcib_radiomics/meta.json
@@ -0,0 +1,137 @@
+{
+  "id": "26e98e14-b605-4007-bd8b-79d517c935b5",
+  "name": "fmcib_radiomics",
+  "title": "Foundation Model for Cancer Imaging Biomarkers",
+  "summary": {
+    "description": "A foundation model for cancer imaging biomarker discovery trained through self-supervised learning using a dataset of 11,467 radiographic lesions.
The model features can be used as a data-driven substitute for classical radiomic features", + "inputs": [ + { + "label": "Input CT Image", + "description": "CT imaging data containing lesions of interest, such as nodules or tumors", + "format": "DICOM", + "modality": "CT", + "slicethickness": "5mm", + "bodypartexamined": "Whole", + "non-contrast": true, + "contrast": true + }, + { + "label": "Center of mass", + "description": "Center of mass of the lesion in the CT image", + "format": "JSON", + "modality": "JSON", + "slicethickness": "5mm", + "bodypartexamined": "Whole", + "non-contrast": true, + "contrast": true + } + ], + "outputs": [ + { + "type": "Prediction", + "valueType": "Feature vector", + "description": "A set of features extracted from the input CT image", + "label": "Features" + } + ], + "model": { + "architecture": "3D ResNet50", + "training": "other", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 11467 + }, + "evaluation": { + "vol_samples": 1944 + }, + "public": true, + "external": true + } + }, + "details": { + "name": "Foundation Model for Cancer Imaging Biomarkers", + "version": "0.0.1", + "type": "Feature extractor", + "devteam": "Researchers from the Artificial Intelligence in Medicine (AIM) Program, Mass General Brigham, Harvard Medical School and other institutions", + "date": { + "pub": "2023 (preprint)", + "code": "n/a", + "weights": "18.01.2024" + }, + "cite": "Pai, S., Bontempi, D., Hadzic, I., Prudente, V., et al. Foundation Model for Cancer Imaging Biomarkers. 2023.", + "license": { + "code": "MIT", + "weights": "CC BY-NC 4.0" + }, + "publications": [ + { + "title": "Foundation Model for Cancer Imaging Biomarkers", + "uri": "https://www.medrxiv.org/content/10.1101/2023.09.04.23294952v1" + } + ], + "github": "https://github.com/AIM-Harvard/foundation-cancer-image-biomarker", + "zenodo": "https://zenodo.org/records/10528450", + "colab": "https://colab.research.google.com/drive/1JMtj_4W0uNPzrVnM9EpN1_xpaB-5KC1H?usp=sharing", + "slicer": false + }, + "info": { + "use": { + "title": "Intended Use", + "text": "The foundation model is intended to extract features from several different types of lesions (lung, liver, kidney, mediastinal, abdominal, pelvic, bone and soft tissue). These features can be used for a variety of predictive and clustering tasks as a data-driven substitute for classical radiomic features." + }, + "analyses": { + "title": "Quantitative Analyses", + "text": "The model's performance was assessed using three different downstream tasks, including malignancy prediction and lung cancer risk prediction. Refer to the publication for more details [1].", + "references": [ + { + "label": "Foundation model for cancer image biomarkers", + "uri": "https://www.medrxiv.org/content/10.1101/2023.09.04.23294952v1" + } + ] + }, + "evaluation": { + "title": "Evaluation Data", + "text": "The evaluation dataset consists of 1,944 lesions, including 1,221 lesions for anatomical site classification, 170 nodules for malignancy prediction, and 553 tumors (420 LUNG1 + 133 RADIO) for prognostication. 
The dataset was held out from the training data and gathered from several different sources [1, 2, 3, 4].",
+      "tables": [
+        {
+          "label": "Evaluation Tasks & Datasets",
+          "entries": {
+            "Lesion Anatomical Site Prediction": "DeepLesion (n=1221)",
+            "Nodule Malignancy Prediction": "LUNA16 (n=170)",
+            "Tumor Prognostication": "NSCLC-Radiomics (n=420) + NSCLC-Radiogenomics (n=133)"
+          }
+        }
+      ],
+      "references": [
+        {
+          "label": "DeepLesion: automated mining of large-scale lesion annotations and universal lesion detection with deep learning.",
+          "uri": "https://pubmed.ncbi.nlm.nih.gov/30035154/"
+        },
+        {
+          "label": "LUNA16",
+          "uri": "https://www.cancerimagingarchive.net/collection/lidc-idri/"
+        },
+        {
+          "label": "NSCLC-Radiomics",
+          "uri": "https://www.cancerimagingarchive.net/collection/nsclc-radiomics/"
+        },
+        {
+          "label": "NSCLC-Radiogenomics",
+          "uri": "https://www.cancerimagingarchive.net/analysis-result/nsclc-radiogenomics-stanford/"
+        }
+      ]
+    },
+    "training": {
+      "title": "Training Data",
+      "text": "The training dataset consists of 11,467 lesions sourced from 5,513 unique CT scans across 2,312 different patients. This was curated from the DeepLesion dataset [1] following two steps: 1) lesions that did not contain anatomical labels were selected, 2) scans with a spacing of 5mm or more were removed.",
+      "references": [
+        {
+          "label": "DeepLesion: automated mining of large-scale lesion annotations and universal lesion detection with deep learning.",
+          "uri": "https://pubmed.ncbi.nlm.nih.gov/30035154/"
+        }
+      ]
+    }
+  }
+}
diff --git a/models/fmcib_radiomics/utils/CentroidExtractor.py b/models/fmcib_radiomics/utils/CentroidExtractor.py
new file mode 100644
index 00000000..1e5154cb
--- /dev/null
+++ b/models/fmcib_radiomics/utils/CentroidExtractor.py
@@ -0,0 +1,43 @@
+"""
+---------------------------------------------------------
+Author: Leonard Nürnberg
+Email: lnuernberg@bwh.harvard.edu
+Date: 06.03.2024
+---------------------------------------------------------
+"""
+
+import json, jsonschema
+from mhubio.core import Instance, InstanceData, IO, Module
+import SimpleITK as sitk
+
+class CentroidExtractor(Module):
+
+    @IO.Instance()
+    @IO.Input('in_mask', 'nrrd:mod=seg', the='Tumor segmentation mask for the input NRRD file.')
+    @IO.Output('centroids_json', 'centroids.json', "json:type=fmcibcoordinates", the='JSON file containing 3D coordinates of the centroid of the input mask.')
+    def task(self, instance: Instance, in_mask: InstanceData, centroids_json: InstanceData) -> None:
+
+        # read the input mask
+        mask = sitk.ReadImage(in_mask.abspath)
+
+        # get the center of mass from the mask via SimpleITK
+        label_shape_filter = sitk.LabelShapeStatisticsImageFilter()
+        label_shape_filter.Execute(mask)
+
+        # binary masks may encode the foreground as 255 or as 1; try 255 first
+        try:
+            centroid = label_shape_filter.GetCentroid(255)
+        except:
+            centroid = label_shape_filter.GetCentroid(1)
+
+        # extract x, y, and z coordinates from the centroid
+        x, y, z = centroid
+
+        # set up the coordinate dictionary
+        coordinate_dict = {
+            "coordX": x,
+            "coordY": y,
+            "coordZ": z,
+        }
+
+        # write the coordinate dictionary to a json file
+        with open(centroids_json.abspath, "w") as f:
+            json.dump(coordinate_dict, f)
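A quick way to see what GetCentroid returns, and why the 255/1 fallback above matters, is to run the same filter on a tiny synthetic mask. A sketch (mask geometry invented; with the default origin and unit spacing the physical centroid of the block below is (31.0, 31.0, 31.0)):

    import SimpleITK as sitk

    # build a 64^3 binary mask with a 3x3x3 foreground block labeled 1
    mask = sitk.Image(64, 64, 64, sitk.sitkUInt8)
    for x in range(30, 33):
        for y in range(30, 33):
            for z in range(30, 33):
                mask[x, y, z] = 1

    label_shape_filter = sitk.LabelShapeStatisticsImageFilter()
    label_shape_filter.Execute(mask)

    # GetCentroid is queried per label value: this succeeds for label 1,
    # while GetCentroid(255) would raise here and trigger the fallback above
    print(label_shape_filter.GetCentroid(1))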
diff --git a/models/fmcib_radiomics/utils/FMCIBRunner.py b/models/fmcib_radiomics/utils/FMCIBRunner.py
new file mode 100644
index 00000000..0729413d
--- /dev/null
+++ b/models/fmcib_radiomics/utils/FMCIBRunner.py
@@ -0,0 +1,114 @@
+"""
+---------------------------------------------------------
+Author: Suraj Pai, Leonard Nürnberg
+Email: bspai@bwh.harvard.edu, lnuernberg@bwh.harvard.edu
+Date: 06.03.2024
+---------------------------------------------------------
+"""
+import json, jsonschema, os
+from mhubio.core import Instance, InstanceData, IO, Module
+
+COORDS_SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "coords.schema.json")
+SLICERMARKUP_SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "slicermarkup.schema.json")
+
+def is_valid(json_data: dict, schema_file_path: str) -> bool:
+    """Check if a json file is valid according to a given schema.
+
+    Args:
+        json_data (dict): The json data to be validated.
+        schema_file_path (str): The path to the schema file.
+
+    Returns:
+        bool: True if the json file is valid according to the schema, False otherwise.
+    """
+    with open(schema_file_path) as f:
+        schema = json.load(f)
+
+    try:
+        jsonschema.validate(json_data, schema)
+        return True
+    except jsonschema.ValidationError:
+        return False
+
+def get_coordinates(json_file_path: str) -> dict:
+
+    # read json file
+    with open(json_file_path) as f:
+        json_data = json.load(f)
+
+    # check which schema the json file adheres to
+    if is_valid(json_data, COORDS_SCHEMA_PATH):
+        return json_data
+
+    if is_valid(json_data, SLICERMARKUP_SCHEMA_PATH):
+        markups = json_data["markups"]
+
+        assert len(markups) == 1, "Currently, only one point per file is supported."
+        markup = markups[0]
+
+        assert markup["coordinateSystem"] == "LPS"
+
+        controlPoints = markup["controlPoints"]
+        assert len(controlPoints) == 1
+
+        position = controlPoints[0]["position"]
+        return {
+            "coordX": position[0],
+            "coordY": position[1],
+            "coordZ": position[2]
+        }
+
+    # neither schema matched
+    raise ValueError("The input json file does not adhere to the expected schema.")
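get_coordinates therefore accepts either the flat coordinate file shown earlier or a 3D Slicer markup export. A minimal sketch of the second form (a single LPS fiducial; all values invented) and the reduction it undergoes:

    # hypothetical, minimal Slicer markup export with one fiducial point
    slicer_markup = {
        "@schema": "https://raw.githubusercontent.com/Slicer/Slicer/main/Modules/Loadable/Markups/Resources/Schema/markups-v1.0.3-schema.json#",
        "markups": [{
            "type": "Fiducial",
            "coordinateSystem": "LPS",
            "controlPoints": [
                {"id": "1", "label": "F-1", "position": [-46.8, 12.3, -201.5]}
            ]
        }]
    }

    # the same reduction get_coordinates() performs on such a file
    position = slicer_markup["markups"][0]["controlPoints"][0]["position"]
    coordinates = {"coordX": position[0], "coordY": position[1], "coordZ": position[2]}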
+def fmcib(input_dict: dict, json_output_file_path: str):
+    """Run the FMCIB pipeline.
+
+    Args:
+        input_dict (dict): The input dictionary containing the image path and the seed point coordinates.
+        json_output_file_path (str): The path where the features are exported to as a json file.
+    """
+    # model dependency imports
+    import torch
+    from fmcib.models import fmcib_model
+    from fmcib.preprocessing import preprocess
+
+    # initialize the ResNet50 model with pretrained weights
+    model = fmcib_model()
+
+    # run model preprocessing
+    image = preprocess(input_dict)
+    image = image.unsqueeze(0)
+
+    # run model inference
+    model.eval()
+    with torch.no_grad():
+        features = model(image)
+
+    # generate feature dictionary
+    feature_dict = {f"feature_{idx}": feature for idx, feature in enumerate(features.flatten().tolist())}
+
+    # write feature dictionary to json file
+    with open(json_output_file_path, "w") as f:
+        json.dump(feature_dict, f)
+
+class FMCIBRunner(Module):
+
+    @IO.Instance()
+    @IO.Input('in_data', 'nrrd|nifti:mod=ct', the='Input nrrd or nifti ct image file')
+    @IO.Input('centroids_json', "json:type=fmcibcoordinates", the='JSON file containing 3D coordinates of the centroid of the input mask.')
+    @IO.Output('feature_json', 'features.json', "json:type=fmcibfeatures", bundle='model', the='Features extracted from the input image at the specified seed point.')
+    def task(self, instance: Instance, in_data: InstanceData, centroids_json: InstanceData, feature_json: InstanceData) -> None:
+
+        # read centroids from json file
+        coordinates = get_coordinates(centroids_json.abspath)
+
+        # define input dictionary
+        input_dict = {
+            "image_path": in_data.abspath,
+            **coordinates
+        }
+
+        # run model
+        fmcib(input_dict, feature_json.abspath)
\ No newline at end of file
diff --git a/models/fmcib_radiomics/utils/__init__.py b/models/fmcib_radiomics/utils/__init__.py
new file mode 100644
index 00000000..6d0f2d8d
--- /dev/null
+++ b/models/fmcib_radiomics/utils/__init__.py
@@ -0,0 +1 @@
+from .FMCIBRunner import FMCIBRunner
\ No newline at end of file
diff --git a/models/fmcib_radiomics/utils/coords.schema.json b/models/fmcib_radiomics/utils/coords.schema.json
new file mode 100644
index 00000000..1ee86a00
--- /dev/null
+++ b/models/fmcib_radiomics/utils/coords.schema.json
@@ -0,0 +1,20 @@
+{
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "type": "object",
+  "properties": {
+    "coordX": {
+      "type": "number"
+    },
+    "coordY": {
+      "type": "number"
+    },
+    "coordZ": {
+      "type": "number"
+    }
+  },
+  "required": [
+    "coordX",
+    "coordY",
+    "coordZ"
+  ]
+}
\ No newline at end of file
diff --git a/models/fmcib_radiomics/utils/slicermarkup.schema.json b/models/fmcib_radiomics/utils/slicermarkup.schema.json
new file mode 100644
index 00000000..3ca04d45
--- /dev/null
+++ b/models/fmcib_radiomics/utils/slicermarkup.schema.json
@@ -0,0 +1,699 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema",
+  "$id": "https://raw.githubusercontent.com/Slicer/Slicer/main/Modules/Loadable/Markups/Resources/Schema/markups-v1.0.3-schema.json#",
+  "type": "object",
+  "title": "Schema for storing one or more markups",
+  "description": "Stores points, lines, curves, etc.",
+  "required": ["@schema", "markups"],
+  "additionalProperties": true,
+  "properties": {
+    "@schema": {
+      "$id": "#schema",
+      "type": "string",
+      "title": "Schema",
+      "description": "URL of versioned schema."
+ }, + "markups": { + "$id": "#markups", + "type": "array", + "title": "Markups", + "description": "Stores position and display properties of one or more markups.", + "additionalItems": true, + "items": { + "$id": "#markupItems", + "anyOf": [ + { + "$id": "#markup", + "type": "object", + "title": "Markup", + "description": "Stores a single markup.", + "default": {}, + "required": ["type"], + "additionalProperties": true, + "properties": { + "type": { + "$id": "#markup/type", + "type": "string", + "title": "Basic type", + "enum": ["Fiducial", "Line", "Angle", "Curve", "ClosedCurve", "Plane", "ROI"] + }, + "name": { + "$id": "#markup/name", + "type": "string", + "title": "Name", + "description": "Displayed name of the markup.", + "default": "" + }, + "coordinateSystem": { + "$id": "#markup/coordinateSystem", + "type": "string", + "title": "Control point positions coordinate system name", + "description": "Coordinate system name. Medical images most commonly use LPS patient coordinate system.", + "default": "LPS", + "enum": ["LPS", "RAS"] + }, + "coordinateUnits": { + "$id": "#markup/coordinateUnits", + "anyOf": [ + { + "type": "string", + "title": "Units of control point coordinates", + "description": "Control point coordinate values are specified in this length unit. Specified in UCUM.", + "default": "mm", + "enum": ["mm", "um"] + }, + { + "type": "array", + "title": "Coordinates units code", + "description": "Standard DICOM-compliant terminology item containing code, coding scheme designator, code meaning.", + "examples": [["mm", "UCUM", "millimeter"]], + "additionalItems": false, + "items": { "type": "string" }, + "minItems": 3, + "maxItems": 3 + } + ] + }, + "locked": { + "$id": "#markup/locked", + "type": "boolean", + "title": "Locked", + "description": "Markup can be interacted with on the user interface.", + "default": true + }, + "fixedNumberOfControlPoints": { + "$id": "#markup/fixedNumberOfControlPoints", + "type": "boolean", + "title": "Fixed number of control points", + "description": "Number of control points is fixed at the current value. Control points may not be added or removed (point positions can be unset instead of deleting).", + "default": false + }, + "labelFormat": { + "$id": "#markup/labelFormat", + "type": "string", + "title": "Label format", + "description": "Format of generation new labels. %N refers to node name, %d refers to point index.", + "default": "%N-%d" + }, + "lastUsedControlPointNumber": { + "$id": "#markup/lastUsedControlPointNumber", + "type": "integer", + "title": "Last used control point number", + "description": "This value is used for generating number in the control point's name when a new point is added.", + "default": 0 + }, + "roiType": { + "$id": "#markup/roiType", + "type": "string", + "title": "ROI type", + "description": "Method used to determine ROI bounds from control points. Ex. 'Box', 'BoundingBox'.", + "default": "Box" + }, + "insideOut": { + "$id": "#markup/insideOut", + "type": "boolean", + "title": "Inside out", + "description": "ROI is inside out. Objects that would normally be inside are considered outside and vice versa.", + "default": false + }, + "planeType": { + "$id": "#markup/planeType", + "type": "string", + "title": "Plane type", + "description": "Method used to determine dimensions from control points. Ex. 
'PointNormal', '3Points'.", + "default": "PointNormal" + }, + "sizeMode": { + "$id": "#markup/sizeMode", + "type": "string", + "title": "Plane size mode", + "description": "Mode used to calculate the size of the plane representation. (Ex. Static absolute or automatically calculated plane size based on control points).", + "default": "auto" + }, + "autoScalingSizeFactor": { + "$id": "#markup/autoScalingSizeFactor", + "type": "number", + "title": "Plane auto scaling size factor", + "description": "When the plane size mode is 'auto', the size of the plane is scaled by the auto size scaling factor.", + "default": "1.0" + }, + "center": { + "$id": "#markup/center", + "type": "array", + "title": "Center", + "description": "The center of the markups representation. Ex. center of ROI or plane markups.", + "examples": [[0.0, 0.0, 0.0]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 3, + "maxItems": 3 + }, + "normal": { + "$id": "#markup/normal", + "type": "array", + "title": "Normal", + "description": "The normal direction of plane markups.", + "examples": [[0.0, 0.0, 1.0]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 3, + "maxItems": 3 + }, + "size": { + "$id": "#markup/size", + "type": "array", + "title": "Size", + "description": "The size of the markups representation. For example, axis-aligned edge lengths of the ROI or plane markups.", + "examples": [[5.0, 5.0, 4.0], [5.0, 5.0, 0.0]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 3, + "maxItems": 3 + }, + "planeBounds": { + "$id": "#markup/planeBounds", + "type": "array", + "title": "Plane bounds", + "description": "The bounds of the plane representation.", + "examples": [[-50, 50, -50, 50]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 4, + "maxItems": 4 + }, + "objectToBase": { + "$id": "#markup/objectToBase", + "type": "array", + "title": "Object to Base matrix", + "description": "4x4 transform matrix from the object representation to the coordinate system defined by the control points.", + "examples": [[-0.9744254538021788, -0.15660098593235834, -0.16115572030626558, 26.459385388492746, + -0.08525118065879463, -0.4059244688892957, 0.9099217338613386, -48.04154530201596, + -0.20791169081775938, 0.9003896138683279, 0.3821927158637956, -53.35829266424462, + 0.0, 0.0, 0.0, 1.0]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 16, + "maxItems": 16 + }, + "baseToNode": { + "$id": "#markup/baseToNode", + "type": "array", + "title": "Base to Node matrix", + "description": "4x4 transform matrix from the base representation to the node coordinate system.", + "examples": [[-0.9744254538021788, -0.15660098593235834, -0.16115572030626558, 26.459385388492746, + -0.08525118065879463, -0.4059244688892957, 0.9099217338613386, -48.04154530201596, + -0.20791169081775938, 0.9003896138683279, 0.3821927158637956, -53.35829266424462, + 0.0, 0.0, 0.0, 1.0]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 16, + "maxItems": 16 + }, + "orientation": { + "$id": "#markup/orientation", + "type": "array", + "title": "Markups orientation", + "description": "3x3 orientation matrix of the markups representation. Ex. 
[orientation[0], orientation[3], orientation[6]] is the x vector of the object coordinate system in the node coordinate system.", + "examples": [[-0.6157905804369491, -0.3641498920623639, 0.6987108251316091, + -0.7414677108739087, -0.03213048377225371, -0.6702188193000602, + 0.2665100275346712, -0.9307859518297049, -0.2502197376306259]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 9, + "maxItems": 9 + }, + "controlPoints": { + "$id": "#markup/controlPoints", + "type": "array", + "title": "Control points", + "description": "Stores all control points of this markup.", + "default": [], + "additionalItems": true, + "items": { + "$id": "#markup/controlPointItems", + "anyOf": [ + { + "$id": "#markup/controlPoint", + "type": "object", + "title": "The first anyOf schema", + "description": "Object containing the properties of a single control point.", + "default": {}, + "required": [], + "additionalProperties": true, + "properties": { + "id": { + "$id": "#markup/controlPoint/id", + "type": "string", + "title": "Control point ID", + "description": "Identifier of the control point within this markup", + "default": "", + "examples": ["2", "5"] + }, + "label": { + "$id": "#markup/controlPoint/label", + "type": "string", + "title": "Control point label", + "description": "Label displayed next to the control point.", + "default": "", + "examples": ["F_1"] + }, + "description": { + "$id": "#markup/controlPoint/description", + "type": "string", + "title": "Control point description", + "description": "Details about the control point.", + "default": "" + }, + "associatedNodeID": { + "$id": "#markup/controlPoint/associatedNodeID", + "type": "string", + "title": "Associated node ID", + "description": "ID of the node where this markups is defined on.", + "default": "", + "examples": ["vtkMRMLModelNode1"] + }, + "position": { + "$id": "#markup/controlPoint/position", + "type": "array", + "title": "Control point position", + "description": "Tuple of 3 defined in the specified coordinate system.", + "examples": [[-9.9, 1.1, 12.3]], + "additionalItems": false, + "items": { "type": "number" }, + "minItems": 3, + "maxItems": 3 + }, + "orientation": { + "$id": "#markup/controlPoint/orientation", + "type": "array", + "title": "Control point orientation", + "description": "3x3 orientation matrix", + "examples": [[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0 ]], + "additionalItems": false, + "items": {"type": "number"}, + "minItems": 9, + "maxItems": 9 + }, + "selected": { + "$id": "#markup/controlPoint/selected", + "type": "boolean", + "title": "Control point is selected", + "description": "Specifies if the control point is selected or unselected.", + "default": true + }, + "locked": { + "$id": "#markup/controlPoint/locked", + "type": "boolean", + "title": "Control point locked", + "description": "Control point cannot be moved on the user interface.", + "default": false + }, + "visibility": { + "$id": "#markup/controlPoint/visibility", + "type": "boolean", + "title": "The visibility schema", + "description": "Visibility of the control point.", + "default": true + }, + "positionStatus": { + "$id": "#markup/controlPoint/positionStatus", + "type": "string", + "title": "The positionStatus schema", + "description": "Status of the control point position.", + "enum": ["undefined", "preview", "defined"], + "default": "defined" + } + } + } + ] + } + }, + "display": { + "$id": "#display", + "type": "object", + "title": "The display schema", + "description": "Object holding markups display 
properties.", + "default": {}, + "required": [], + "additionalProperties": true, + "properties": { + "visibility": { + "$id": "#display/visibility", + "type": "boolean", + "title": "Markup visibility", + "description": "Visibility of the entire markup.", + "default": true + }, + "opacity": { + "$id": "#display/opacity", + "type": "number", + "title": "Markup opacity", + "description": "Overall opacity of the markup.", + "minimum": 0.0, + "maximum": 1.0, + "default": 1.0 + }, + "color": { + "$id": "#display/color", + "type": "array", + "title": "Markup color", + "description": "Overall RGB color of the markup.", + "default": [0.4, 1.0, 1.0], + "additionalItems": false, + "items": {"type": "number", "minimum": 0.0, "maximum": 1.0}, + "minItems": 3, + "maxItems": 3 + }, + "selectedColor": { + "$id": "#display/selectedColor", + "title": "Markup selected color", + "description": "Overall RGB color of selected points in the markup.", + "default": [1.0, 0.5, 0.5], + "additionalItems": false, + "items": {"type": "number", "minimum": 0.0, "maximum": 1.0}, + "minItems": 3, + "maxItems": 3 + }, + "activeColor": { + "$id": "#display/activeColor", + "title": "Markup active color", + "description": "Overall RGB color of active points in the markup.", + "default": [0.4, 1.0, 0.0], + "additionalItems": false, + "items": {"type": "number", "minimum": 0.0, "maximum": 1.0}, + "minItems": 3, + "maxItems": 3 + }, + "propertiesLabelVisibility": { + "$id": "#display/propertiesLabelVisibility", + "type": "boolean", + "title": "Properties label visibility", + "description": "Visibility of the label that shows basic properties.", + "default": false + }, + "pointLabelsVisibility": { + "$id": "#display/pointLabelsVisibility", + "type": "boolean", + "title": "Point labels visibility", + "description": "Visibility of control point labels.", + "default": false + }, + "textScale": { + "$id": "#display/textScale", + "type": "number", + "title": "Markup overall text scale", + "description": "Size of displayed text as percentage of window size.", + "default": 3.0, + "minimum": 0.0 + }, + "glyphType": { + "$id": "#display/glyphType", + "type": "string", + "title": "The glyphType schema", + "description": "Enum representing the displayed glyph type.", + "default": "Sphere3D", + "enum": ["Vertex2D", "Dash2D", "Cross2D", "ThickCross2D", "Triangle2D", "Square2D", + "Circle2D", "Diamond2D", "Arrow2D", "ThickArrow2D", "HookedArrow2D", "StarBurst2D", + "Sphere3D", "Diamond3D"] + }, + "glyphScale": { + "$id": "#display/glyphScale", + "type": "number", + "title": "Point glyph scale", + "description": "Glyph size as percentage of window size.", + "default": 1.0, + "minimum": 0.0 + }, + "glyphSize": { + "$id": "#display/glyphSize", + "type": "number", + "title": "Point glyph size", + "description": "Absolute glyph size.", + "default": 5.0, + "minimum": 0.0 + }, + "useGlyphScale": { + "$id": "#display/useGlyphScale", + "type": "boolean", + "title": "Use glyph scale", + "description": "Use relative glyph scale.", + "default": true + }, + "sliceProjection": { + "$id": "#display/sliceProjection", + "type": "boolean", + "title": "Slice projection", + "description": "Enable project markups to slice views.", + "default": false + }, + "sliceProjectionUseFiducialColor": { + "$id": "#display/sliceProjectionUseFiducialColor", + "type": "boolean", + "title": "Use fiducial color for slice projection", + "description": "Choose between projection color or fiducial color for projections.", + "default": true + }, + 
"sliceProjectionOutlinedBehindSlicePlane": { + "$id": "#display/sliceProjectionOutlinedBehindSlicePlane", + "type": "boolean", + "title": "Display slice projection as outline", + "description": "Display slice projection as outline if behind slice plane.", + "default": false + }, + "sliceProjectionColor": { + "$id": "#display/sliceProjectionColor", + "type": "array", + "title": "Slice projection color", + "description": "Overall RGB color for displaying projection.", + "default": [1.0, 1.0, 1.0], + "additionalItems": false, + "items": {"type": "number", "minimum": 0.0, "maximum": 1.0}, + "minItems": 3, + "maxItems": 3 + }, + "sliceProjectionOpacity": { + "$id": "#display/sliceProjectionOpacity", + "type": "number", + "title": "Slice projection opacity", + "description": "Overall opacity of markup slice projection.", + "minimum": 0.0, + "maximum": 1.0, + "default": 0.6 + }, + "lineThickness": { + "$id": "#display/lineThickness", + "type": "number", + "title": "Line thickness", + "description": "Line thickness relative to markup size.", + "default": 0.2, + "minimum": 0.0 + }, + "lineColorFadingStart": { + "$id": "#display/lineColorFadingStart", + "type": "number", + "title": "Line color fading start", + "description": "Distance where line starts to fade out.", + "default": 1.0, + "minimum": 0.0 + }, + "lineColorFadingEnd": { + "$id": "#display/lineColorFadingEnd", + "type": "number", + "title": "Line color fading end", + "description": "Distance where line fades out completely.", + "default": 10.0, + "minimum": 0.0 + }, + "lineColorFadingSaturation": { + "$id": "#display/lineColorFadingSaturation", + "type": "number", + "title": "Color fading saturation", + "description": "Amount of color saturation change as the line fades out.", + "default": 1.0 + }, + "lineColorFadingHueOffset": { + "$id": "#display/lineColorFadingHueOffset", + "type": "number", + "title": "Color fadue hue offset", + "description": "Change in color hue as the line fades out.", + "default": 0.0 + }, + "handlesInteractive": { + "$id": "#display/handlesInteractive", + "type": "boolean", + "title": "Handles interactive", + "description": "Show interactive handles to transform this markup.", + "default": false + }, + "translationHandleVisibility": { + "$id": "#display/translationHandleVisibility", + "type": "boolean", + "title": "Translation handle visibility", + "description": "Visibility of the translation interaction handles", + "default": false + }, + "rotationHandleVisibility": { + "$id": "#display/rotationHandleVisibility", + "type": "boolean", + "title": "Rotation handle visibility", + "description": "Visibility of the rotation interaction handles", + "default": false + }, + "scaleHandleVisibility": { + "$id": "#display/scaleHandleVisibility", + "type": "boolean", + "title": "Scale handle visibility", + "description": "Visibility of the scale interaction handles", + "default": false + }, + "interactionHandleScale": { + "$id": "#display/interactionHandleScale", + "type": "number", + "title": "Interaction handle glyph scale", + "description": "Interaction handle size as percentage of window size.", + "default": 3.0 + }, + "snapMode": { + "$id": "#display/snapMode", + "type": "string", + "title": "Snap mode", + "description": "How control points can be defined and moved.", + "default": "toVisibleSurface", + "enum": ["unconstrained", "toVisibleSurface"] + } + } + }, + "measurements": { + "$id": "#markup/measurements", + "type": "array", + "title": "Measurements", + "description": "Stores all measurements for this markup.", + 
"default": [], + "additionalItems": true, + "items": { + "$id": "#markup/measurementItems", + "anyOf": [ + { + "$id": "#markup/measurement", + "type": "object", + "title": "Measurement", + "description": "Store a single measurement.", + "default": {}, + "required": [], + "additionalProperties": true, + "properties": { + "name": { + "$id": "#markup/measurement/name", + "type": "string", + "title": "Measurement name", + "description": "Printable name of the measurement", + "default": "", + "examples": ["length", "area"] + }, + "enabled": { + "$id": "#markup/measurement/enabled", + "type": "boolean", + "title": "Computation of the measurement is enabled", + "description": "This can be used to define measurements but prevent automatic updates.", + "default": true + }, + "value": { + "$id": "#display/measurement/value", + "type": "number", + "title": "Measurement value", + "description": "Numeric value of the measurement." + }, + "units": { + "$id": "#markup/measurement/units", + "anyOf": [ + { + "type": "string", + "title": "Measurement unit", + "description": "Printable measurement unit. Use of UCUM is preferred.", + "default": "", + "examples": ["mm", "mm2"] + }, + { + "type": "array", + "title": "Measurement units code", + "description": "Standard DICOM-compliant terminology item containing code, coding scheme designator, code meaning.", + "examples": [["cm3", "UCUM", "cubic centimeter"]], + "additionalItems": false, + "items": { "type": "string" }, + "minItems": 3, + "maxItems": 3 + } + ] + }, + "description": { + "$id": "#markup/measurement/description", + "type": "string", + "title": "Measurement description", + "description": "Explanation of the measurement.", + "default": "" + }, + "printFormat": { + "$id": "#markup/measurement/printFormat", + "type": "string", + "title": "Print format", + "description": "Format string (printf-style) to create user-displayable string from value and units.", + "default": "", + "examples": ["%5.3f %s"] + }, + "quantityCode": { + "$id": "#markup/measurement/quantityCode", + "type": "array", + "title": "Measurement quantity code", + "description": "Standard DICOM-compliant terminology item containing code, coding scheme designator, code meaning.", + "default": [], + "examples": [["118565006", "SCT", "Volume"]], + "additionalItems": false, + "items": { "type": "string" }, + "minItems": 3, + "maxItems": 3 + }, + "derivationCode": { + "$id": "#markup/measurement/derivationCode", + "type": "array", + "title": "Measurement derivation code", + "description": "Standard DICOM-compliant terminology item containing code, coding scheme designator, code meaning.", + "default": [], + "examples": [["255605001", "SCT", "Minimum"]], + "additionalItems": false, + "items": { "type": "string" }, + "minItems": 3, + "maxItems": 3 + }, + "methodCode": { + "$id": "#markup/measurement/methodCode", + "type": "array", + "title": "Measurement method code", + "description": "Standard DICOM-compliant terminology item containing code, coding scheme designator, code meaning.", + "default": [], + "examples": [["126030", "DCM", "Sum of segmented voxel volumes"]], + "additionalItems": false, + "items": { "type": "string" }, + "minItems": 3, + "maxItems": 3 + }, + "controlPointValues": { + "$id": "#markup/controlPoint/controlPointValues", + "type": "array", + "title": "Measurement values for each control point.", + "description": "This stores measurement result if it has value for each control point.", + "examples": [[-9.9, 1.1, 12.3, 4.3, 4.8]], + "additionalItems": false, + "items": { 
"type": "number" } + } + } + } + ] + } + } + } + } + ] + } + } + } +} diff --git a/models/gc_grt123_lung_cancer/__init__.py b/models/gc_grt123_lung_cancer/__init__.py new file mode 100644 index 00000000..90f60fdd --- /dev/null +++ b/models/gc_grt123_lung_cancer/__init__.py @@ -0,0 +1 @@ +from .utils import * \ No newline at end of file diff --git a/models/gc_grt123_lung_cancer/config/default.yml b/models/gc_grt123_lung_cancer/config/default.yml new file mode 100644 index 00000000..a068585b --- /dev/null +++ b/models/gc_grt123_lung_cancer/config/default.yml @@ -0,0 +1,30 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: grt123 lung nodule and lung cancer classifier default (dicom to json) + +execute: + - DicomImporter + - MhaConverter + - LungCancerClassifierRunner + - DataOrganizer + +modules: + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: True + meta: + mod: ct + + MhaConverter: + engine: panimg + + LungCancerClassifierRunner: + n_preprocessing_workers: 8 + + DataOrganizer: + target_dir: output_data + require_data_confirmation: true + targets: + - json-->[i:sid]/gc_grt123_lung_cancer_findings.json diff --git a/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile b/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile new file mode 100644 index 00000000..d70f0ab3 --- /dev/null +++ b/models/gc_grt123_lung_cancer/dockerfiles/Dockerfile @@ -0,0 +1,35 @@ +FROM mhubai/base:latest + +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# install required dependencies for grt123 algorithm including GPU support +RUN pip3 install --no-cache-dir \ + torch===2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html + +# Install grt123 algorithm and model weights +# - Git clone the algorithm repository for v2.0.0 (fixed to v2.0.0 tag commit on 2023/09/13) +# - We remove unnecessary files for a compacter docker layer +# - Subsequently we remove the .git directory to procuce a compacter docker layer, but keep the latest commit hash in the HEAD file +RUN git clone --branch v2.0.0 https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123.git /gc_grt123_lung_cancer && \ + cd /gc_grt123_lung_cancer && git reset --hard 9a4ca0415c7fc1d3023a16650bf1cdce86f8bb59 && \ + rm -rf /gc_grt123_lung_cancer/tests && \ + rm -rf /gc_grt123_lung_cancer/training && \ + rm -rf /gc_grt123_lung_cancer/processor && \ + rm -rf /gc_grt123_lung_cancer/images && \ + rm /gc_grt123_lung_cancer/README.md && \ + rm /gc_grt123_lung_cancer/solution-grt123-team.pdf && \ + mv /gc_grt123_lung_cancer/.git/HEAD /gc_grt123_lung_cancer && \ + rm -rf /gc_grt123_lung_cancer/.git/* && \ + mv /gc_grt123_lung_cancer/HEAD /gc_grt123_lung_cancer/.git + +# Import the MHub model definition +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh gc_grt123_lung_cancer ${MHUB_MODELS_REPO} + +# Add lobe segmentation code base to python path +ENV PYTHONPATH="/gc_grt123_lung_cancer:/app" + +# Default entrypoint +ENTRYPOINT ["python3", "-m", "mhubio.run"] +CMD ["--config", "/app/models/gc_grt123_lung_cancer/config/default.yml"] diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json new file mode 100644 index 00000000..b3d24de0 --- /dev/null +++ b/models/gc_grt123_lung_cancer/meta.json @@ -0,0 +1,171 @@ +{ + "id": "2e67a3cc-4680-4058-bf4e-f965cf50f06f", + "name": "gc_grt123_lung_cancer", + "title": "Lung cancer risk estimation on thorax CT scans", + "summary": { + "description": "This algorithm analyzes non-contrast CT scans of the 
diff --git a/models/gc_grt123_lung_cancer/meta.json b/models/gc_grt123_lung_cancer/meta.json new file mode 100644 index 00000000..b3d24de0 --- /dev/null +++ b/models/gc_grt123_lung_cancer/meta.json @@ -0,0 +1,171 @@ +{ + "id": "2e67a3cc-4680-4058-bf4e-f965cf50f06f", + "name": "gc_grt123_lung_cancer", + "title": "Lung cancer risk estimation on thorax CT scans", + "summary": { + "description": "This algorithm analyzes non-contrast CT scans of the thorax and predicts the lung cancer risk. The model consists of two modules. The first one is a 3D region proposal network for nodule detection, which outputs all suspicious nodules for a subject. The second one selects the top five nodules based on the detection confidence, evaluates their cancer probabilities and combines them with a leaky noisy-or gate to obtain the probability of lung cancer for the subject. This model was the winner of the Data Science Bowl 2017 competition hosted on Kaggle.", + "inputs": [ + { + "label": "CT", + "description": "Chest CT", + "format": "DICOM", + "modality": "CT", + "bodypartexamined": "Chest", + "slicethickness": "2.5mm", + "non-contrast": true, + "contrast": false + } + ], + "outputs": [ + { + "type": "Prediction", + "valueType": "number", + "label": "Lung cancer nodule probability score", + "description": "The likelihood of the presence of cancer nodules in the lungs.", + "classes": [] + } + ], + "model": { + "architecture": "3D convolutional neural network", + "training": "supervised", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 2483 + }, + "evaluation": { + "vol_samples": 506 + }, + "public": true, + "external": false + } + }, + "details": { + "name": "bodyct-dsb2017-grt123", + "version": "2.0.0", + "devteam": "DIAGNijmegen (Diagnostic Image Analysis Group, Radboud UMC, The Netherlands)", + "type": "3D Deep Leaky Noisy-or Network", + "date": { + "weights": "", + "code": "2023-07-04", + "pub": "2017-11-22" + }, + "cite": "F. Liao, M. Liang, Z. Li, X. Hu and S. Song, 'Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network', in IEEE Transactions on Neural Networks and Learning Systems, vol. 30, no. 11, pp. 3484-3495, Nov. 2019, doi: 10.1109/TNNLS.2019.2892409.", + "license": { + "code": "MIT", + "weights": "MIT" + }, + "publications": [ + { + "title": "Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network", + "uri": "https://ieeexplore.ieee.org/abstract/document/8642524" + }, + { + "title": "Deep Learning for Lung Cancer Detection on Screening CT Scans: Results of a Large-Scale Public Competition and an Observer Study with 11 Radiologists", + "uri": "https://pubmed.ncbi.nlm.nih.gov/34870218/" + } + ], + "github": "https://github.com/DIAGNijmegen/bodyct-dsb2017-grt123", + "zenodo": "", + "colab": "", + "slicer": false + }, + "info": { + "use": { + "title": "Intended use", + "text": "This algorithm analyzes non-contrast CT scans of the thorax: it first segments the lungs, then detects lung nodules within them, and finally predicts the lung cancer risk for the individual nodules and for the scan as a whole. The algorithm is also hosted on Grand Challenge [1] and was the winner of the Data Science Bowl 2017 challenge on Kaggle [2].", + "references": [ + { + "label": "Lung cancer risk estimation algorithm on grand-challenge", + "uri": "https://grand-challenge.org/algorithms/dsb2017-grt123/" + }, + { + "label": "Data Science Bowl 2017 challenge", + "uri": "https://www.kaggle.com/c/data-science-bowl-2017" + } + ], + "tables": [] + }, + "analyses": { + "title": "Evaluation", + "text": "The evaluation of the model was done on the Data Science Bowl 2017 (DSB) dataset hosted on Kaggle [1] (this is no longer publicly available). The nodule detection was evaluated on the validation set of the DSB dataset, which contained data from 198 cases with 71 nodules in total (7 nodules smaller than 6 mm were excluded). 
The Free Response Operating Characteristic (FROC) is used to evaluate the performance of the nodule detection. The case cancer classification was evaluated using the Area Under the Curve (AUC) metric on the training and testing sets of 1397 and 506 patient cases, respectively. The AUC and FROC graphs can be viewed in the publication [2]. For the final evaluation on the Data Science Bowl 2017 challenge, the model's performance was evaluated using the logistic loss on a private external dataset of 300 low-dose CT images [3], containing 100 cancer-positive scans and 200 cancer-negative scans. See the tables for a summary of the results.", + "references": [ + { + "label": "Data Science Bowl 2017 challenge", + "uri": "https://www.kaggle.com/c/data-science-bowl-2017" + }, + { + "label": "Evaluate the Malignancy of Pulmonary Nodules Using the 3D Deep Leaky Noisy-or Network", + "uri": "https://ieeexplore.ieee.org/abstract/document/8642524" + }, + { + "label": "Evaluation paper external dataset Data Science Bowl 2017", + "uri": "https://pubmed.ncbi.nlm.nih.gov/34870218/" + } + ], + "tables": [ + { + "label": "Case cancer classification results on the DSB 2017 dataset", + "entries": { + "AUC on training set": "0.90", + "AUC on test set": "0.87", + "Logistic loss on test set": "0.39975" + } + }, + { + "label": "Case cancer classification results on private external evaluation dataset.", + "entries": { + "AUC on all scans": "0.877 (95% CI: 0.842, 0.910)" + } + } + ] + }, + "evaluation": { + "title": "Evaluation data", + "text": "The model was evaluated on the testing set of 506 patient cases of the Data Science Bowl 2017 (DSB) dataset hosted on Kaggle [1] (this is no longer publicly available).", + "references": [ + { + "label": "Data Science Bowl 2017 challenge", + "uri": "https://www.kaggle.com/c/data-science-bowl-2017" + } + ], + "tables": [] + }, + "training": { + "title": "Training data", + "text": "Two lung scan datasets were used to train the model: the LUng Nodule Analysis 2016 (LUNA16) dataset [1] [2] and the training set of the Data Science Bowl 2017 (DSB) hosted on Kaggle [3] (this is no longer publicly available). Nodules smaller than 6 mm were removed from the LUNA16 annotations before training. The LUNA16 dataset includes 1186 nodule labels in 888 patient cases annotated by radiologists. The DSB dataset includes 1397 and 198 patient cases in its training and validation sets, respectively. 
The LUNA16 dataset is a subset of the images from the LIDC/IDRI dataset [4], which is available under a Creative Commons Attribution 3.0 Unported License.", + "references": [ + { + "label": "LUng Nodule Analysis 2016 dataset part 1", + "uri": "https://zenodo.org/record/3723295" + }, + { + "label": "LUng Nodule Analysis 2016 dataset part 2", + "uri": "https://zenodo.org/record/4121926" + }, + { + "label": "Data Science Bowl 2017 challenge", + "uri": "https://www.kaggle.com/c/data-science-bowl-2017" + }, + { + "label": "The LIDC/IDRI dataset", + "uri": "https://www.cancerimagingarchive.net/collection/lidc-idri/" + } + ], + "tables": [] + }, + "ethics": { + "title": "", + "text": "", + "references": [], + "tables": [] + }, + "limitations": { + "title": "", + "text": "", + "references": [], + "tables": [] + } + } +} diff --git a/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py new file mode 100644 index 00000000..803a8126 --- /dev/null +++ b/models/gc_grt123_lung_cancer/utils/LungCancerClassifierRunner.py @@ -0,0 +1,119 @@ +""" +-------------------------------------------------------- +Mhub / GC - Run Module for grt123 Lung Cancer Classifier +-------------------------------------------------------- + +-------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +-------------------------------------------------------- +""" +from mhubio.core import Instance, InstanceData, IO, Module, ValueOutput, Meta + +from typing import Dict +import json +from pathlib import Path + +import torch + + +@ValueOutput.Name('lncancerprob') +@ValueOutput.Meta(Meta(min=0.0, max=1.0, type="probability")) +@ValueOutput.Label('Lung Nodule cancer probability score.') +@ValueOutput.Type(float) +@ValueOutput.Description('The predicted cancer probability score for a single lung nodule detected by the algorithm') +class LNCancerProb(ValueOutput): + pass + + +@ValueOutput.Name('clcancerprob') +@ValueOutput.Meta(Meta(min=0.0, max=1.0, type="probability")) +@ValueOutput.Label('Case level cancer probability score.') +@ValueOutput.Type(float) +@ValueOutput.Description('The predicted cancer probability score for the whole case') +class CLCancerProb(ValueOutput): + pass + + +# This method cleans the raw results from the grt123 algorithm output and only keeps the relevant details +def cleanup_json_report(data: Dict): + for key in ["trainingset1", "trainingset2"]: + del data["lungcad"][key] + for key in ["patientuid", "studyuid"]: + del data["imageinfo"][key] + data["findings"] = [ + dict( + id=f["id"], + x=f["x"], + y=f["y"], + z=f["z"], + probability=f["probability"], + cancerprobability=f["cancerprobability"] + ) + for f in data["findings"] + ]
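+ +# For illustration, after cleanup_json_report the report consumed by the runner below roughly +# has this shape (field names taken from the code in this module; values are made-up examples): +# {"lungcad": {...}, "imageinfo": {...}, "cancerinfo": {"casecancerprobability": 0.42, ...}, +# "findings": [{"id": 1, "x": 12.3, "y": 45.6, "z": 78.9, "probability": 0.87, "cancerprobability": 0.42}]}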
@IO.Config('n_preprocessing_workers', int, 6, the="number of preprocessing workers to use for the grt123 lung mask preprocessor") +class LungCancerClassifierRunner(Module): + + n_preprocessing_workers: int + + @IO.Instance() + @IO.Input('in_data', 'mha:mod=ct', the='input ct scan') + @IO.Output('out_data', 'grt123_lung_cancer_findings.json', 'json:model=grt123LungCancerClassification', data='in_data', the='predicted nodules and lung cancer findings for the input scan') + @IO.OutputData('clcancerprob', CLCancerProb, the='Case level probability score') + @IO.OutputDatas('lncancerprobs', LNCancerProb, the='Individual lung nodule probability scores') + def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData, clcancerprob: CLCancerProb, lncancerprobs: LNCancerProb) -> None: + # create temporary directories for the preprocessed data and the cropped bounding boxes + tmp_path = Path(self.config.data.requestTempDir('grt123')) + tmp_output_bbox_dir = tmp_path / "bbox" + tmp_output_prep_dir = tmp_path / "prep" + tmp_output_bbox_dir.mkdir(exist_ok=True, parents=True) + tmp_output_prep_dir.mkdir(exist_ok=True, parents=True) + + # determine the number of GPUs we can use + if torch.cuda.is_available(): + self.log("Running with a GPU", "NOTICE") + n_gpu = 1 + else: + self.log("Running on the CPU, might be slow...", "NOTICE") + n_gpu = 0 + + # Import the main module for the grt123 algorithm, which must be used for running the classification + import main + + # apply grt123 algorithm + results = main.main( + skip_detect=False, + skip_preprocessing=False, + datapath=str(Path(in_data.abspath).parent), + outputdir=str(tmp_path), + output_bbox_dir=str(tmp_output_bbox_dir), + output_prep_dir=str(tmp_output_prep_dir), + n_gpu=n_gpu, + n_worker_preprocessing=self.n_preprocessing_workers, + data_filter=r".*\.mha" + ) + + # retrieve classification results + assert len(results) > 0, "LungCancerClassifierRunner expects at least one output report" + results_dict = results[0].to_json() + cleanup_json_report(results_dict) + + # export to JSON (original json file) + self.log(f"Writing classification results to {out_data.abspath}", "NOTICE") + with open(out_data.abspath, "w") as f: + json.dump(results_dict, f, indent=4) + + # set output value for case level cancer probability + clcancerprob.value = results_dict["cancerinfo"]["casecancerprobability"] + + # set output values for nodule level cancer probabilities + for finding in results_dict["findings"]: + nodule_cancer_prob = LNCancerProb() + nodule_cancer_prob.meta = Meta(id=finding['id'], x=finding['x'], y=finding['y'], z=finding['z']) + nodule_cancer_prob.description += f" (for nodule {finding['id']} at location ({finding['x']}, {finding['y']}, {finding['z']}))" + nodule_cancer_prob.value = finding["cancerprobability"] + lncancerprobs.add(nodule_cancer_prob) diff --git a/models/gc_grt123_lung_cancer/utils/__init__.py b/models/gc_grt123_lung_cancer/utils/__init__.py new file mode 100644 index 00000000..d9f025f9 --- /dev/null +++ b/models/gc_grt123_lung_cancer/utils/__init__.py @@ -0,0 +1 @@ +from .LungCancerClassifierRunner import * \ No newline at end of file diff --git a/models/gc_lunglobes/config/default.yml b/models/gc_lunglobes/config/default.yml index f25f0444..5bbbbda3 100644 --- a/models/gc_lunglobes/config/default.yml +++ b/models/gc_lunglobes/config/default.yml @@ -7,7 +7,6 @@ execute: - DicomImporter - MhaConverter - LobeSegmentationRunner -- NiftiConverter - DsegConverter - DataOrganizer @@ -17,17 +16,13 @@ modules: import_dir: sorted_data sort_data: True meta: - mod: ct + mod: '%Modality' MhaConverter: - engine: plastimatch - - NiftiConverter: - in_datas: mha:mod=seg + engine: panimg DsegConverter: model_name: GCLungLobes - source_segs: nifti:mod=seg skip_empty_slices: True DataOrganizer: diff --git a/models/gc_lunglobes/dockerfiles/Dockerfile b/models/gc_lunglobes/dockerfiles/Dockerfile index 66f5a9ca..0e87d31a 100644 --- a/models/gc_lunglobes/dockerfiles/Dockerfile +++ b/models/gc_lunglobes/dockerfiles/Dockerfile @@ -1,7 +1,7 @@ FROM mhubai/base:latest # Update authors label -LABEL authors="s.vandeleemput@radboudumc.nl,dbontempi@bwh.harvard.edu,lnuernberg@bwh.harvard.edu" +LABEL 
authors="sil.vandeleemput@radboudumc.nl,dbontempi@bwh.harvard.edu,lnuernberg@bwh.harvard.edu" # Install system dependencies for OpenCV RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y @@ -19,8 +19,8 @@ RUN pip3 install --no-cache-dir --force-reinstall SimpleITK==1.2.4 ARG MHUB_MODELS_REPO RUN buildutils/import_mhub_model.sh gc_lunglobes ${MHUB_MODELS_REPO} -# Install Xie's pulmonary lobe segmentation algorithm and model weights -RUN git clone https://github.com/DIAGNijmegen/bodyct-pulmonary-lobe-segmentation.git src && \ +# Install Xie's pulmonary lobe segmentation algorithm and model weights (release gclobe165 v1.6.5) +RUN git clone --depth 1 --branch v1.6.5 https://github.com/DIAGNijmegen/bodyct-pulmonary-lobe-segmentation.git src && \ sed -i 's/from models import CTSUNet/from src.models import CTSUNet/g' src/test.py # Default run script diff --git a/models/gc_lunglobes/meta.json b/models/gc_lunglobes/meta.json index 47229053..d5d530a1 100644 --- a/models/gc_lunglobes/meta.json +++ b/models/gc_lunglobes/meta.json @@ -10,7 +10,7 @@ "format": "DICOM", "modality": "CT", "bodypartexamined": "Chest", - "slicethickness": "2.5mm", + "slicethickness": "0.75mm", "non-contrast": true, "contrast": false } ], @@ -26,7 +26,7 @@ } ], "model": { "architecture": "Relational two-stage U-net", - "training": "Supervised", + "training": "supervised", "cmpapproach": "3D" }, "data": { @@ -36,7 +36,7 @@ "evaluation": { "vol_samples": 1155 }, - "public": "Partially", + "public": false, "external": true } }, @@ -46,9 +46,9 @@ "devteam": "DIAGNijmegen (Diagnostic Image Analysis Group, Radboud UMC, The Netherlands)", "type": "Relational two-stage U-Net (Cascade of two relational U-Net, trained end-to-end)", "date": { - "weights": "14/02/22", - "code": "n/a", - "pub": "n/a" + "weights": "2022-02-14", + "code": "2023-11-27", + "pub": "2020-05-15" }, "cite": "W. Xie, C. Jacobs, J. -P. Charbonnier and B. van Ginneken, 'Relational Modeling for Robust and Efficient Pulmonary Lobe Segmentation in CT Scans,' in IEEE Transactions on Medical Imaging, vol. 39, no. 8, pp. 2664-2675, Aug. 
2020, doi: 10.1109/TMI.2020.2995108.", "license": { diff --git a/models/gc_nnunet_pancreas/__init__.py b/models/gc_nnunet_pancreas/__init__.py new file mode 100644 index 00000000..16281fe0 --- /dev/null +++ b/models/gc_nnunet_pancreas/__init__.py @@ -0,0 +1 @@ +from .utils import * diff --git a/models/gc_nnunet_pancreas/config/default.yml b/models/gc_nnunet_pancreas/config/default.yml new file mode 100644 index 00000000..ca3a2a3e --- /dev/null +++ b/models/gc_nnunet_pancreas/config/default.yml @@ -0,0 +1,44 @@ +general: + version: 1.0 + data_base_dir: /app/data + description: base configuration for GC NNUnet Pancreas model (dicom to dicom, and json output) + +execute: +- DicomImporter +- MhaConverter +- GCNNUnetPancreasRunner +- DsegConverter +- ReportExporter +- DataOrganizer + +modules: + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: true + meta: + mod: '%Modality' + + MhaConverter: + engine: panimg + targets: [dicom:mod=ct] + + DsegConverter: + model_name: 'GC NNUnet Pancreas' + source_segs: ['mha:mod=seg:src=cleaned'] + target_dicom: dicom:mod=ct + skip_empty_slices: True + + ReportExporter: + format: compact + includes: + - data: pancreatic_tumor_likelihood + label: pancreatic_tumor_likelihood + value: value + + DataOrganizer: + targets: + - mha:mod=heatmap-->[i:sid]/nnunet_pancreas_heatmap.mha + - mha:mod=seg:src=cleaned-->[i:sid]/nnunet_pancreas.seg.mha + - dicomseg:mod=seg-->[i:sid]/nnunet_pancreas.seg.dcm + - json:mod=report-->[i:sid]/nnunet_pancreas_case_level_likelihood.json
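+ + # Note on the target patterns above (our reading of the DataOrganizer conventions used + # across MHub configs): the [i:sid] placeholder expands to the series identifier assigned + # to the instance by the importer, so every case lands in its own output sub-folder.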
diff --git a/models/gc_nnunet_pancreas/dockerfiles/Dockerfile b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile new file mode 100644 index 00000000..22b43174 --- /dev/null +++ b/models/gc_nnunet_pancreas/dockerfiles/Dockerfile @@ -0,0 +1,45 @@ +FROM mhubai/base:latest + +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# Install PyTorch 2.0.1 (CUDA enabled) +RUN pip3 install --no-cache-dir torch==2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html + +# Install git-lfs (required for downloading the model weights) +RUN apt update && \ + apt install -y --no-install-recommends git-lfs && \ + rm -rf /var/lib/apt/lists/* + +# Install the model weights and the algorithm files +# * Pull algorithm from repo into /opt/algorithm (main branch, commit 15dd550beada43a8a55b81a32d9b3904a1cf8d30) +# * Remove .git folder to keep docker layer small +RUN git clone https://github.com/DIAGNijmegen/CE-CT_PDAC_AutomaticDetection_nnUnet.git /opt/algorithm && \ + cd /opt/algorithm && \ + git reset --hard 15dd550beada43a8a55b81a32d9b3904a1cf8d30 && \ + rm -rf /opt/algorithm/.git + +# Set this environment variable as a shortcut to avoid nnunet 1.7.0 crashing the build +# by pulling sklearn instead of scikit-learn +# N.B. this is a known issue: +# https://github.com/MIC-DKFZ/nnUNet/issues/1281 +# https://github.com/MIC-DKFZ/nnUNet/pull/1209 +ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True + +# Install nnUNet 1.7.0 and other requirements +RUN pip3 install --no-cache-dir evalutils==0.3.0 nnunet==1.7.0 + +# Extend the nnUNet installation with custom trainers +RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \ + mv /opt/algorithm/nnUNetTrainerV2_Loss_CE_checkpoints.py "$SITE_PKG/nnunet/training/network_training/nnUNetTrainerV2_Loss_CE_checkpoints.py" + +# Import the MHub model definition +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh gc_nnunet_pancreas ${MHUB_MODELS_REPO} + +# Add algorithm files to python path +ENV PYTHONPATH=/opt/algorithm:/app + +# Configure main entrypoint +ENTRYPOINT ["python3", "-m", "mhubio.run"] +CMD ["--config", "/app/models/gc_nnunet_pancreas/config/default.yml"] diff --git a/models/gc_nnunet_pancreas/meta.json b/models/gc_nnunet_pancreas/meta.json new file mode 100644 index 00000000..9eb831c1 --- /dev/null +++ b/models/gc_nnunet_pancreas/meta.json @@ -0,0 +1,149 @@ +{ + "id": "bf7ae4bb-c6f5-4b1e-89aa-a8de246def57", + "name": "gc_nnunet_pancreas", + "title": "Pancreatic Ductal Adenocarcinoma Detection in CT", + "summary": { + "description": "This algorithm produces a tumor likelihood heatmap for the presence of pancreatic ductal adenocarcinoma (PDAC) in an input venous-phase contrast-enhanced computed tomography scan (CECT). Additionally, the algorithm provides the segmentation of multiple surrounding anatomical structures such as the pancreatic duct, common bile duct, veins and arteries. The heatmap and segmentations are resampled to the same spatial resolution and physical dimensions as the input CECT image for easier visualisation.", + "inputs": [ + { + "label": "Venous phase CT scan", + "description": "A contrast-enhanced CT scan in the venous phase with axial reconstruction", + "format": "DICOM", + "modality": "CT", + "bodypartexamined": "Abdomen", + "slicethickness": "2.5mm", + "non-contrast": false, + "contrast": true + } + ], + "outputs": [ + { + "type": "Prediction", + "valueType": "Likelihood map", + "label": "Pancreatic tumor likelihood heatmap", + "description": "Pancreatic tumor likelihood heatmap, where each voxel represents a floating point in range [0,1].", + "classes": [] + }, + { + "type": "Prediction", + "valueType": "Likelihood", + "label": "Pancreatic tumor likelihood", + "description": "Case-level pancreatic tumor likelihood value with a value in range [0,1].", + "classes": [] + }, + { + "type": "Segmentation", + "label": "Pancreas segmentation", + "description": "Segmentation of pancreas-related tissues. These segmentation classes were not thoroughly validated; use them at your own risk!", + "classes": [ + "VEIN", + "ARTERY", + "PANCREAS", + "PANCREATIC_DUCT", + "BILE_DUCT" + ] + } + ], + "model": { + "architecture": "nnUnet", + "training": "supervised", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 242 + }, + "evaluation": { + "vol_samples": 361 + }, + "public": true, + "external": false + } + }, + "details": { + "name": "Fully Automatic Deep Learning Framework for Pancreatic Ductal Adenocarcinoma Detection on Computed Tomography", + "version": "0.1.0", + "devteam": "DIAGNijmegen (Diagnostic Image Analysis Group, Radboud UMC, The Netherlands)", + "type": "The models were developed using nnUnet. 
All models employed a 3D U-Net as the base architecture and were trained for 250,000 training steps with five-fold cross-validation.", + "date": { + "weights": "2023-06-28", + "code": "2022-07-19", + "pub": "2022-01-13" + }, + "cite": "Alves N, Schuurmans M, Litjens G, Bosma JS, Hermans J, Huisman H. Fully Automatic Deep Learning Framework for Pancreatic Ductal Adenocarcinoma Detection on Computed Tomography. Cancers (Basel). 2022 Jan 13;14(2):376. doi: 10.3390/cancers14020376. PMID: 35053538; PMCID: PMC8774174.", + "license": { + "code": "Apache 2.0", + "weights": "Apache 2.0" + }, + "publications": [ + { + "title": "Fully Automatic Deep Learning Framework for Pancreatic Ductal Adenocarcinoma Detection on Computed Tomography", + "uri": "https://www.mdpi.com/2072-6694/14/2/376" + } + ], + "github": "https://github.com/DIAGNijmegen/CE-CT_PDAC_AutomaticDetection_nnUnet", + "zenodo": "", + "colab": "", + "slicer": false + }, + "info": { + "use": { + "title": "Intended Use", + "text": "This algorithm is intended to be used only on venous-phase CECT examinations of patients with clinical suspicion of PDAC. This algorithm should not be used in different patient demographics.", + "references": [], + "tables": [] + }, + "analyses": { + "title": "Analysis", + "text": "The study evaluated the model's performance for tumor detection by analyzing receiver operating characteristic (ROC) and free-response receiver operating characteristic (FROC) curves, assessing both tumor presence and lesion localization, and compared three configurations using statistical tests and ensemble modeling. The table below lists the model's performance on an external evaluation dataset of 361 cases. Additional analysis details and results can be found in the original paper [1].", + "references": [ + { + "label": "Fully Automatic Deep Learning Framework for Pancreatic Ductal Adenocarcinoma Detection on Computed Tomography", + "uri": "https://www.mdpi.com/2072-6694/14/2/376" + } + ], + "tables": [ + { + "label": "Evaluation results of the nnUnet_MS model on the external test set of 361 cases.", + "entries": { + "Mean AUC-ROC (95% CI)": "0.991 (0.970-1.0)", + "Mean pAUC-FROC (95% CI)": "3.996 (3.027-4.965)" + } + } + ] + }, + "evaluation": { + "title": "Evaluation Data", + "text": "This framework was tested on an independent, external cohort consisting of two publicly available datasets of 281 and 80 patients, respectively: the Medical Segmentation Decathlon pancreas dataset (training portion) [1], consisting of 281 patients with pancreatic malignancies (including lesions in the head, neck, body, and tail of the pancreas) and voxel-level annotations for the pancreas and lesion, and 
the Cancer Imaging Archive dataset from the US National Institutes of Health Clinical Center [2], containing 80 patients with a normal pancreas and corresponding voxel-level annotations.", + "references": [ + { + "label": "The Medical Segmentation Decathlon pancreas dataset (training portion)", + "uri": "http://medicaldecathlon.com/" + }, + { + "label": "The Cancer Imaging Archive dataset from the US National Institutes of Health Clinical Center", + "uri": "https://wiki.cancerimagingarchive.net/display/Public/Pancreas-CT" + } + ], + "tables": [] + }, + "training": { + "title": "Training data", + "text": "CE-CT scans in the portal venous phase from 119 patients with pathology-proven PDAC in the pancreatic head (PDAC cohort) and 123 patients with a normal pancreas (non-PDAC cohort), acquired between 1 January 2013 and 1 June 2020, were selected for model development.", + "references": [], + "tables": [] + }, + "ethics": { + "title": "", + "text": "", + "references": [], + "tables": [] + }, + "limitations": { + "title": "Before using this model", + "text": "Test the model retrospectively and prospectively on a diagnostic cohort that reflects the target population that the model will be used upon to confirm the validity of the model within a local setting.", + "references": [], + "tables": [] + } + } +} \ No newline at end of file diff --git a/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py new file mode 100644 index 00000000..770c203d --- /dev/null +++ b/models/gc_nnunet_pancreas/utils/GCNNUnetPancreasRunner.py @@ -0,0 +1,85 @@ +""" +----------------------------------------------------------- +GC / MHub - Run Module for the GC NNUnet Pancreas Algorithm +----------------------------------------------------------- + +----------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +----------------------------------------------------------- +""" + +from mhubio.core import Module, Instance, InstanceData, Meta, IO, ValueOutput + +from pathlib import Path +import SimpleITK +import sys + + +CLI_PATH = Path(__file__).parent / "cli.py" + + +@ValueOutput.Name('pancreatic_tumor_likelihood') +@ValueOutput.Label('PancreaticTumorLikelihood') +@ValueOutput.Meta(Meta(min=0.0, max=1.0, type="likelihood")) +@ValueOutput.Type(float) +@ValueOutput.Description('Case-level likelihood of pancreatic tumor presence.') +class PancreaticTumorLikelihood(ValueOutput): + pass + + +class GCNNUnetPancreasRunner(Module): + @IO.Instance() + @IO.Input('in_data', 'mha:mod=ct', the="input data") + @IO.Output('heatmap', 'heatmap.mha', 'mha:mod=heatmap:model=GCNNUnetPancreas', data="in_data", + the="raw heatmap of the pancreatic tumor likelihood (not masked with any pancreas segmentations).") + @IO.Output('segmentation_raw', 'segmentation_raw.mha', 'mha:mod=seg:src=original:model=GCNNUnetPancreas:roi=VEIN,ARTERY,PANCREAS,PANCREATIC_DUCT,BILE_DUCT,PANCREAS+CYST,RENAL_VEIN', data="in_data", + the="original segmentation of the pancreas, with the following classes: " + "0-background, 1-veins, 2-arteries, 3-pancreas, 4-pancreatic duct, 5-bile duct, 6-cysts, 7-renal vein") + @IO.Output('segmentation', 'segmentation.mha', 'mha:mod=seg:src=cleaned:model=GCNNUnetPancreas:roi=VEIN,ARTERY,PANCREAS,PANCREATIC_DUCT,BILE_DUCT', data="in_data", + the="cleaned segmentation of the pancreas, with the following classes: " + "0-background, 1-veins, 2-arteries, 3-pancreas, 4-pancreatic duct, 5-bile duct") + 
@IO.OutputData('cancer_likelihood', PancreaticTumorLikelihood, the='Case-level pancreatic tumor likelihood. This is equivalent to the maximum of the pancreatic tumor likelihood heatmap.') + def task(self, instance: Instance, in_data: InstanceData, heatmap: InstanceData, segmentation_raw: InstanceData, segmentation: InstanceData, cancer_likelihood: PancreaticTumorLikelihood, **kwargs) -> None: + # Call the PDAC CLI + # A CLI was used here to ensure the mhub framework properly captures the nnUNet stdout output + cmd = [ + sys.executable, + str(CLI_PATH), + in_data.abspath, + heatmap.abspath, + segmentation_raw.abspath + ] + self.subprocess(cmd, text=True) + + # Remove cysts and renal vein classes from the original segmentation. + # Insufficient training samples were present in the training data for these classes. + # Hence, these classes should be omitted from the final output, since these are not + # expected to produce reliable segmentations. + self.clean_segmentation( + segmentation_in=segmentation_raw, + segmentation_out=segmentation + ) + + # Extract case-level cancer likelihood + cancer_likelihood.value = self.extract_case_level_cancer_likelihood( + heatmap=heatmap + ) + + def clean_segmentation(self, segmentation_in: InstanceData, segmentation_out: InstanceData): + self.log("Cleaning output segmentation", level="NOTICE") + seg_sitk = SimpleITK.ReadImage(segmentation_in.abspath) + seg_numpy = SimpleITK.GetArrayFromImage(seg_sitk) + seg_numpy[seg_numpy >= 6] = 0 # remove cysts and renal vein segmentation from original segmentation + remapped_sitk = SimpleITK.GetImageFromArray(seg_numpy) + remapped_sitk.CopyInformation(seg_sitk) + SimpleITK.WriteImage(remapped_sitk, segmentation_out.abspath, True) + + def extract_case_level_cancer_likelihood(self, heatmap: InstanceData): + self.log("Extracting case-level cancer likelihood", level="NOTICE") + heatmap_sitk = SimpleITK.ReadImage(heatmap.abspath) + f = SimpleITK.MinimumMaximumImageFilter() + f.Execute(heatmap_sitk) + cancer_likelihood = f.GetMaximum() + assert 0.0 <= cancer_likelihood <= 1.0, "Cancer likelihood value must be in range [0.0, 1.0]" + return cancer_likelihood diff --git a/models/gc_nnunet_pancreas/utils/__init__.py b/models/gc_nnunet_pancreas/utils/__init__.py new file mode 100644 index 00000000..683c17d1 --- /dev/null +++ b/models/gc_nnunet_pancreas/utils/__init__.py @@ -0,0 +1 @@ +from .GCNNUnetPancreasRunner import * diff --git a/models/gc_nnunet_pancreas/utils/cli.py b/models/gc_nnunet_pancreas/utils/cli.py new file mode 100644 index 00000000..99af524e --- /dev/null +++ b/models/gc_nnunet_pancreas/utils/cli.py @@ -0,0 +1,60 @@ +""" +------------------------------------------------------------- +GC / MHub - CLI for the GC nnUnet Pancreas Algorithm + The model algorithm was wrapped in a CLI to ensure + the mhub framework is able to properly capture the nnUNet + stdout/stderr outputs +------------------------------------------------------------- + +------------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +------------------------------------------------------------- +""" +import argparse +from pathlib import Path + +# Import the algorithm pipeline class from the CE-CT_PDAC_AutomaticDetection_nnUnet repository +from process import PDACDetectionContainer
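+ +# Note: output_raw_heatmap=True presumably instructs the wrapped container to also expose the +# unmasked tumor likelihood heatmap; the runner above describes this output as the "raw heatmap +# ... (not masked with any pancreas segmentations)".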
+def run_pdac_detection( + input_ct_image: Path, output_heatmap: Path, output_segmentation: Path +): + # Configure the algorithm pipeline class and run it + algorithm = PDACDetectionContainer(output_raw_heatmap=True) + algorithm.ct_image = input_ct_image + algorithm.heatmap_raw = output_heatmap + algorithm.segmentation = output_segmentation + algorithm.process() + + +def run_pdac_detection_cli(): + parser = argparse.ArgumentParser("CLI for the GC nnUNet Pancreas Algorithm") + parser.add_argument( + "input_ct_image", + type=str, + help="input CT scan (MHA)" + ) + parser.add_argument( + "output_heatmap", + type=str, + help="raw heatmap of the pancreatic tumor likelihood (MHA)", + ) + parser.add_argument( + "output_segmentation", + type=str, + help="segmentation map of the pancreas (MHA), with the following classes: " + "0-background, 1-veins, 2-arteries, 3-pancreas, 4-pancreatic duct, 5-bile duct, " + "6-cysts, 7-renal vein", + ) + args = parser.parse_args() + run_pdac_detection( + input_ct_image=Path(args.input_ct_image), + output_heatmap=Path(args.output_heatmap), + output_segmentation=Path(args.output_segmentation), + ) + + +if __name__ == "__main__": + run_pdac_detection_cli() diff --git a/models/gc_picai_baseline/config/default.yml b/models/gc_picai_baseline/config/default.yml new file mode 100644 index 00000000..bb5d5deb --- /dev/null +++ b/models/gc_picai_baseline/config/default.yml @@ -0,0 +1,34 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: Prostate MRI classification default (dicom to json) + +execute: +- FileStructureImporter +- MhaConverter +- PicaiBaselineRunner +- ReportExporter +- DataOrganizer + +modules: + FileStructureImporter: + input_dir: input_data + structures: + - $sid@instance/$type@dicom:mod=mr + import_id: sid + + MhaConverter: + engine: panimg + allow_multi_input: true + + ReportExporter: + format: compact + includes: + - data: prostate_cancer_likelihood + label: prostate_cancer_likelihood + value: value + + DataOrganizer: + targets: + - json:mod=report-->[i:sid]/cspca-case-level-likelihood.json + - mha:mod=dm-->[i:sid]/cspca-detection-map.mha diff --git a/models/gc_picai_baseline/config/mha-pipeline.yml b/models/gc_picai_baseline/config/mha-pipeline.yml new file mode 100644 index 00000000..f20d5abc --- /dev/null +++ b/models/gc_picai_baseline/config/mha-pipeline.yml @@ -0,0 +1,31 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: Prostate MRI classification MHA pipeline (mha to json) + +execute: +- FileStructureImporter +- PicaiBaselineRunner +- ReportExporter +- DataOrganizer + +modules: + FileStructureImporter: + input_dir: input_data + structures: + - $sid@instance/images/transverse-adc-prostate-mri/adc.mha@mha:mod=mradc + - $sid/images/transverse-t2-prostate-mri/t2w.mha@mha:mod=mrt2 + - $sid/images/transverse-hbv-prostate-mri/hbv.mha@mha:mod=mrhbv + import_id: sid + + ReportExporter: + format: compact + includes: + - data: prostate_cancer_likelihood + label: prostate_cancer_likelihood + value: value + + DataOrganizer: + targets: + - json:mod=report-->[i:sid]/cspca-case-level-likelihood.json + - mha:mod=dm-->[i:sid]/cspca-detection-map.mha
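+ + # Note on the structures patterns above: $sid@instance declares a new instance keyed on the + # first path segment, and the two following patterns reuse the same $sid to attach the + # T2-weighted and HBV files to that instance (our reading of the FileStructureImporter syntax).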
diff --git a/models/gc_picai_baseline/dockerfiles/Dockerfile b/models/gc_picai_baseline/dockerfiles/Dockerfile new file mode 100644 index 00000000..7c2af162 --- /dev/null +++ b/models/gc_picai_baseline/dockerfiles/Dockerfile @@ -0,0 +1,55 @@ +FROM mhubai/base:latest + +# Specify/override authors label +LABEL authors="sil.vandeleemput@radboudumc.nl" + +# Install PyTorch 2.0.1 (CUDA enabled) +RUN pip3 install --no-cache-dir torch==2.0.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html + +# Install git-lfs (required for unpacking model weights) +RUN apt update && \ + apt install -y --no-install-recommends git-lfs && \ + rm -rf /var/lib/apt/lists/* + +# Install PICAI baseline algorithm and model weights +# - Git clone the algorithm repository for v2.1.2 (fixed to v2.1.2 tag) +# - We remove unnecessary files for a more compact docker layer +# - Subsequently we remove the .git directory to produce a more compact docker layer +RUN git clone --depth 1 --branch v2.1.2 https://github.com/DIAGNijmegen/picai_nnunet_semi_supervised_gc_algorithm.git /opt/algorithm && \ + rm -rf /opt/algorithm/test && \ + rm -rf /opt/algorithm/.git + +# Set this environment variable as a shortcut to avoid nnunet==1.7.0 crashing the build +# by pulling sklearn instead of scikit-learn +# N.B. this is a known issue: +# https://github.com/MIC-DKFZ/nnUNet/issues/1281 +# https://github.com/MIC-DKFZ/nnUNet/pull/1209 +ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True + +# Install additional PICAI requirements +RUN pip3 install --no-cache-dir -r /opt/algorithm/requirements.txt + +# Extend the nnUNet installation with custom trainers +RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \ + mv /opt/algorithm/nnUNetTrainerV2_focalLoss.py "$SITE_PKG/nnunet/training/network_training/nnUNet_variants/loss_function/nnUNetTrainerV2_focalLoss.py" +RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \ + mv /opt/algorithm/nnUNetTrainerV2_Loss_CE_checkpoints.py "$SITE_PKG/nnunet/training/network_training/nnUNetTrainerV2_Loss_CE_checkpoints.py" +RUN SITE_PKG=`pip3 show nnunet | grep "Location:" | awk '{print $2}'` && \ + mv /opt/algorithm/nnUNetTrainerV2_Loss_FL_and_CE.py "$SITE_PKG/nnunet/training/network_training/nnUNetTrainerV2_Loss_FL_and_CE.py" + +# Two code edits to the __init__ method of the algorithm class in process.py to prevent some of its default behavior +# 1. Skip the forced error caused by using different input locations than expected (we don't use the GC dirs) +# 2. Prevent unnecessary folder creation before input directories have been set (we will set the correct directory later) +RUN sed -i "s|file_paths = list(Path(folder).glob(scan_glob_format))|return|g" /opt/algorithm/process.py && \ + sed -i "s|self.cspca_detection_map_path.parent.mkdir(exist_ok=True, parents=True)||g" /opt/algorithm/process.py + +# Import the MHub model definition +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh gc_picai_baseline ${MHUB_MODELS_REPO} + +# Add PICAI baseline algorithm code base to python path +ENV PYTHONPATH="/app:/opt/algorithm" + +# Default entrypoint +ENTRYPOINT ["python3", "-m", "mhubio.run"] +CMD ["--config", "/app/models/gc_picai_baseline/config/default.yml"] diff --git a/models/gc_picai_baseline/meta.json b/models/gc_picai_baseline/meta.json new file mode 100644 index 00000000..4e270db4 --- /dev/null +++ b/models/gc_picai_baseline/meta.json @@ -0,0 +1,179 @@ +{ + "id": "c5f886fb-9f54-4555-a954-da02b22d6d3f", + "name": "gc_picai_baseline", + "title": "PI-CAI challenge baseline", + "summary": { + "description": "This algorithm predicts a detection map for the likelihood of clinically significant prostate cancer (csPCa) using biparametric MRI (bpMRI). The algorithm ensembles 5-fold cross-validation models that were trained on the PI-CAI: Public Training and Development Dataset v2.0. The detection map is at the same spatial resolution and physical dimensions as the input axial T2-weighted image. 
This model algorithm was used as a baseline for the PI-CAI challenge hosted on Grand Challenge.", + "inputs": [ + { + "label": "Transverse T2-weighted prostate biparametric MRI", + "description": "Transverse T2-weighted prostate biparametric MRI exam.", + "format": "DICOM", + "modality": "MR", + "bodypartexamined": "Prostate", + "slicethickness": "0.5 x 0.5 x 3.0 mm", + "non-contrast": false, + "contrast": false + }, + { + "label": "Transverse high b-value diffusion-weighted maps of the prostate", + "description": "Transverse high b-value diffusion-weighted (DWI) maps, with b-value of 1400 or 2000, either acquired or vendor-calculated.", + "format": "DICOM", + "modality": "MR", + "bodypartexamined": "Prostate", + "slicethickness": "0.5 x 0.5 x 3.0 mm", + "non-contrast": false, + "contrast": false + }, + { + "label": "Transverse apparent diffusion coefficient map of the prostate", + "description": "Transverse apparent diffusion coefficient (ADC) prostate MRI map.", + "format": "DICOM", + "modality": "MR", + "bodypartexamined": "Prostate", + "slicethickness": "0.5 x 0.5 x 3.0 mm", + "non-contrast": false, + "contrast": false + } + ], + "outputs": [ + { + "type": "Prediction", + "valueType": "Likelihood", + "label": "Prostate cancer likelihood", + "description": "Case-level likelihood of harboring clinically significant prostate cancer, in range [0,1].", + "classes": [] + }, + { + "type": "Prediction", + "valueType": "Likelihood map", + "label": "Transverse cancer detection map", + "description": "Detection map of clinically significant prostate cancer lesions in 3D, where each voxel represents a floating point in range [0,1]. This map is at the same spatial resolution and physical dimensions as the input transversal T2-weighted image.", + "classes": [] + } + ], + "model": { + "architecture": "3d fullres nnUNet", + "training": "semi-supervised", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 1500 + }, + "evaluation": { + "vol_samples": 1000 + }, + "public": false, + "external": false + } + }, + "details": { + "name": "PI-CAI challenge baseline", + "version": "2.1.1", + "devteam": "Diagnostic Image Analysis Group, Radboud University Medical Center, Nijmegen, The Netherlands", + "type": "Prediction", + "date": { + "weights": "2022-06-22", + "code": "2022-09-05", + "pub": "" + }, + "cite": "J. S. Bosma, A. Saha, M. Hosseinzadeh, I. Slootweg, M. de Rooij, and H. Huisman, \"Semisupervised Learning with Report-guided Pseudo Labels for Deep Learning–based Prostate Cancer Detection Using Biparametric MRI\", Radiology: Artificial Intelligence, 230031, 2023. DOI: 10.1148/ryai.230031", + "license": { + "code": "Apache 2.0", + "weights": "CC-BY-NC-SA-4.0" + }, + "publications": [ + { + "uri": "https://doi.org/10.5281/zenodo.6667655", + "title": "Artificial Intelligence and Radiologists at Prostate Cancer Detection in MRI: The PI-CAI Challenge (Study Protocol)" + }, + { + "uri": "https://pubs.rsna.org/doi/10.1148/ryai.230031", + "title": "Semisupervised Learning with Report-guided Pseudo Labels for Deep Learning–based Prostate Cancer Detection Using Biparametric MRI" + } + ], + "github": "https://github.com/DIAGNijmegen/picai_nnunet_semi_supervised_gc_algorithm", + "zenodo": "", + "colab": "", + "slicer": false + }, + "info": { + "use": { + "title": "Intended use", + "text": "This algorithm is a deep learning-based detection/diagnosis model, which ensembles 5 independent nnU-Net models (5-fold cross-validation). 
To predict the likelihood of harboring clinically significant prostate cancer (csPCa), the transversal T2-weighted, apparent diffusion coefficient (ADC) and high b-value diffusion weighted maps are required. The input sequences should be co-registered or aligned reasonably well and the prostate gland should be localized within a volume of 460 cm³ from the centre coordinate. The nnU-Net framework will internally resample all input scans to 0.5 x 0.5 x 3.0 mm. Per case the input data should be put into the following folder structure: `case1/adc`, `case1/hbv`, `case1/t2w`, corresponding respectively with the ADC, high b-value DWI, and the T2 weighted MR inputs for a case called `case1`.", + "references": [ + { + "label": "PI-CAI baseline algorithm on grand-challenge", + "uri": "https://grand-challenge.org/algorithms/pi-cai-baseline-nnu-net-semi-supervised/" + } + ], + "tables": [] + }, + "analyses": { + "title": "Evaluation", + "text": "Patient-level diagnosis performance is evaluated using the Area Under Receiver Operating Characteristic (AUROC) metric. Lesion-level detection performance is evaluated using the Average Precision (AP) metric.", + "references": [ + { + "label": "PI-CAI AI challenge details", + "uri": "https://pi-cai.grand-challenge.org/AI/" + }, + { + "label": "PI-CAI baseline algorithm evaluation results on grand-challenge.", + "uri": "https://pi-cai.grand-challenge.org/evaluation/fe187cdb-cb61-4cbb-ab63-2de483a52d60/" + } + ], + "tables": [ + { + "label": "Evaluation results on the PI-CAI testing cohort of 1000 cases.", + "entries": { + "AUROC": "0.865", + "AP": "0.576" + } + } + ] + }, + "evaluation": { + "title": "Evaluation data", + "text": "The PI-CAI Hidden Testing Cohort (1000 cases) includes internal testing data (unseen cases from seen centers) and external testing data (unseen cases from an unseen center).", + "references": [ + { + "label": "PI-CAI data section", + "uri": "https://pi-cai.grand-challenge.org/DATA/" + } + ], + "tables": [] + }, + "training": { + "title": "Training data", + "text": "The publicly available PI-CAI training and development dataset of 1500 biparametric MRI exams was used for training [1]. AI-derived annotations were created for cases without manual annotations [2]. This model was trained using a custom preprocessing step followed by the standard nnU-Net pipeline. 
The default nnU-Net loss-function was changed to Cross-Entropy + Focal loss [3].", + "references": [ + { + "label": "PI-CAI publicly available training and development dataset", + "uri": "https://zenodo.org/record/6624726" + }, + { + "label": "Method to obtain AI-derived annotations", + "uri": "https://fastmri.eu/research/bosma22a.html" + }, + { + "label": "Detailed description of training method", + "uri": "https://github.com/DIAGNijmegen/picai_baseline/blob/main/nnunet_baseline.md" + } + ], + "tables": [] + }, + "ethics": { + "title": "", + "text": "", + "references": [], + "tables": [] + }, + "limitations": { + "title": "Limitations", + "text": "This algorithm was developed for research purposes only.", + "references": [], + "tables": [] + } + } +} \ No newline at end of file diff --git a/models/gc_picai_baseline/utils/PicaiBaselineRunner.py b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py new file mode 100644 index 00000000..84dc1474 --- /dev/null +++ b/models/gc_picai_baseline/utils/PicaiBaselineRunner.py @@ -0,0 +1,65 @@ +""" +--------------------------------------------------------- +Mhub / DIAG - Run Module for the PICAI baseline Algorithm +--------------------------------------------------------- + +--------------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +--------------------------------------------------------- +""" + +import json +import sys +from pathlib import Path + +from mhubio.core import Instance, InstanceData, IO, Module, ValueOutput, ClassOutput, Meta + + +CLI_PATH = Path(__file__).parent / "cli.py" + + +@ValueOutput.Name('prostate_cancer_likelihood') +@ValueOutput.Label('ProstateCancerLikelihood') +@ValueOutput.Type(float) +@ValueOutput.Description('Likelihood of case-level prostate cancer.') +class ProstateCancerLikelihood(ValueOutput): + pass + + +class PicaiBaselineRunner(Module): + + @IO.Instance() + @IO.Input('in_data_t2', 'mha:mod=mr:type=t2w', the='input T2 weighted prostate MR image') + @IO.Input('in_data_adc', 'mha:mod=mr:type=adc', the='input ADC prostate MR image') + @IO.Input('in_data_hbv', 'mha:mod=mr:type=hbv', the='input HBV prostate MR image') + @IO.Output('cancer_likelihood_json', 'cspca-case-level-likelihood.json', "json", bundle='model', the='output JSON file with PICAI baseline prostate cancer likelihood') + @IO.Output('cancer_lesion_detection_map', 'cspca-detection-map.mha', "mha:mod=dm", bundle='model', the='output detection map of clinically significant prostate cancer lesions in 3D, where each voxel represents a floating point in range [0,1]') + @IO.OutputData('cancer_likelihood', ProstateCancerLikelihood, the='PICAI baseline prostate cancer likelihood') + def task(self, instance: Instance, in_data_t2: InstanceData, in_data_adc: InstanceData, in_data_hbv: InstanceData, cancer_likelihood_json: InstanceData, cancer_lesion_detection_map: InstanceData, cancer_likelihood: ProstateCancerLikelihood) -> None: + # build command (order matters!) 
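+ # The wrapped CLI (cli.py in this folder) takes five positional arguments, in this exact + # order: <t2> <adc> <hbv> <output_likelihood_json> <output_detection_map>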
+ cmd = [ + sys.executable, + str(CLI_PATH), + in_data_t2.abspath, + in_data_adc.abspath, + in_data_hbv.abspath, + cancer_likelihood_json.abspath, + cancer_lesion_detection_map.abspath, + ] + + # run the command as subprocess + self.subprocess(cmd, text=True) + + # Extract cancer likelihood value from cancer_likelihood_file + if not Path(cancer_likelihood_json.abspath).is_file(): + raise FileNotFoundError(f"Output file {cancer_likelihood_json.abspath} could not be found!") + + with open(cancer_likelihood_json.abspath, "r") as f: + cancer_lh = float(json.load(f)) + + if not (isinstance(cancer_lh, (float, int)) and (0.0 <= cancer_lh <= 1.0)): + raise ValueError(f"Cancer likelihood value should be between 0 and 1, found: {cancer_lh}") + + # Output the predicted values + cancer_likelihood.value = cancer_lh diff --git a/models/gc_picai_baseline/utils/__init__.py b/models/gc_picai_baseline/utils/__init__.py new file mode 100644 index 00000000..a0ec22bc --- /dev/null +++ b/models/gc_picai_baseline/utils/__init__.py @@ -0,0 +1 @@ +from .PicaiBaselineRunner import * diff --git a/models/gc_picai_baseline/utils/cli.py b/models/gc_picai_baseline/utils/cli.py new file mode 100644 index 00000000..deaf9ecf --- /dev/null +++ b/models/gc_picai_baseline/utils/cli.py @@ -0,0 +1,54 @@ +""" +-------------------------------------------------- +Mhub / DIAG - CLI for the PICAI baseline Algorithm +-------------------------------------------------- + +-------------------------------------------------- +Author: Sil van de Leemput +Email: sil.vandeleemput@radboudumc.nl +-------------------------------------------------- +""" + +import argparse +from pathlib import Path +from process import csPCaAlgorithm as PicaiClassifier + + +def run_classifier(t2: Path, adc: Path, hbv: Path, cancer_likelihood_json: Path, cancer_lesion_detection_map: Path): + # Initialize classifier object + classifier = PicaiClassifier() + + # Specify input files (the order is important!) 
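+ # scan_paths is positional: index 0 must be T2, 1 ADC and 2 HBV, matching the CLI argument + # order above (the wrapped csPCaAlgorithm appears to consume the paths by index).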
+ classifier.scan_paths = [ + t2, + adc, + hbv, + ] + + # Specify output files + classifier.cspca_detection_map_path = cancer_lesion_detection_map + classifier.case_confidence_path = cancer_likelihood_json + + # Run the classifier on the input images + classifier.process() + + +def run_classifier_cli(): + parser = argparse.ArgumentParser("CLI to run the PICAI baseline classifier") + parser.add_argument("input_t2", type=str, help="input T2 weighted prostate MR image (MHA)") + parser.add_argument("input_adc", type=str, help="input ADC prostate MR image (MHA)") + parser.add_argument("input_hbv", type=str, help="input HBV prostate MR image (MHA)") + parser.add_argument("output_cancer_likelihood_json", type=str, help="output JSON file with PICAI baseline prostate cancer likelihood (JSON)") + parser.add_argument("output_cancer_lesion_detection_map", type=str, help="output detection map of clinically significant prostate cancer lesions in 3D (MHA)") + args = parser.parse_args() + run_classifier( + t2=Path(args.input_t2), + adc=Path(args.input_adc), + hbv=Path(args.input_hbv), + cancer_likelihood_json=Path(args.output_cancer_likelihood_json), + cancer_lesion_detection_map=Path(args.output_cancer_lesion_detection_map), + ) + + +if __name__ == "__main__": + run_classifier_cli() diff --git a/models/lungmask/meta.json b/models/lungmask/meta.json index e24fb649..c8fafc07 100644 --- a/models/lungmask/meta.json +++ b/models/lungmask/meta.json @@ -28,7 +28,7 @@ } ], "model": { "architecture": "U-net", - "training": "Supervised", + "training": "supervised", "cmpapproach": "2D" }, "data": { @@ -38,7 +38,7 @@ "evaluation": { "vol_samples": 191 }, - "public": "Partially", + "public": false, "external": true } }, diff --git a/models/monai_prostate158/config/default.yml b/models/monai_prostate158/config/default.yml new file mode 100644 index 00000000..4f8c1a57 --- /dev/null +++ b/models/monai_prostate158/config/default.yml @@ -0,0 +1,36 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: default configuration for MONAI Prostate158 MR Prostate zonal regions segmentation (dicom to dicom) + +execute: +- DicomImporter +- NiftiConverter +- Prostate158Runner +- DsegConverter +- DataOrganizer + +modules: + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: true + meta: + mod: '%Modality' + + NiftiConverter: + in_datas: dicom:mod=mr + engine: dcm2niix + + Prostate158Runner: + in_data: nifti:mod=mr + + DsegConverter: + source_segs: nifti:mod=seg:roi=PROSTATE_TRANSITION_ZONE,PROSTATE_PERIPHERAL_ZONE + target_dicom: dicom:mod=mr + model_name: 'MONAI Prostate158' + skip_empty_slices: True + + DataOrganizer: + targets: + - dicomseg-->[i:sid]/monai_prostate158.seg.dcm
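+ + # Note: Prostate158Runner consumes the nifti:mod=mr file produced by the NiftiConverter + # above and emits a nifti:mod=seg mask, which DsegConverter then wraps as a DICOM SEG + # referencing the original MR series.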
diff --git a/models/monai_prostate158/dockerfiles/Dockerfile b/models/monai_prostate158/dockerfiles/Dockerfile new file mode 100644 index 00000000..22cab1b7 --- /dev/null +++ b/models/monai_prostate158/dockerfiles/Dockerfile @@ -0,0 +1,30 @@ +FROM mhubai/base:latest + +# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build +# by pulling sklearn instead of scikit-learn +# N.B. this is a known issue: +# https://github.com/MIC-DKFZ/nnUNet/issues/1281 +# https://github.com/MIC-DKFZ/nnUNet/pull/1209 +ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True + +ARG MONAI_BUNDLE_DIR='https://github.com/Project-MONAI/model-zoo/releases/download/hosting_storage_v1/prostate_mri_anatomy_v0.1.0.zip' +ARG MONAI_MODEL_NAME='prostate_mri_anatomy' + +# Install MONAI (with ignite) and additional Python dependencies +RUN python3 -m pip install --upgrade pip && pip3 install --no-cache-dir "monai[ignite]" fire nibabel simpleITK + +# Import the MHub model definition +ARG MHUB_MODELS_REPO +RUN buildutils/import_mhub_model.sh monai_prostate158 ${MHUB_MODELS_REPO} + +# Pull weights into the container +ENV WEIGHTS_DIR=/app/models/monai_prostate158/bundle +RUN mkdir -p $WEIGHTS_DIR +RUN python3 -m monai.bundle download "${MONAI_MODEL_NAME}" --bundle_dir ${WEIGHTS_DIR} + +# Define path to bundle root +ENV BUNDLE_ROOT=/app/models/monai_prostate158/bundle/prostate_mri_anatomy + +# Default run script +ENTRYPOINT ["mhub.run"] +CMD ["--config", "/app/models/monai_prostate158/config/default.yml"] diff --git a/models/monai_prostate158/meta.json b/models/monai_prostate158/meta.json new file mode 100644 index 00000000..ad964732 --- /dev/null +++ b/models/monai_prostate158/meta.json @@ -0,0 +1,131 @@ +{ + "id": "...", + "name": "monai_prostate158", + "title": "Prostate158 (Prostate transitional zone and peripheral zone segmentation)", + "summary": { + "description": "Prostate158 is a zonal prostate segmentation model: a multi-modality-input, AI-based pipeline for the automated segmentation of the peripheral and central gland of the prostate on axial T2 MRI scans.", + "inputs": [ + { + "label": "T2 input image", + "description": "The T2 axial sequence, one of the two input images of the original pipeline", + "format": "DICOM", + "modality": "MR", + "bodypartexamined": "Prostate", + "slicethickness": "3 mm", + "non-contrast": true, + "contrast": false + } + ], + "outputs": [ + { + "type": "Segmentation", + "classes": [ + "PROSTATE_TRANSITION_ZONE", + "PROSTATE_PERIPHERAL_ZONE" + ] + } + ], + "model": { + "architecture": "U-net", + "training": "supervised", + "cmpapproach": "3D" + }, + "data": { + "training": { + "vol_samples": 139 + }, + "evaluation": { + "vol_samples": 20 + }, + "public": true, + "external": false + } + }, + "details": { + "name": "Prostate158 - An expert-annotated 3T MRI dataset and algorithm for prostate cancer detection", + "version": "1.0.0", + "devteam": "Lisa C. Adams, Keno K. Bressem", + "type": "Prostate158 (U-Net structure for prostate segmentation)", + "date": { + "weights": "March 2022", + "code": "April 2022", + "pub": "September 2022" + }, + "cite": "Lisa C. Adams and Marcus R. Makowski and Günther Engel and Maximilian Rattunde and Felix Busch and Patrick Asbach and Stefan M. Niehues and Shankeeth Vinayahalingam and Bram {van Ginneken} and Geert Litjens and Keno K. 
Bressem, Prostate158 - An expert-annotated 3T MRI dataset and algorithm for prostate cancer detection", + "license": { + "code": "MIT", + "weights": "CC BY-NC 4.0" + }, + "publications": [ + { + "title": "Prostate158 - An expert-annotated 3T MRI dataset and algorithm for prostate cancer detection", + "uri": "https://doi.org/10.1016/j.compbiomed.2022.105817" + } + ], + "github": "https://github.com/Project-MONAI/model-zoo/tree/dev/models/prostate_mri_anatomy", + "zenodo": "https://zenodo.org/records/6481141" + }, + "info": { + "use": { + "title": "Intended Use", + "text": "This model is intended to perform prostate zonal anatomy segmentation in MR ADC and T2 scans. The slice thickness of the training data is 3 mm. The T2 input modality is used during training. To align with the model training pre-processing scheme, center-cropping of the input T2 image is recommended. No endorectal coil was present during training." + }, + "analyses": { + "title": "Quantitative Analyses", + "text": "The model's performance was assessed using the Dice coefficient on an internal test set and the ProstateX collection. The complete breakdown of the metrics can be consulted in the publication.", + "references": [ + { + "label": "Prostate158 - An expert-annotated 3T MRI dataset and algorithm for prostate cancer detection", + "uri": "https://doi.org/10.1016/j.compbiomed.2022.105817" + } + ] + }, + "evaluation": { + "title": "External Evaluation Data", + "text": "The evaluation datasets consist of 186 ProstateX samples and 32 prostate MRI Medical Decathlon dataset samples.", + "tables": [ + { + "label": "Medical Decathlon mean DSC for the segmentation of the central gland and peripheral zone", + "entries": { + "Central gland": "0.82", + "Peripheral zone": "0.64" + } + }, + { + "label": "ProstateX mean DSC for the segmentation of the central gland and peripheral zone", + "entries": { + "Central gland": "0.86", + "Peripheral zone": "0.71" + } + } + ], + "references": [{ + "label": "Medical Segmentation Decathlon", + "uri": "https://www.nature.com/articles/s41467-022-30695-9" + }, + { + "label": "Quality control and whole-gland, zonal and lesion annotations for the PROSTATEx challenge public dataset", + "uri": "https://www.sciencedirect.com/science/article/abs/pii/S0720048X21001273" + }] + }, + "training": { + "title": "Training Data", + "text": "The training dataset consists of 139 MRI cases containing the prostate, from the Prostate158 collection. 
+      "text": "The training dataset consists of 139 MRI cases containing the prostate, from the Prostate158 collection. The authors report the following characteristics for the T2 imaging sequences:",
+      "tables": [
+        {
+          "label": "Prostate158 dataset (training)",
+          "entries": {
+            "Slice Thickness": "3 mm",
+            "In-Plane Resolution": "0.47 mm"
+          }
+        }
+      ],
+      "references": [
+        {
+          "label": "Prostate158 dataset (Zenodo access)",
+          "uri": "https://zenodo.org/records/6481141"
+        }
+      ]
+    }
+  }
+}
diff --git a/models/monai_prostate158/utils/Prostate158Runner.py b/models/monai_prostate158/utils/Prostate158Runner.py
new file mode 100644
index 00000000..c0b42a5e
--- /dev/null
+++ b/models/monai_prostate158/utils/Prostate158Runner.py
@@ -0,0 +1,83 @@
+"""
+-------------------------------------------------
+MHub - MONAI Prostate158 Runner
+-------------------------------------------------
+
+-------------------------------------------------
+Author: Cosmin Ciausu
+Email: cciausu97@gmail.com
+-------------------------------------------------
+"""
+# TODO: support multi-i/o and batch processing on multiple instances
+
+import os, shutil, glob, sys
+import SimpleITK as sitk
+from mhubio.core import Module, Instance, InstanceData, FileType, IO
+
+@IO.Config('apply_center_crop', bool, True, the='flag to apply center cropping to the input image')
+class Prostate158Runner(Module):
+
+    apply_center_crop : bool
+
+    @IO.Instance()
+    @IO.Input("in_data", 'nifti:mod=mr', the="input T2 sequence data to run prostate158 on")
+    @IO.Output('out_data', 'monai_prostate158.nii.gz',
+               'nifti:mod=seg:model=MonaiProstate158:roi=PROSTATE_TRANSITION_ZONE,PROSTATE_PERIPHERAL_ZONE',
+               data='in_data', bundle='model', the="segmentation predicted by the model")
+    def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:
+
+        # bring the input data into the format expected by the MONAI bundle
+        # NOTE: only nifti data is supported for now (the file name and extension are hardcoded below)
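+        # NOTE: the bundle's "evaluating" workflow reads its inputs from the --datalist
+        # argument, which is passed below as a stringified python list of absolute file
+        # paths, e.g. "['/path/to/image.nii.gz']" (sketch of the expected format)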
+        assert in_data.type.ftype == FileType.NIFTI
+        assert in_data.abspath.endswith('.nii.gz')
+        datalist = [in_data.abspath]
+
+        if self.apply_center_crop:
+            in_dir_cropped = self.config.data.requestTempDir(label="monai-crop-in")
+            in_data_processed = os.path.join(in_dir_cropped, "image_cropped.nii.gz")
+            self.subprocess([sys.executable, os.path.join(os.environ['BUNDLE_ROOT'], 'scripts', 'center_crop.py'),
+                             "--file_name", in_data.abspath, "--out_name", in_data_processed], text=True)
+            datalist = [in_data_processed]
+
+        # define the output folder (temp dir)
+        out_dir = self.config.data.requestTempDir(label="monai-model-out")
+
+        # construct the MONAI bundle inference command
+        bash_command = [sys.executable, "-m", "monai.bundle", "run", "evaluating"]
+        bash_command += ["--meta_file", os.path.join(os.environ['BUNDLE_ROOT'], "configs", "metadata.json")]
+        bash_command += ["--config_file", os.path.join(os.environ['BUNDLE_ROOT'], "configs", "inference.json")]
+        bash_command += ["--datalist", str(datalist)]
+        bash_command += ["--output_dir", out_dir]
+        bash_command += ["--bundle_root", os.environ['BUNDLE_ROOT']]
+        bash_command += ["--dataloader#num_workers", "0"]
+        print(bash_command)
+        self.subprocess(bash_command, text=True)
+
+        # get output data
+        out_path = glob.glob(os.path.join(out_dir, "**", "*.nii.gz"), recursive=True)[0]
+
+        if self.apply_center_crop:
+            # pad the cropped segmentation back to the original T2 geometry
+            out_dir_padded = self.config.data.requestTempDir(label="monai-padded-out")
+            out_data_padded = os.path.join(out_dir_padded, "seg_padded.nii.gz")
+            paddedFilter = sitk.ConstantPadImageFilter()
+            seg_image = sitk.ReadImage(out_path)
+            t2_image = sitk.ReadImage(in_data.abspath)
+            # NOTE: sitk.GetArrayFromImage returns arrays indexed (z, y, x), while the
+            # pad bounds below are given in image order (x, y, z)
+            out_seg_shape = sitk.GetArrayFromImage(seg_image).shape
+            t2_image_shape = sitk.GetArrayFromImage(t2_image).shape
+            # split the size difference evenly between the lower and upper bounds,
+            # assigning the odd remainder to the upper bound
+            x_bound_lower = int((t2_image_shape[2] - out_seg_shape[2]) / 2)
+            x_bound_upper = int((t2_image_shape[2] - out_seg_shape[2]) / 2 + ((t2_image_shape[2] - out_seg_shape[2]) % 2))
+            y_bound_lower = int((t2_image_shape[1] - out_seg_shape[1]) / 2)
+            y_bound_upper = int((t2_image_shape[1] - out_seg_shape[1]) / 2 + ((t2_image_shape[1] - out_seg_shape[1]) % 2))
+            paddedFilter.SetConstant(0)
+            paddedFilter.SetPadLowerBound([x_bound_lower, y_bound_lower, 0])
+            paddedFilter.SetPadUpperBound([x_bound_upper, y_bound_upper, 0])
+            padded_img = paddedFilter.Execute(seg_image)
+            sitk.WriteImage(padded_img, out_data_padded)
+            out_path = out_data_padded
+
+        # copy output data to instance
+        shutil.copyfile(out_path, out_data.abspath)
diff --git a/models/monai_prostate158/utils/__init__.py b/models/monai_prostate158/utils/__init__.py
new file mode 100644
index 00000000..d03d6b1f
--- /dev/null
+++ b/models/monai_prostate158/utils/__init__.py
@@ -0,0 +1 @@
+from .Prostate158Runner import *
\ No newline at end of file
diff --git a/models/nnunet_liver/meta.json b/models/nnunet_liver/meta.json
index 27527748..34b9469f 100644
--- a/models/nnunet_liver/meta.json
+++ b/models/nnunet_liver/meta.json
@@ -23,8 +23,8 @@
     } ],
     "model": {
       "architecture": "U-net",
-      "training": "Supervised",
-      "cmpapproach": "2D, 3D, ensemble"
+      "training": "supervised",
+      "cmpapproach": "ensemble"
     },
     "data": {
       "training": {
@@ -33,7 +33,7 @@
       "evaluation": {
         "vol_samples": 70
       },
-      "public": "Yes",
+      "public": true,
       "external": false
     }
   },
diff --git a/models/nnunet_pancreas/meta.json b/models/nnunet_pancreas/meta.json
index 3f8d3710..b95b10a7 100644
--- a/models/nnunet_pancreas/meta.json
+++ b/models/nnunet_pancreas/meta.json
@@ -23,8 +23,8 @@
     } ],
     "model": {
       "architecture": "U-net",
-      "training": "Supervised",
-      "cmpapproach": "2D, 3D, ensemble"
+      "training": "supervised",
+      "cmpapproach": "ensemble"
     },
     "data": {
       "training": {
@@ -33,7 +33,7 @@
       "evaluation": {
         "vol_samples": 139
       },
-      "public": "Yes",
+      "public": true,
       "external": false
     }
   },
diff --git a/models/nnunet_prostate_task24/config/default.yml b/models/nnunet_prostate_task24/config/default.yml
new file mode 100644
index 00000000..bde6f648
--- /dev/null
+++ b/models/nnunet_prostate_task24/config/default.yml
@@ -0,0 +1,39 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: default configuration for nnUNet MR Prostate segmentation (dicom to dicom)
+
+execute:
+- DicomImporter
+- NiftiConverter
+- NNUnetRunner
+- DsegConverter
+- DataOrganizer
+
+modules:
+  DicomImporter:
+    source_dir: input_data
+    import_dir: sorted_data
+    sort_data: true
+    meta:
+      mod: '%Modality'
+
+  NiftiConverter:
+    in_datas: dicom:mod=mr
+    engine: dcm2niix
+
+  NNUnetRunner:
+    in_data: nifti:mod=mr
+    nnunet_task: Task024_Promise
+    nnunet_model: 3d_fullres
+    roi: PROSTATE
+
+  DsegConverter:
+    source_segs: nifti:mod=seg
+    target_dicom: dicom:mod=mr
+    model_name: 'nnUNet MR Prostate'
+    skip_empty_slices: True
+
+  DataOrganizer:
+    targets:
+    - dicomseg-->[i:sid]/nnunet_mr_prostate.seg.dcm
diff --git a/models/nnunet_prostate_task24/dockerfiles/Dockerfile b/models/nnunet_prostate_task24/dockerfiles/Dockerfile
new file mode 100644
index 00000000..5ad3ab81
--- /dev/null
+++ b/models/nnunet_prostate_task24/dockerfiles/Dockerfile
@@ -0,0 +1,32 @@
+FROM mhubai/base:latest
+
+# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build
+# by pulling sklearn instead of scikit-learn
+# N.B. this is a known issue:
+# https://github.com/MIC-DKFZ/nnUNet/issues/1281
+# https://github.com/MIC-DKFZ/nnUNet/pull/1209
+ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True
+
+# Install nnunet
+RUN pip3 install --no-cache-dir \
+    nnunet
+
+# Clone the main branch of MHubAI/models
+ARG MHUB_MODELS_REPO
+RUN buildutils/import_mhub_model.sh nnunet_prostate_task24 ${MHUB_MODELS_REPO}
+
+# Pull weights into the container
+ENV WEIGHTS_DIR=/root/.nnunet/nnUNet_models/nnUNet/
+RUN mkdir -p $WEIGHTS_DIR
+ENV WEIGHTS_FN=Task024_Promise.zip
+ENV WEIGHTS_URL=https://zenodo.org/records/4003545/files/$WEIGHTS_FN
+RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL}
+RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR}
+RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN}
+
+# specify nnunet specific environment variables
+ENV WEIGHTS_FOLDER=$WEIGHTS_DIR
+
+# Default run script
+ENTRYPOINT ["mhub.run"]
+CMD ["--config", "/app/models/nnunet_prostate_task24/config/default.yml"]
diff --git a/models/nnunet_prostate_task24/meta.json b/models/nnunet_prostate_task24/meta.json
new file mode 100644
index 00000000..c21f91b5
--- /dev/null
+++ b/models/nnunet_prostate_task24/meta.json
@@ -0,0 +1,141 @@
+{
+  "id": "...",
+  "name": "nnunet_prostate_task24",
+  "title": "nnU-Net (Whole prostate segmentation)",
+  "summary": {
+    "description": "nnU-Net's whole prostate segmentation model is a single-modality (i.e. T2) input AI-based pipeline for the automated segmentation of the whole prostate on MRI scans.",
+    "inputs": [
+      {
+        "label": "T2 input image",
+        "description": "The axially-acquired T2 sequence, the only input image of the model",
+        "format": "DICOM",
+        "modality": "MR",
+        "bodypartexamined": "Prostate",
+        "slicethickness": "2.2~4 mm",
+        "non-contrast": true,
+        "contrast": false
+      }
+    ],
+    "outputs": [
+      {
+        "type": "Segmentation",
+        "classes": [
+          "PROSTATE"
+        ]
+      }
+    ],
+    "model": {
+      "architecture": "U-net",
+      "training": "supervised",
+      "cmpapproach": "3D"
+    },
+    "data": {
+      "training": {
+        "vol_samples": 50
+      },
+      "evaluation": {
+        "vol_samples": 30
+      },
+      "public": true,
+      "external": false
+    }
+  },
+  "details": {
+    "name": "nnU-Net whole prostate segmentation model",
+    "version": "1.0.0",
+    "devteam": "MIC-DKFZ (Helmholtz Imaging Applied Computer Vision Lab)",
+    "type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)",
+    "date": {
+      "weights": "2020",
+      "code": "2020",
+      "pub": "2020"
+    },
+    "cite": "Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2020). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature Methods, 1-9.",
+    "license": {
+      "code": "Apache 2.0",
+      "weights": "CC BY-NC 4.0"
+    },
+    "publications": [
+      {
+        "title": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation",
+        "uri": "https://www.nature.com/articles/s41592-020-01008-z"
+      }
+    ],
+    "github": "https://github.com/MIC-DKFZ/nnUNet/tree/nnunetv1",
+    "zenodo": "https://zenodo.org/record/4485926"
+  },
+  "info": {
+    "use": {
+      "title": "Intended Use",
+      "text": "This model is intended to perform whole prostate anatomy segmentation in MR T2 scans. The slice thickness of the training data is 2.2~4 mm. An endorectal coil was present during training."
+    },
+    "analyses": {
+      "title": "Quantitative Analyses",
+      "text": "The model's performance was assessed using the Dice Coefficient, in the context of the Promise12 challenge. A brief summary of the evaluation results on internal data can be found in the evaluation section. The complete breakdown of the metrics can be consulted on GrandChallenge [1] and is reported in the supplementary material to the publication [2].",
+      "references": [
+        {
+          "label": "Evaluation of prostate segmentation algorithms for MRI: The PROMISE12 challenge",
+          "uri": "https://doi.org/10.1016/j.media.2013.12.002"
+        },
+        {
+          "label": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation",
+          "uri": "https://www.nature.com/articles/s41592-020-01008-z"
+        }
+      ]
+    },
+    "evaluation": {
+      "title": "Evaluation Data",
+      "text": "The evaluation dataset consists of 30 test samples coming from the Promise12 challenge.",
+      "tables": [
+        {
+          "label": "Promise12 training set average DSC using five-fold cross-validation",
+          "entries": {
+            "2D": "0.8932",
+            "3d_fullres": "0.8891",
+            "Best ensemble (2D + 3D_fullres)": "0.9029",
+            "Postprocessed": "0.9030"
+          }
+        },
+        {
+          "label": "Promise12 test set average DSC",
+          "entries": {
+            "Test set average DSC": "0.9194"
+          }
+        }
+      ],
+      "references": [
+        {
+          "label": "Evaluation of prostate segmentation algorithms for MRI: The PROMISE12 challenge",
+          "uri": "https://doi.org/10.1016/j.media.2013.12.002"
+        },
+        {
+          "label": "PROMISE12 dataset (direct download)",
+          "uri": "https://zenodo.org/records/8026660"
+        }
+      ]
+    },
+    "training": {
+      "title": "Training Data",
+      "text": "The training dataset consists of 50 MRI cases containing the prostate, from the Promise12 challenge. The authors report the following characteristics for the training dataset:",
+      "tables": [
+        {
+          "label": "PROMISE12 dataset (training)",
+          "entries": {
+            "Slice Thickness": "2.2~4 mm",
+            "In-Plane Resolution": "0.27 mm"
+          }
+        }
+      ],
+      "references": [
+        {
+          "label": "Evaluation of prostate segmentation algorithms for MRI: The PROMISE12 challenge",
+          "uri": "https://doi.org/10.1016/j.media.2013.12.002"
+        },
+        {
+          "label": "PROMISE12 dataset (direct download)",
+          "uri": "https://zenodo.org/records/8026660"
+        }
+      ]
+    }
+  }
+}
diff --git a/models/nnunet_prostate_zonal_task05/config/default.yml b/models/nnunet_prostate_zonal_task05/config/default.yml
new file mode 100644
index 00000000..84aadbfa
--- /dev/null
+++ b/models/nnunet_prostate_zonal_task05/config/default.yml
@@ -0,0 +1,40 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: Prostate MR ADC-T2 segmentation (dicom2dicom)
+
+execute:
+- FileStructureImporter
+- NiftiConverter
+- ProstateResampler
+- ProstateRunner
+- DsegConverter
+- DataOrganizer
+
+modules:
+  FileStructureImporter:
+    outsource_instances: True
+    import_id: patientID/studyID
+    structures:
+    - $patientID/$studyID@instance/$part@bundle@dicom
+    - $patientID@instance:studyID=none/ADC$part@bundle@dicom
+    - $patientID@instance:studyID=none/T2$part@bundle@dicom
+
+  NiftiConverter:
+    in_datas: dicom:part=ADC|T2
+    allow_multi_input: true
+    overwrite_existing_file: true
+
+  DsegConverter:
+    model_name: nnUNet Zonal Prostate (Task05)
+    target_dicom: dicom:part=T2
+    source_segs: nifti:mod=seg:roi=*
+    body_part_examined: PROSTATE
+    skip_empty_slices: True
+    segment_id_meta_key: roi
+
+  DataOrganizer:
+    targets:
+    - DICOMSEG:mod=seg-->[i:patientID]/[i:studyID]/nnunet_prostate_zonal_task05.seg.dcm
+#    - NIFTI:mod=seg-->[i:patientID]/[i:studyID]/results.nii.gz
+#    - LOG-->[i:patientID]/[i:studyID]/logs/[d:part]/[basename]
\ No newline at end of file
diff --git a/models/nnunet_prostate_zonal_task05/dockerfiles/Dockerfile b/models/nnunet_prostate_zonal_task05/dockerfiles/Dockerfile
new file mode 100644
index 00000000..416f17b0
--- /dev/null
+++ b/models/nnunet_prostate_zonal_task05/dockerfiles/Dockerfile
@@ -0,0 +1,39 @@
+FROM mhubai/base:latest
+
+# Authors of the image
+LABEL authors="lnuernberg@bwh.harvard.edu"
+
+# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build
+# by pulling sklearn instead of scikit-learn
+# N.B. this is a known issue:
+# https://github.com/MIC-DKFZ/nnUNet/issues/1281
+# https://github.com/MIC-DKFZ/nnUNet/pull/1209
+ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True
+
+# install additional system dependencies
+RUN apt update && apt install -y dcm2niix
+
+# install additional python dependencies
+RUN pip3 install --no-cache-dir \
+    nnunet \
+    nibabel
+
+# pull the nnU-Net weights into the container so the user doesn't need to download them every time a container is run
+ENV WEIGHTS_DIR="/root/.nnunet/nnUNet_models/nnUNet/"
+ENV WEIGHTS_URL="https://www.dropbox.com/s/igpwt45v6hlquxp/Task005_Prostate.zip"
+ENV WEIGHTS_FN="Task005_Prostate.zip"
+
+RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL}
+RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR}
+RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN}
+
+# Import the MHub model definition
+ARG MHUB_MODELS_REPO
+RUN buildutils/import_mhub_model.sh nnunet_prostate_zonal_task05 ${MHUB_MODELS_REPO}
+
+# specify nnunet specific environment variables
+ENV WEIGHTS_FOLDER=$WEIGHTS_DIR
+
+# Default run script
+ENTRYPOINT ["mhub.run"]
+CMD ["--config", "/app/models/nnunet_prostate_zonal_task05/config/default.yml"]
\ No newline at end of file
diff --git a/models/nnunet_prostate_zonal_task05/meta.json b/models/nnunet_prostate_zonal_task05/meta.json
new file mode 100644
index 00000000..43eac000
--- /dev/null
+++ b/models/nnunet_prostate_zonal_task05/meta.json
@@ -0,0 +1,178 @@
+{
+  "id": "f2eb536b-448a-4e9a-8981-3efc51301f62",
+  "name": "nnunet_prostate_zonal_task05",
+  "title": "nnU-Net (Prostate transitional zone and peripheral zone segmentation)",
+  "summary": {
+    "description": "nnU-Net's zonal prostate segmentation model is a multi-modality input AI-based pipeline for the automated segmentation of the peripheral and transition zone of the prostate on MRI scans.",
+    "inputs": [
+      {
+        "label": "T2 input image",
+        "description": "The T2 axial sequence, one of the two input images of the model",
+        "format": "DICOM",
+        "modality": "MR",
+        "bodypartexamined": "Prostate",
+        "slicethickness": "3.6 mm",
+        "non-contrast": true,
+        "contrast": false
+      },
+      {
+        "label": "ADC Input Image",
+        "description": "The ADC axial sequence, one of the two input images of the model",
+        "format": "DICOM",
+        "modality": "MR",
+        "bodypartexamined": "Prostate",
+        "slicethickness": "3.6 mm",
+        "non-contrast": true,
+        "contrast": false
+      }
+    ],
+    "outputs": [
+      {
+        "type": "Segmentation",
+        "classes": [
+          "PROSTATE_PERIPHERAL_ZONE",
+          "PROSTATE_TRANSITION_ZONE"
+        ]
+      }
+    ],
+    "model": {
+      "architecture": "U-net",
+      "training": "supervised",
+      "cmpapproach": "3D"
+    },
+    "data": {
+      "training": {
+        "vol_samples": 32
+      },
+      "evaluation": {
+        "vol_samples": 16
+      },
+      "public": true,
+      "external": false
+    }
+  },
+  "details": {
+    "name": "nnU-Net Zonal Prostate Regions Segmentation Model",
+    "version": "1.0.0",
+    "devteam": "MIC-DKFZ (Helmholtz Imaging Applied Computer Vision Lab)",
+    "type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)",
+    "date": {
+      "weights": "2020",
+      "code": "2020",
+      "pub": "2020"
+    },
+    "cite": "Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2020). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature Methods, 1-9.",
+    "license": {
+      "code": "Apache 2.0",
+      "weights": "CC BY-NC 4.0"
+    },
+    "publications": [
+      {
+        "title": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation",
+        "uri": "https://www.nature.com/articles/s41592-020-01008-z"
+      }
+    ],
+    "github": "https://github.com/MIC-DKFZ/nnUNet/tree/nnunetv1",
+    "zenodo": "https://zenodo.org/record/4485926"
+  },
+  "info": {
+    "use": {
+      "title": "Intended Use",
+      "text": "This model is intended to perform prostate regions anatomy segmentation in MR ADC and T2 scans. The slice thickness of the training data is 3.6 mm. The input ADC and T2 modalities are co-registered during training. No endorectal coil was present during training."
+    },
+    "analyses": {
+      "title": "Quantitative Analyses",
+      "text": "The model's performance was assessed using the Dice Coefficient, in the context of the Medical Segmentation Decathlon challenge. The complete breakdown of the metrics can be consulted on GrandChallenge [1] and is reported in the supplementary material to the publication [2].",
+      "references": [
+        {
+          "label": "Medical Segmentation Decathlon on GrandChallenge",
+          "uri": "https://decathlon-10.grand-challenge.org/evaluation/challenge/leaderboard"
+        },
+        {
+          "label": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation",
+          "uri": "https://www.nature.com/articles/s41592-020-01008-z"
+        }
+      ]
+    },
+    "evaluation": {
+      "title": "Evaluation Data",
+      "text": "The evaluation dataset consists of 16 validation samples coming from the Medical Decathlon collection.",
+      "tables": [
+        {
+          "label": "Mean DSC for the peripheral zone on internal training data, using five-fold cross-validation",
+          "entries": {
+            "2D": "0.6285",
+            "3D_fullres": "0.6663",
+            "Best ensemble (2D + 3D_fullres)": "0.6611",
+            "Postprocessed": "0.6611"
+          }
+        },
+        {
+          "label": "Mean DSC for the transition zone on internal training data, using five-fold cross-validation",
+          "entries": {
+            "2D": "0.8380",
+            "3D_fullres": "0.8410",
+            "Best ensemble (2D + 3D_fullres)": "0.8575",
+            "Postprocessed": "0.8577"
+          }
+        },
+        {
+          "label": "Mean DSC for the prostate zonal regions on internal test data",
+          "entries": {
+            "mean DSC for PZ": "0.77",
+            "mean DSC for TZ": "0.90"
+          }
+        }
+      ],
+      "references": [
+        {
+          "label": "Medical Segmentation Decathlon",
+          "uri": "https://www.nature.com/articles/s41467-022-30695-9"
+        },
+        {
+          "label": "Medical Decathlon Prostate dataset (direct download)",
+          "uri": "https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2"
+        }
+      ]
+    },
+    "training": {
+      "title": "Training Data",
+      "text": "The training dataset consists of 32 MRI cases containing the prostate, from the Medical Segmentation Decathlon. The authors report the following characteristics for the MRI scans of the training dataset:",
+      "tables": [
+        {
+          "label": "Medical Segmentation Decathlon dataset (training)",
+          "entries": {
+            "Slice Thickness": "3.6 mm",
+            "In-Plane Resolution": "0.62 mm"
+          }
+        }
+      ],
+      "references": [
+        {
+          "label": "Medical Segmentation Decathlon",
+          "uri": "https://www.nature.com/articles/s41467-022-30695-9"
+        },
+        {
+          "label": "Medical Decathlon Prostate dataset (direct download)",
+          "uri": "https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2"
+        }
+      ]
+    },
+    "limitations": {
+      "title": "Dealing with multi-modality input",
+      "text": "The authors recommend co-registration of the ADC and T2 input sequences, as applied during training. At the very least, the ADC and T2 sequences need to have identical geometry for nnU-Net to run. Since ADC and T2 sequences encountered during evaluation will more often than not fail this requirement, we resample the ADC sequence to the T2 sequence, since T2 tends to have a higher resolution. Below are some references regarding nnU-Net's recommendations for multi-modality input, alongside the paper describing the registration process of the ADC and T2 sequences in the Medical Segmentation Decathlon dataset.",
+      "references": [
+        {
+          "label": "Litjens et al., A pattern recognition approach to zonal segmentation of the prostate on MRI",
+          "uri": "https://pubmed.ncbi.nlm.nih.gov/23286075/"
+        },
+        {
+          "label": "Alignment of multi channel inputs for nnunet #502",
+          "uri": "https://github.com/MIC-DKFZ/nnUNet/issues/502"
+        },
+        {
+          "label": "Multi-modality dataset conversion issue #306",
+          "uri": "https://github.com/MIC-DKFZ/nnUNet/issues/306"
+        }
+      ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/models/nnunet_prostate_zonal_task05/utils/ProstateResampler.py b/models/nnunet_prostate_zonal_task05/utils/ProstateResampler.py
new file mode 100644
index 00000000..35eba6a4
--- /dev/null
+++ b/models/nnunet_prostate_zonal_task05/utils/ProstateResampler.py
@@ -0,0 +1,36 @@
+import pyplastimatch as pypla
+
+from mhubio.core import Module, Instance, DataType, InstanceData, FileType, IO
+
+# for this specific use case, we resample the ADC image to match the T2 image
+# (T2 serves as the 'desired_grid' / fixed image)
+# TODO: add reference to colab notebook?
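+# For reference, pypla.resample below wraps the plastimatch CLI; the call is roughly
+# equivalent to the following invocation (sketch; argument names mirror the
+# resample_args dict passed in):
+#   plastimatch resample --input <adc.nii.gz> --fixed <t2.nii.gz> --output <adc_resampled.nii.gz>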
+class ProstateResampler(Module):
+
+    @IO.Instance()
+    @IO.Input('in_data', 'nifti:part=ADC', the="ADC image")
+    @IO.Input('fixed_data', 'nifti:part=T2', the="T2 image")
+    @IO.Output('out_data', 'resampled.nii.gz', 'nifti:part=ADC:resampled_to=T2', data='in_data', the="ADC image resampled to T2")
+    def task(self, instance: Instance, in_data: InstanceData, fixed_data: InstanceData, out_data: InstanceData):
+
+        # log data
+        log_data = InstanceData('_pypla.log', DataType(FileType.LOG, in_data.type.meta + {
+            "log-origin": "plastimatch",
+            "log-task": "resampling",
+            "log-caller": "Resampler",
+            "log-instance": str(instance)
+        }), data=in_data, auto_increment=True)
+
+        # resampling parameters (resample the ADC image onto the T2 grid)
+        resample_args = {
+            'input': in_data.abspath,
+            'output': out_data.abspath,
+            'fixed': fixed_data.abspath,
+        }
+
+        pypla.resample(
+            verbose=self.config.verbose,
+            path_to_log_file=log_data.abspath,
+            **resample_args # type: ignore
+        )
\ No newline at end of file
diff --git a/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py b/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py
new file mode 100644
index 00000000..1feca9e7
--- /dev/null
+++ b/models/nnunet_prostate_zonal_task05/utils/ProstateRunner.py
@@ -0,0 +1,49 @@
+import os, shutil
+from mhubio.core import Module, Instance, InstanceData, IO
+
+@IO.Config('use_tta', bool, False, the='flag to enable test time augmentation')
+@IO.Config('nnunet_model', str, '3d_fullres', the='nnunet model name (2d, 3d_lowres, 3d_fullres, 3d_cascade_fullres)')
+class ProstateRunner(Module):
+
+    use_tta: bool
+    nnunet_model: str
+
+    @IO.Instance()
+    @IO.Input('T2', 'nifti:part=T2', the="T2 image")
+    @IO.Input('ADC', 'nifti:part=ADC:resampled_to=T2', the="ADC image resampled to T2")
+    @IO.Output('P', 'VOLUME_001.nii.gz', 'nifti:mod=seg:model=nnunet_t005_prostate:roi=PROSTATE_PERIPHERAL_ZONE,PROSTATE_TRANSITION_ZONE', bundle='nnunet-out', the="Prostate segmentation")
+    def task(self, instance: Instance, T2: InstanceData, ADC: InstanceData, P: InstanceData) -> None:
+
+        # copy input files to align with the nnunet input folder and file name format
+        # T2:  0000
+        # ADC: 0001
+        inp_dir = self.config.data.requestTempDir(label="nnunet-model-inp")
+        inp_file_T2 = 'VOLUME_001_0000.nii.gz'
+        inp_file_ADC = 'VOLUME_001_0001.nii.gz'
+        shutil.copyfile(T2.abspath, os.path.join(inp_dir, inp_file_T2))
+        shutil.copyfile(ADC.abspath, os.path.join(inp_dir, inp_file_ADC))
+
+        # define the output folder (temp dir) and override the results environment variable for nnunet
+        assert P.bundle is not None, f"Output bundle is required: {str(P)}"
+        os.environ['RESULTS_FOLDER'] = P.bundle.abspath
+
+        # symlink the weights folder into the results folder
+        # NOTE: this is a workaround for nnunet, which expects its trained models inside
+        #       the results folder rather than in the mhub folder structure.
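+        # After the symlink, the layout is roughly as follows (sketch; exact
+        # subfolders depend on the downloaded weights):
+        #   $RESULTS_FOLDER/nnUNet/3d_fullres/Task005_Prostate/...
+        # which is where nnUNet_predict looks up the trained model.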
+ os.symlink(os.environ['WEIGHTS_FOLDER'], os.path.join(P.bundle.abspath, 'nnUNet')) + + # construct nnunet inference command + bash_command = ["nnUNet_predict"] + bash_command += ["--input_folder", str(inp_dir)] + bash_command += ["--output_folder", str(P.bundle.abspath)] + bash_command += ["--task_name", 'Task005_Prostate'] + bash_command += ["--model", self.nnunet_model] + + # optional / customizable arguments + if not self.use_tta: + bash_command += ["--disable_tta"] + + # run command + self.subprocess(bash_command, text=True) \ No newline at end of file diff --git a/models/nnunet_segthor/config/default.yml b/models/nnunet_segthor/config/default.yml new file mode 100644 index 00000000..239a395c --- /dev/null +++ b/models/nnunet_segthor/config/default.yml @@ -0,0 +1,33 @@ +general: + data_base_dir: /app/data + version: 1.0 + description: default configuration for NNUnet Thoracic Organs at Risk segmentation (dicom to dicom) + +execute: +- DicomImporter +- NiftiConverter +- NNUnetRunner +- DsegConverter +- DataOrganizer + +modules: + DicomImporter: + source_dir: input_data + import_dir: sorted_data + sort_data: true + meta: + mod: '%Modality' + + NNUnetRunner: + nnunet_task: Task055_SegTHOR + nnunet_model: 3d_lowres + roi: ESOPHAGUS,HEART,TRACHEA,AORTA + + DsegConverter: + source_segs: nifti:mod=seg + model_name: NNUnet Thoracic OAR + skip_empty_slices: True + + DataOrganizer: + targets: + - dicomseg-->[i:sid]/nnunet_segthor.seg.dcm \ No newline at end of file diff --git a/models/nnunet_segthor/dockerfiles/Dockerfile b/models/nnunet_segthor/dockerfiles/Dockerfile new file mode 100644 index 00000000..779da51c --- /dev/null +++ b/models/nnunet_segthor/dockerfiles/Dockerfile @@ -0,0 +1,32 @@ +FROM mhubai/base:latest + +# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build +# by pulling sklearn instead of scikit-learn +# N.B. 
this is a known issue:
+# https://github.com/MIC-DKFZ/nnUNet/issues/1281
+# https://github.com/MIC-DKFZ/nnUNet/pull/1209
+ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True
+
+# Install nnunet (pinned)
+RUN pip3 install --no-cache-dir \
+    nnunet==1.7.1
+
+# pull the nnU-Net weights into the container so the user doesn't need to download them every time a container is run
+ENV WEIGHTS_DIR="/root/.nnunet/nnUNet_models/nnUNet/"
+ENV WEIGHTS_URL="https://zenodo.org/record/4485926/files/Task055_SegTHOR.zip"
+ENV WEIGHTS_FN="Task055_SegTHOR.zip"
+
+RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL}
+RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR}
+RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN}
+
+# specify nnunet specific environment variables
+ENV WEIGHTS_FOLDER=$WEIGHTS_DIR
+
+# Import the MHub model definition
+ARG MHUB_MODELS_REPO
+RUN buildutils/import_mhub_model.sh nnunet_segthor ${MHUB_MODELS_REPO}
+
+# Default run script
+ENTRYPOINT ["mhub.run"]
+CMD ["--config", "/app/models/nnunet_segthor/config/default.yml"]
\ No newline at end of file
diff --git a/models/nnunet_segthor/meta.json b/models/nnunet_segthor/meta.json
new file mode 100644
index 00000000..8e32b9cf
--- /dev/null
+++ b/models/nnunet_segthor/meta.json
@@ -0,0 +1,123 @@
+{
+  "id": "69754d0c-0521-4986-9763-c0df6594b6bf",
+  "name": "nnunet_segthor",
+  "title": "nnU-Net (thoracic OAR)",
+  "summary": {
+    "description": "nnU-Net's thoracic OAR segmentation model is an AI-based pipeline for the automated segmentation of the heart, the aorta, the esophagus and the trachea in CT scans (with and without contrast).",
+    "inputs": [
+      {
+        "label": "Input Image",
+        "description": "The CT scan of a patient.",
+        "format": "DICOM",
+        "modality": "CT",
+        "bodypartexamined": "Chest",
+        "slicethickness": "2.5 mm",
+        "non-contrast": true,
+        "contrast": true
+      }
+    ],
+    "outputs": [
+      {
+        "type": "Segmentation",
+        "classes": [
+          "ESOPHAGUS",
+          "HEART",
+          "TRACHEA",
+          "AORTA"
+        ]
+      }
+    ],
+    "model": {
+      "architecture": "U-net",
+      "training": "supervised",
+      "cmpapproach": "ensemble"
+    },
+    "data": {
+      "training": {
+        "vol_samples": 40
+      },
+      "evaluation": {
+        "vol_samples": 20
+      },
+      "public": true,
+      "external": false
+    }
+  },
+  "details": {
+    "name": "nnU-Net Thoracic Organs at Risk Segmentation Model",
+    "version": "1.0.0",
+    "devteam": "MIC-DKFZ (Helmholtz Imaging Applied Computer Vision Lab)",
+    "type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)",
+    "date": {
+      "weights": "01/02/22",
+      "code": "n/a",
+      "pub": "2020"
+    },
+    "cite": "Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2020). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature Methods, 1-9.",
+    "license": {
+      "code": "Apache 2.0",
+      "weights": "CC BY-NC 4.0"
+    },
+    "publications": [
+      {
+        "title": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation",
+        "uri": "https://www.nature.com/articles/s41592-020-01008-z"
+      }
+    ],
+    "github": "https://github.com/MIC-DKFZ/nnUNet/tree/nnunetv1",
+    "zenodo": "https://zenodo.org/record/4485926"
+  },
+  "info": {
+    "use": {
+      "title": "Intended Use",
+      "text": "This model is intended to perform esophagus, heart, trachea and aorta segmentation in contrast-enhanced (CE) and non-CE chest CT scans. The model has been trained and tested on patients with Non-Small Cell Lung Cancer (NSCLC) referred for curative-intent radiotherapy, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown. The slice thickness should not exceed 2.5 mm for best results."
+    },
+    "analyses": {
+      "title": "Quantitative Analyses",
+      "text": "The model's performance was assessed using the Dice Coefficient and the (raw) Hausdorff Distance, in the context of the CodaLab SegTHOR challenge. The complete breakdown of the metrics can be consulted on CodaLab [1] and is reported in the supplementary material to the publication [2].",
+      "references": [
+        {
+          "label": "SegTHOR Challenge on CodaLab",
+          "uri": "https://competitions.codalab.org/competitions/21145"
+        },
+        {
+          "label": "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation",
+          "uri": "https://www.nature.com/articles/s41592-020-01008-z"
+        }
+      ]
+    },
+    "evaluation": {
+      "title": "Evaluation Data",
+      "text": "The evaluation dataset consists of 20 contrast-enhanced (CE) and non-CE chest CT scans from the SegTHOR dataset [1][2]. This dataset comprised Non-Small Cell Lung Cancer (NSCLC) patients referred for curative-intent radiotherapy (excluding patients with tumor extension distorting the mediastinum anatomy). Images were provided by the Centre Henri Becquerel, Rouen, France (CHB). On each CT scan, the OARs were delineated by an experienced radiation oncologist using a Varian Medical Systems SomaVision platform. The body and lung contours were segmented with the automatic tools available on the platform. The esophagus was manually delineated from the 4th cervical vertebra to the esophago-gastric junction. The heart was delineated as recommended by the Radiation Therapy Oncology Group 2. The trachea was contoured from the lower limit of the larynx to 2 cm below the carina, excluding the lobar bronchi. The aorta was delineated from its origin above the heart down to below the diaphragm pillars.",
+      "references": [
+        {
+          "label": "SegTHOR: Segmentation of Thoracic Organs at Risk in CT images",
+          "uri": "https://arxiv.org/abs/1912.05950"
+        },
+        {
+          "label": "SegTHOR Challenge on CodaLab",
+          "uri": "https://competitions.codalab.org/competitions/21145"
+        }
+      ]
+    },
+    "training": {
+      "title": "Training Data",
+      "text": "The training dataset consists of 40 contrast-enhanced (CE) and non-CE chest CT scans from the SegTHOR dataset [1][2]. This dataset comprised Non-Small Cell Lung Cancer (NSCLC) patients referred for curative-intent radiotherapy (excluding patients with tumor extension distorting the mediastinum anatomy), provided by the Centre Henri Becquerel, Rouen, France (CHB). For details regarding the labels the model was trained with, see the section above. The authors reported the following reconstruction and acquisition parameters for the CT scans in the training dataset:",
+      "tables": [
+        {
+          "label": "SegTHOR dataset",
+          "entries": {
+            "Slice Thickness": "2–3.7 mm",
+            "In-Plane Resolution": "0.9–1.37 mm"
+          }
+        }
+      ],
+      "references": [
+        {
+          "label": "SegTHOR: Segmentation of Thoracic Organs at Risk in CT images",
+          "uri": "https://arxiv.org/abs/1912.05950"
+        },
+        {
+          "label": "SegTHOR Challenge on CodaLab",
+          "uri": "https://competitions.codalab.org/competitions/21145"
+        }
+      ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/models/platipy/meta.json b/models/platipy/meta.json
index fd6dee27..3d487a33 100644
--- a/models/platipy/meta.json
+++ b/models/platipy/meta.json
@@ -38,8 +38,8 @@
     } ],
     "model": {
       "architecture": "U-net, Atlas",
-      "training": "Supervised",
-      "cmpapproach": "Hybrid"
+      "training": "supervised",
+      "cmpapproach": "3D"
     },
     "data": {
       "training": {
@@ -48,7 +48,7 @@
       "evaluation": {
         "vol_samples": 30
       },
-      "public": "Yes",
+      "public": true,
       "external": true
     }
   },
diff --git a/models/totalsegmentator/meta.json b/models/totalsegmentator/meta.json
index 6ce750bb..cebcfe01 100644
--- a/models/totalsegmentator/meta.json
+++ b/models/totalsegmentator/meta.json
@@ -125,8 +125,8 @@
     } ],
     "model": {
       "architecture": "U-net",
-      "training": "Supervised",
-      "cmpapproach": "2D, 3D, ensemble"
+      "training": "supervised",
+      "cmpapproach": "ensemble"
     },
     "data": {
      "training": {
@@ -135,7 +135,7 @@
       "evaluation": {
         "vol_samples": 65
       },
-      "public": "Yes",
+      "public": true,
       "external": false
     }
   },