From 2f8d60291aa8a07c69503ace3e452abe8b9f296a Mon Sep 17 00:00:00 2001 From: "A. Karimov" Date: Sun, 22 Dec 2024 20:59:41 +0300 Subject: [PATCH] Merging trk (#242) * added new files * add files & fixes * fix * activ sigmoid * infer for lungs * tests * models uploaded, code readability * hemor_contrast old recover 1 * hemor_contrast old recover 2 * revert old test changes * refactor rtk to new files * restore old coco files * restore old test file * rename experiments * lung description check * fix tests * fix tests * tests removed * float comparison in tests * remove selfwritten iou * get test back * black * an attempt to pass tests * an attempt to pass tests * test back * Fixed gdcm error * Fixed gdcm error * fix path bug * more tests * fixes and more tests * argparse tests * tests more * remove redund comments --------- Co-authored-by: Ainur Karimov Co-authored-by: InnopolisU --- .../lung_description_decision_datamodule.yaml | 12 +- .../segmentation_brain_complex.yaml | 17 ++ .../segmentation_brain_rtk.yaml | 17 ++ .../AK_081224_Yjc97FX_unet_brain_complex.yaml | 17 ++ .../AK_081224_gwVOeQ_unet_brain_rtk.yaml | 17 ++ .../lung_description_decision.yaml | 4 +- .../semantic-segmentation/unet_brain.yaml | 8 + examples/infer_contrasting_ct_dicom_brain.sh | 10 +- .../infer_ct_brain_hemorrhage_detection.sh | 14 +- .../infer_ct_brain_hemorrhage_segmentation.sh | 15 +- examples/infer_ct_mri_complexing.sh | 14 +- examples/infer_lung_description_decision.sh | 2 +- .../metrics_contrasting_ct_dicom_brain.sh | 7 + .../metrics_ct_brain_hemorrhage_detection.sh | 14 + ...etrics_ct_brain_hemorrhage_segmentation.sh | 14 + examples/metrics_ct_mri_complexing.sh | 13 + examples/metrics_lung_description_decision.sh | 14 + .../lightning_datamodules/coco_rtk.py | 171 +++++++++++++ .../lung_description_decision_datamodule.py | 21 +- innofw/core/datasets/coco_rtk.py | 201 +++++++++++++++ innofw/core/models/sklearn_adapter.py | 4 + .../torch/lightning_modules/segmentation.py | 8 +- 
.../CT_hemorrhage_contrast_metrics.py | 110 ++++++++ .../CT_hemorrhage_contrast_rtk.py | 106 ++++++++ .../data_utils/preprocessing/dicom_handler.py | 6 + .../data_utils/rtk/CT_complexing_metrics.py | 144 +++++++++++ .../data_utils/rtk/CT_hemorrhage_metrics.py | 225 +++++++++++++++++ .../rtk/lungs_description_metrics.py | 80 ++++++ .../lightning_datamodules/test_rtk.py | 239 ++++++++++++++++++ tests/unit/rtk/test_hemorrhage_contrast.py | 35 +++ tests/unit/rtk/test_lung_description_rtk.py | 22 ++ tests/unit/rtk/test_rtk_metrics.py | 71 ++++++ 32 files changed, 1630 insertions(+), 22 deletions(-) create mode 100644 config/datasets/semantic-segmentation/segmentation_brain_complex.yaml create mode 100644 config/datasets/semantic-segmentation/segmentation_brain_rtk.yaml create mode 100644 config/experiments/semantic-segmentation/AK_081224_Yjc97FX_unet_brain_complex.yaml create mode 100644 config/experiments/semantic-segmentation/AK_081224_gwVOeQ_unet_brain_rtk.yaml create mode 100644 config/models/semantic-segmentation/unet_brain.yaml mode change 100644 => 100755 examples/infer_contrasting_ct_dicom_brain.sh create mode 100644 examples/metrics_contrasting_ct_dicom_brain.sh create mode 100644 examples/metrics_ct_brain_hemorrhage_detection.sh create mode 100644 examples/metrics_ct_brain_hemorrhage_segmentation.sh create mode 100644 examples/metrics_ct_mri_complexing.sh create mode 100644 examples/metrics_lung_description_decision.sh create mode 100644 innofw/core/datamodules/lightning_datamodules/coco_rtk.py create mode 100644 innofw/core/datasets/coco_rtk.py create mode 100644 innofw/utils/data_utils/preprocessing/CT_hemorrhage_contrast_metrics.py create mode 100644 innofw/utils/data_utils/preprocessing/CT_hemorrhage_contrast_rtk.py create mode 100644 innofw/utils/data_utils/rtk/CT_complexing_metrics.py create mode 100644 innofw/utils/data_utils/rtk/CT_hemorrhage_metrics.py create mode 100644 innofw/utils/data_utils/rtk/lungs_description_metrics.py create mode 100644 
tests/unit/datamodules/lightning_datamodules/test_rtk.py create mode 100644 tests/unit/rtk/test_hemorrhage_contrast.py create mode 100644 tests/unit/rtk/test_lung_description_rtk.py create mode 100644 tests/unit/rtk/test_rtk_metrics.py diff --git a/config/datasets/classification/lung_description_decision_datamodule.yaml b/config/datasets/classification/lung_description_decision_datamodule.yaml index 9be099ab..277342e7 100644 --- a/config/datasets/classification/lung_description_decision_datamodule.yaml +++ b/config/datasets/classification/lung_description_decision_datamodule.yaml @@ -8,13 +8,15 @@ markup_info: Набор данных содержит описание легк date_time: 13.09.2023 _target_: innofw.core.datamodules.pandas_datamodules.lung_description_decision_datamodule.LungDescriptionDecisionPandasDataModule -target_col: decision +target_col: "decision" train: - source: ./data/ainur/infer/labels.csv + source: https://api.blackhole.ai.innopolis.university/public-datasets/rtk/labels.zip + target: ./data/lung_description/train test: - source: ./data/ainur/infer/labels.csv + source: https://api.blackhole.ai.innopolis.university/public-datasets/rtk/labels.zip + target: ./data/lung_description/test infer: - source: ./data/ainur/infer/labels.csv - + source: https://api.blackhole.ai.innopolis.university/public-datasets/rtk/labels.zip + target: ./data/lung_description/infer diff --git a/config/datasets/semantic-segmentation/segmentation_brain_complex.yaml b/config/datasets/semantic-segmentation/segmentation_brain_complex.yaml new file mode 100644 index 00000000..71fa55b1 --- /dev/null +++ b/config/datasets/semantic-segmentation/segmentation_brain_complex.yaml @@ -0,0 +1,17 @@ +_target_: innofw.core.datamodules.lightning_datamodules.coco_rtk.DicomCocoComplexingDataModule +date_time: 30.09.2022 +description: stroke segmentation dataset +markup_info: segmentation masks +name: stroke segmentation +task: + - image-segmentation + +test: + source: 
https://api.blackhole.ai.innopolis.university/public-datasets/stroke/test.zip + target: ./data/stroke/test +train: + source: https://api.blackhole.ai.innopolis.university/public-datasets/stroke/train.zip + target: ./data/stroke/train +infer: + source: https://api.blackhole.ai.innopolis.university/public-datasets/rtk/complex_infer.zip + target: ./data/complex/infer \ No newline at end of file diff --git a/config/datasets/semantic-segmentation/segmentation_brain_rtk.yaml b/config/datasets/semantic-segmentation/segmentation_brain_rtk.yaml new file mode 100644 index 00000000..da632169 --- /dev/null +++ b/config/datasets/semantic-segmentation/segmentation_brain_rtk.yaml @@ -0,0 +1,17 @@ +_target_: innofw.core.datamodules.lightning_datamodules.coco_rtk.DicomCocoDataModuleRTK +date_time: 30.09.2022 +description: stroke segmentation dataset +markup_info: segmentation masks +name: stroke segmentation +task: + - image-segmentation + +test: + source: https://api.blackhole.ai.innopolis.university/public-datasets/stroke/test.zip + target: ./data/rtk/test +train: + source: https://api.blackhole.ai.innopolis.university/public-datasets/stroke/train.zip + target: ./data/rtk/train +infer: + source: https://api.blackhole.ai.innopolis.university/public-datasets/rtk/infer.zip + target: ./data/rtk/infer \ No newline at end of file diff --git a/config/experiments/semantic-segmentation/AK_081224_Yjc97FX_unet_brain_complex.yaml b/config/experiments/semantic-segmentation/AK_081224_Yjc97FX_unet_brain_complex.yaml new file mode 100644 index 00000000..aedff018 --- /dev/null +++ b/config/experiments/semantic-segmentation/AK_081224_Yjc97FX_unet_brain_complex.yaml @@ -0,0 +1,17 @@ +# @package _global_ +defaults: + - override /models: semantic-segmentation/unet_brain + - override /datasets: semantic-segmentation/segmentation_brain_complex.yaml + - override /augmentations_train: none #segmentation_stroke.yaml + - override /augmentations_val: none + - override /augmentations_test: none + - override 
/losses: segmentation_losses.yaml +models: + in_channels: 1 +project: "segmentation" +task: "image-segmentation" +random_seed: 42 +stop_param: 10 +epochs: 300 +weights_freq: 1 +batch_size: 10 diff --git a/config/experiments/semantic-segmentation/AK_081224_gwVOeQ_unet_brain_rtk.yaml b/config/experiments/semantic-segmentation/AK_081224_gwVOeQ_unet_brain_rtk.yaml new file mode 100644 index 00000000..78c2d6cf --- /dev/null +++ b/config/experiments/semantic-segmentation/AK_081224_gwVOeQ_unet_brain_rtk.yaml @@ -0,0 +1,17 @@ +# @package _global_ +defaults: + - override /models: semantic-segmentation/unet_brain + - override /datasets: semantic-segmentation/segmentation_brain_rtk.yaml + - override /augmentations_train: none #segmentation_stroke.yaml + - override /augmentations_val: none + - override /augmentations_test: none + - override /losses: segmentation_losses.yaml +models: + in_channels: 1 +project: "segmentation" +task: "image-segmentation" +random_seed: 42 +stop_param: 10 +epochs: 300 +weights_freq: 1 +batch_size: 10 diff --git a/config/models/classification/lung_description_decision.yaml b/config/models/classification/lung_description_decision.yaml index 00a113b6..a7c0e072 100644 --- a/config/models/classification/lung_description_decision.yaml +++ b/config/models/classification/lung_description_decision.yaml @@ -6,8 +6,8 @@ steps: - - "vectorizer" - _target_: sklearn.feature_extraction.text.TfidfVectorizer max_df: 1000 - stop_words: - - russian +# stop_words: +# - russian min_df: 10 - - "classifier" - _target_: sklearn.linear_model.LogisticRegression diff --git a/config/models/semantic-segmentation/unet_brain.yaml b/config/models/semantic-segmentation/unet_brain.yaml new file mode 100644 index 00000000..4ab24ddb --- /dev/null +++ b/config/models/semantic-segmentation/unet_brain.yaml @@ -0,0 +1,8 @@ +_target_: segmentation_models_pytorch.FPN +description: Base Unet segmentation model with 1 channel input +name: FPN +in_channels: 1 +classes: 4 +encoder_name: 
'efficientnet-b7' +encoder_weights: +activation: sigmoid \ No newline at end of file diff --git a/examples/infer_contrasting_ct_dicom_brain.sh b/examples/infer_contrasting_ct_dicom_brain.sh old mode 100644 new mode 100755 index b654d37e..dc06de8e --- a/examples/infer_contrasting_ct_dicom_brain.sh +++ b/examples/infer_contrasting_ct_dicom_brain.sh @@ -1 +1,9 @@ -python3 ./innofw/utils/data_utils/preprocessing/CT_hemorrhage_contrast.py \ No newline at end of file +data_path=$1 +out_path=$2 + +if [ -z "$data_path" ]; then + data_path="https://api.blackhole.ai.innopolis.university/public-datasets/rtk/infer.zip" + echo "Using default data path $data_path" +fi + +python innofw/utils/data_utils/preprocessing/CT_hemorrhage_contrast_rtk.py --input "$data_path" --output "$out_path" \ No newline at end of file diff --git a/examples/infer_ct_brain_hemorrhage_detection.sh b/examples/infer_ct_brain_hemorrhage_detection.sh index a7804aec..b5b8c9c0 100755 --- a/examples/infer_ct_brain_hemorrhage_detection.sh +++ b/examples/infer_ct_brain_hemorrhage_detection.sh @@ -1 +1,13 @@ -python infer.py experiments=semantic-segmentation/SK_180822_qmciwj41_unet_brain 'ckpt_path=https://api.blackhole.ai.innopolis.university/pretrained/segmentation_unet_brain.pt' \ No newline at end of file +data_path=$1 +experiments="semantic-segmentation/AK_081224_gwVOeQ_unet_brain_rtk" +ckpt_path="https://api.blackhole.ai.innopolis.university/pretrained/segmentation_rtk_brain.pt" +if [ -z "$data_path" ] +then + python infer.py experiments=$experiments \ + "ckpt_path=$ckpt_path" +else + python infer.py experiments=$experiments \ + "ckpt_path=$ckpt_path" \ + "++datasets.infer.target='$data_path'" \ + "++datasets.infer.source='$data_path'" +fi \ No newline at end of file diff --git a/examples/infer_ct_brain_hemorrhage_segmentation.sh b/examples/infer_ct_brain_hemorrhage_segmentation.sh index a7804aec..e91208e3 100644 --- a/examples/infer_ct_brain_hemorrhage_segmentation.sh +++ 
b/examples/infer_ct_brain_hemorrhage_segmentation.sh @@ -1 +1,14 @@ -python infer.py experiments=semantic-segmentation/SK_180822_qmciwj41_unet_brain 'ckpt_path=https://api.blackhole.ai.innopolis.university/pretrained/segmentation_unet_brain.pt' \ No newline at end of file +data_path=$1 +experiments="semantic-segmentation/AK_081224_gwVOeQ_unet_brain_rtk" +ckpt_path="https://api.blackhole.ai.innopolis.university/pretrained/segmentation_rtk_brain.pt" + +if [ -z "$data_path" ] +then + python infer.py experiments=$experiments \ + "ckpt_path=$ckpt_path" +else + python infer.py experiments=$experiments \ + "ckpt_path=$ckpt_path" \ + "++datasets.infer.target='$data_path'" \ + "++datasets.infer.source='$data_path'" +fi \ No newline at end of file diff --git a/examples/infer_ct_mri_complexing.sh b/examples/infer_ct_mri_complexing.sh index b42f0411..a9c4f98d 100644 --- a/examples/infer_ct_mri_complexing.sh +++ b/examples/infer_ct_mri_complexing.sh @@ -1 +1,13 @@ -python infer.py experiments=semantic-segmentation/SK_100923_uner_brain_mri.yaml 'ckpt_path=https://api.blackhole.ai.innopolis.university/pretrained/segmentation_unet_brain_complex.pt' \ No newline at end of file +data_path=$1 +ckpt_path="https://api.blackhole.ai.innopolis.university/pretrained/segmentation_rtk_brain.pt" +experiments="semantic-segmentation/AK_081224_Yjc97FX_unet_brain_complex.yaml" + +if [ -z "$data_path" ] +then + python infer.py experiments=$experiments \ + "ckpt_path=$ckpt_path" +else + python infer.py experiments=$experiments \ + "++datasets.infer.source='$data_path'" \ + "ckpt_path=$ckpt_path" +fi diff --git a/examples/infer_lung_description_decision.sh b/examples/infer_lung_description_decision.sh index fcc491bd..bb39acc5 100644 --- a/examples/infer_lung_description_decision.sh +++ b/examples/infer_lung_description_decision.sh @@ -1,6 +1,6 @@ export HYDRA_FULL_ERROR=1 export experiments="classification/AK_130923_fbFMFDe1_lung_description_decision.yaml" -export 
model_weights="/home/ainur/Desktop/innopolis/text/pipe.pkl" +export model_weights="https://api.blackhole.ai.innopolis.university/pretrained/lungs_description.pkl" +export data_source=$1 if [ -z "$data_source" ] then diff --git a/examples/metrics_contrasting_ct_dicom_brain.sh b/examples/metrics_contrasting_ct_dicom_brain.sh new file mode 100644 index 00000000..82c111ca --- /dev/null +++ b/examples/metrics_contrasting_ct_dicom_brain.sh @@ -0,0 +1,7 @@ +output=$1 +if [ -z "$output" ]; then + output="../innofw/logs/infer/contrast" + output+="/$(ls $output -tr | tail -n 1)" + output+="/$(ls $output -tr | tail -n 1)" +fi +python innofw/utils/data_utils/preprocessing/CT_hemorrhage_contrast_metrics.py -o "$output" diff --git a/examples/metrics_ct_brain_hemorrhage_detection.sh b/examples/metrics_ct_brain_hemorrhage_detection.sh new file mode 100644 index 00000000..6ca37244 --- /dev/null +++ b/examples/metrics_ct_brain_hemorrhage_detection.sh @@ -0,0 +1,14 @@ +data_path=$1 +output=$2 +if [ -z "$data_path" ]; then + data_path="../innofw/data/rtk/infer/" + echo "Using default data path $data_path" +fi + +if [ -z "$output" ]; then + output="../innofw/logs/infer/segmentation/semantic-segmentation/AK_081224_gwVOeQ_unet_brain_rtk/" + output+="$(ls $output | tail -n 1)" + echo "Using default output path $output" +fi + +python innofw/utils/data_utils/rtk/CT_hemorrhage_metrics.py -i "$data_path" -o "$output" -t "detection" diff --git a/examples/metrics_ct_brain_hemorrhage_segmentation.sh b/examples/metrics_ct_brain_hemorrhage_segmentation.sh new file mode 100644 index 00000000..a8a76009 --- /dev/null +++ b/examples/metrics_ct_brain_hemorrhage_segmentation.sh @@ -0,0 +1,14 @@ +data_path=$1 +output=$2 +if [ -z "$data_path" ]; then + data_path="../innofw/data/rtk/infer/" + echo "Using default data path $data_path" +fi + +if [ -z "$output" ]; then + output="../innofw/logs/infer/segmentation/semantic-segmentation/AK_081224_gwVOeQ_unet_brain_rtk/" + output+="$(ls $output -tr| tail -n 1)" + 
echo "Using default output path $output" +fi + +python innofw/utils/data_utils/rtk/CT_hemorrhage_metrics.py -i "$data_path" -o "$output" -t "segmentation" diff --git a/examples/metrics_ct_mri_complexing.sh b/examples/metrics_ct_mri_complexing.sh new file mode 100644 index 00000000..28371e79 --- /dev/null +++ b/examples/metrics_ct_mri_complexing.sh @@ -0,0 +1,13 @@ +data_path=$1 +output=$2 + +if [ -z "$data_path" ]; then + data_path="../innofw/data/complex/infer" + echo "Using default data path $data_path" +fi + +if [ -z "$output" ]; then + output="../innofw/logs/infer/segmentation/semantic-segmentation/AK_081224_Yjc97FX_unet_brain_complex.yaml/" + output+="$(ls $output -tr | tail -n 1)" +fi +python innofw/utils/data_utils/rtk/CT_complexing_metrics.py -i $data_path -o $output diff --git a/examples/metrics_lung_description_decision.sh b/examples/metrics_lung_description_decision.sh new file mode 100644 index 00000000..1b126f67 --- /dev/null +++ b/examples/metrics_lung_description_decision.sh @@ -0,0 +1,14 @@ +data_path=$1 +if [ -z "$data_path" ]; then + data_path="./data/lung_description/infer/labels.csv" + echo "Using default data path $data_path" +fi + +if [ -z "$output" ]; then + output="./logs/infer/lung_description_decision/classification/AK_130923_fbFMFDe1_lung_description_decision.yaml/" + output+="$(ls $output -tr| tail -n 1)/" + output+="$(ls $output -tr| tail -n 1)" + echo "Using default output path $output" +fi + +python innofw/utils/data_utils/rtk/lungs_description_metrics.py -i "$data_path" -o "$output" diff --git a/innofw/core/datamodules/lightning_datamodules/coco_rtk.py b/innofw/core/datamodules/lightning_datamodules/coco_rtk.py new file mode 100644 index 00000000..a5e240b9 --- /dev/null +++ b/innofw/core/datamodules/lightning_datamodules/coco_rtk.py @@ -0,0 +1,171 @@ +import os +import pathlib + +import albumentations as albu +import numpy as np +import torch +from albumentations.pytorch import ToTensorV2 + +from innofw.constants import Stages +from 
innofw.core.augmentations import Augmentation +from innofw.core.datamodules.lightning_datamodules.base import BaseLightningDataModule +from innofw.core.datasets.coco_rtk import DicomCocoDatasetRTK + + +class CustomNormalize: + def __call__(self, image, **kwargs): + image = (image - image.min()) / (image.max() - image.min() + 1e-8) + return image + + +DEFAULT_TRANSFORM = albu.Compose( + [ + albu.Resize(256, 256), + albu.Lambda(image=CustomNormalize()), + ToTensorV2(transpose_mask=True), + ] +) + + +class DicomCocoComplexingDataModule(BaseLightningDataModule): + task = ["image-detection", "image-segmentation"] + dataset = DicomCocoDatasetRTK + + def __init__( + self, + train=None, + test=None, + infer=None, + val_size: float = 0.2, + num_workers: int = 1, + augmentations=None, + stage=None, + batch_size=32, + transform=None, + val_split=0.2, + test_split=0.1, + *args, + **kwargs, + ): + super().__init__( + train, + test, + infer, + batch_size, + num_workers, + stage, + *args, + **kwargs, + ) + + def setup(self, stage=None): + pass + + def setup_train_test_val(self, **kwargs): + pass + + def setup_infer(self): + if self.aug: + transform = Augmentation(self.aug["test"]) + else: + transform = DEFAULT_TRANSFORM + if str(self.predict_source).split("/")[-1] in ["mrt", "ct"]: + self.predict_source = self.predict_source.parent + cont = os.listdir(self.predict_source) + assert "ct" in cont, f"No CT data in {self.predict_source}" + assert "mrt" in cont, f"No MRT data in {self.predict_source}" + + self.predict_dataset = [ + self.dataset( + data_dir=os.path.join(self.predict_source, "ct"), + transform=transform, + ), + self.dataset( + data_dir=os.path.join(self.predict_source, "mrt"), + transform=transform, + ), + ] + self.predict_dataset = torch.utils.data.ConcatDataset(self.predict_dataset) + + def train_dataloader(self): + return torch.utils.data.DataLoader( + self.train_dataset, + batch_size=self.batch_size, + shuffle=True, + num_workers=self.num_workers, + ) + + def 
val_dataloader(self): + return torch.utils.data.DataLoader( + self.val_dataset, + batch_size=self.batch_size, + shuffle=False, + num_workers=self.num_workers, + ) + + def test_dataloader(self): + return torch.utils.data.DataLoader( + self.test_dataset, + batch_size=self.batch_size, + shuffle=False, + num_workers=self.num_workers, + ) + + def predict_dataloader(self): + """shuffle should be turned off""" + return torch.utils.data.DataLoader( + self.predict_dataset, + batch_size=self.batch_size, + shuffle=False, + num_workers=self.num_workers, + ) + + def save_preds(self, preds, stage: Stages, dst_path: pathlib.Path): + """we assume that shuffle is turned off + + Args: + preds: + stage: + dst_path: + + Returns: + + """ + + total_iter = 0 + for tensor_batch in preds: + for i in range(tensor_batch.shape[0]): + path = self.predict_dataset[total_iter]["path"] + output = tensor_batch[i].cpu().detach().numpy() + output = np.max(output, axis=0) + output = np.expand_dims(output, axis=0) + output = np.transpose(output, (1, 2, 0)) + if "/ct/" in path: + prefix = "_ct" + else: + prefix = "_mrt" + path = os.path.join(dst_path, f"{prefix}_{total_iter}.npy") + np.save(path, output) + total_iter += 1 + + +class DicomCocoDataModuleRTK(DicomCocoComplexingDataModule): + def setup_infer(self): + if self.aug: + transform = Augmentation(self.aug["test"]) + else: + transform = DEFAULT_TRANSFORM + self.predict_dataset = self.dataset( + data_dir=str(self.predict_source), transform=transform + ) + + def save_preds(self, preds, stage: Stages, dst_path: pathlib.Path): + prefix = "mask" + for batch_idx, tensor_batch in enumerate(preds): + for i in range(tensor_batch.shape[0]): + output = tensor_batch[i].cpu().detach().numpy() + output = np.max(output, axis=0) + output = np.expand_dims(output, axis=0) + output = np.transpose(output, (1, 2, 0)) + path = os.path.join(dst_path, f"{prefix}_{batch_idx}_{i}.npy") + np.save(path, output) diff --git 
a/innofw/core/datamodules/pandas_datamodules/lung_description_decision_datamodule.py b/innofw/core/datamodules/pandas_datamodules/lung_description_decision_datamodule.py index 5e6230ca..137ae626 100644 --- a/innofw/core/datamodules/pandas_datamodules/lung_description_decision_datamodule.py +++ b/innofw/core/datamodules/pandas_datamodules/lung_description_decision_datamodule.py @@ -19,13 +19,22 @@ class LungDescriptionDecisionPandasDataModule(PandasDataModule): the given sklearn pipeline. "SR" stands for "Structured report" the field in DICOM """ + def _get_x_n_y(self, dataset, target_col): + if "y" in dataset: + return { + "x": dataset[TEXT_DATA_COLUMN_NAME], + "y": dataset["y"].str.strip(), + } + return { + "x": dataset[TEXT_DATA_COLUMN_NAME], + "y": None if target_col is None else dataset[target_col].str.strip(), + } + def save_preds(self, preds: np.ndarray, stage: Stages, dst_path: Path): """saving result as csv file - The main difference is that sklearn pipeline gets as input pd.Series data, while standard datamodule retrieves pd.DataFrame, thus this method merge input which is pd.Series and output (np.ndarray) and saves as csv file. 
- :param preds: result of sklearn pipeline :param stage: :param dst_path: @@ -35,12 +44,6 @@ def save_preds(self, preds: np.ndarray, stage: Stages, dst_path: Path): df["y"] = preds else: df[self.target_col] = preds - - if self.infer: - dst_path = os.path.dirname(self.infer) dst_filepath = Path(dst_path) / "prediction.csv" df.to_csv(dst_filepath) - logging.info(f"Saved results to: {dst_filepath}") - - def _get_x_n_y(self, dataset, target_col): - return {"x": dataset[TEXT_DATA_COLUMN_NAME], "y": None if target_col is None else dataset[target_col].str.strip()} + logging.info(f"Saved results to: {dst_filepath}") \ No newline at end of file diff --git a/innofw/core/datasets/coco_rtk.py b/innofw/core/datasets/coco_rtk.py new file mode 100644 index 00000000..7d092021 --- /dev/null +++ b/innofw/core/datasets/coco_rtk.py @@ -0,0 +1,201 @@ +from pckg_util import install_and_import + +install_and_import("pylibjpeg", "2.0.0", packageimportname="pylibjpeg") +install_and_import("python-gdcm", "3.0.24.1", packageimportname="gdcm") +from innofw.utils.data_utils.preprocessing.dicom_handler import dicom_to_raster + +from pathlib import Path +import json +import os + +import cv2 +import numpy as np +import pydicom +import torch +from torch.utils.data import Dataset + + +class DicomCocoDatasetRTK(Dataset): + def __init__(self, *args, **kwargs): + """ + Args: + data_dir (str): Путь к директории с DICOM файлами и COCO аннотациями. + transform (callable, optional): Трансформации, применяемые к изображениям и маскам. 
+ """ + data_dir = kwargs["data_dir"] + data_dir = os.path.abspath(data_dir) + + assert os.path.isdir(data_dir), f"Invalid path {data_dir}" + + if data_dir.split("/")[-1] in ["annotations", "images"]: + data_dir = Path(data_dir).parent + + self.transform = kwargs.get("transform", None) + + # searching COCO annotation + self.dicom_paths = [] + + coco_path = None + for root, _, files in os.walk(data_dir): + + for file in files: + basename = os.path.basename(file) + filename, ext = os.path.splitext(basename) + if ext == ".json": + coco_path = os.path.join(data_dir, root, file) + elif ext in ["", ".dcm"]: + dicom_path = os.path.join(data_dir, root, file) + if pydicom.misc.is_dicom(dicom_path): + self.dicom_paths += [dicom_path] + if not coco_path: + print(f"COCO аннотации не найдены в директории {data_dir}.") + self.coco_found = False + else: + self.coco_found = True + + if not self.dicom_paths: + raise FileNotFoundError(f"Dicom не найдены в директории {data_dir}.") + + import re + + def extract_digits(s): + out = re.findall(r"\d+", s) + out = "".join(out) + return int(out) + + # COCO annotation load + if self.coco_found: + with open(coco_path, "r") as f: + self.coco = json.load(f) + self.categories = self.coco["categories"] + self.annotations = self.coco["annotations"] + self.num_classes = len(self.categories) + + self.images = self.coco["images"] + self.image_id_to_annotations = {image["id"]: [] for image in self.images} + for ann in self.annotations: + self.image_id_to_annotations[ann["image_id"]].append(ann) + + if len(self.images) != len(self.dicom_paths): + new_images = [] + for img in self.images: + for dicom_path in self.dicom_paths: + if dicom_path.endswith(img["file_name"]): + new_images += [img] + self.images = new_images + self.images.sort(key=lambda x: extract_digits(x["file_name"])) + else: + self.dicom_paths.sort() + + def __len__(self): + if self.coco_found: + return len(self.images) + else: + return len(self.dicom_paths) + + def get_dicom(self, i): + 
"""no annotation data + + :param i: + :return: + """ + + dicom_path = self.dicom_paths[i] + dicom = pydicom.dcmread(dicom_path) + image = dicom_to_raster(dicom) + + if self.transform: + transformed = self.transform(image=image) + image = transformed["image"] + + if type(image) == torch.Tensor: + image = image.float() + + out = {"image": image, "path": dicom_path} + return out + + def __getitem__(self, idx): + """ + + Args: + idx: + + Returns: + A dictionary with keys + "image": image + "mask": mask + "path": dicom_path + "raw_image": dicom_image + + + """ + if not self.coco_found: + return self.get_dicom(idx) + image_info = self.images[idx] + for dicom_path in self.dicom_paths: + if dicom_path.endswith(image_info["file_name"]): + break + else: + print(self.dicom_paths, image_info["file_name"]) + raise FileNotFoundError(f"Dicom {dicom_path} не найден.") + dicom = pydicom.dcmread(dicom_path) + image = dicom_to_raster(dicom) + + anns = self.image_id_to_annotations[image_info["id"]] + mask = self.get_mask(anns, image_info) + + if self.transform: + transformed = self.transform(image=image, mask=mask) + image = transformed["image"] + mask = transformed["mask"] + + raw = dicom.pixel_array + + if type(image) == torch.Tensor: + image = image.float() + shape = image.shape[1:] + add_raw = False + else: + shape = image.shape[:2] + add_raw = True + + out = {"image": image, "mask": mask, "path": dicom_path} + + if add_raw: + if raw.shape[:2] != shape: + # no need to apply all transforms + raw = cv2.resize(raw, shape) + out["raw_image"] = raw + return out + + def get_mask(self, anns, image_info): + mask = np.zeros( + (image_info["height"], image_info["width"], self.num_classes), + dtype=np.uint8, + ) + for ann in anns: + segmentation = ann["segmentation"] + category_id = ann["category_id"] - 1 + if isinstance(segmentation, list): # полигональная аннотация + for polygon in segmentation: + poly_mask = self._polygon_to_mask( + polygon, image_info["height"], image_info["width"] + ) + 
mask[:, :, category_id][poly_mask > 0] = 1 + return mask + + @staticmethod + def _polygon_to_mask(polygon, height, width): + mask = np.zeros((height, width), dtype=np.uint8) + polygon = np.array(polygon).reshape(-1, 2) + mask = cv2.fillPoly(mask, [polygon.astype(int)], color=1) + return mask + + def setup_infer(self): + pass + + def infer_dataloader(self): + return self + + def predict_dataloader(self): + return self diff --git a/innofw/core/models/sklearn_adapter.py b/innofw/core/models/sklearn_adapter.py index c8b5d7d0..e678a584 100755 --- a/innofw/core/models/sklearn_adapter.py +++ b/innofw/core/models/sklearn_adapter.py @@ -72,6 +72,10 @@ def _predict(self, datamodule, ckpt_path=None, **kwargs): def _train(self, datamodule, **kwargs): data = datamodule.train_dataloader() x, y = data["x"], data["y"] + try: + self.model.steps = list( self.model.steps) + except: + pass self.model.fit(X=x, y=y) self.test(datamodule) diff --git a/innofw/core/models/torch/lightning_modules/segmentation.py b/innofw/core/models/torch/lightning_modules/segmentation.py index 1642cad3..b42a925d 100755 --- a/innofw/core/models/torch/lightning_modules/segmentation.py +++ b/innofw/core/models/torch/lightning_modules/segmentation.py @@ -180,7 +180,13 @@ def model_load_checkpoint(self, path): def predict_step(self, batch: Any, batch_indx: int, **kwargs) -> torch.Tensor: """Predict and output binary predictions""" if isinstance(batch, dict): - input_tensor = batch[SegDataKeys.image] + try: + input_tensor = batch[SegDataKeys.image] + except: + try: + input_tensor = batch[SegDataKeys.image.value] + except: + raise AttributeError else: input_tensor = batch diff --git a/innofw/utils/data_utils/preprocessing/CT_hemorrhage_contrast_metrics.py b/innofw/utils/data_utils/preprocessing/CT_hemorrhage_contrast_metrics.py new file mode 100644 index 00000000..bde166bd --- /dev/null +++ b/innofw/utils/data_utils/preprocessing/CT_hemorrhage_contrast_metrics.py @@ -0,0 +1,110 @@ +from argparse import 
ArgumentDefaultsHelpFormatter, ArgumentParser +import os + +from skimage.metrics import peak_signal_noise_ratio, structural_similarity +import matplotlib.pyplot as plt +import cv2 +import numpy as np + + +def overlay_mask_on_image(image, mask, alpha=0.5): + """Overlaying a mask on image + + + Args: + image (np.array): input image (HxWxC). + mask (np.array): mask in (H, W, D) format, where D - class number. + alpha (float): transprency + + Returns: + np.array: image + """ + color = np.array([255, 0, 0]) + + if len(image.shape) == 2 or image.shape[2] == 1: + image = np.stack([image] * 3, axis=-1) + + colored_mask = np.any(mask > 0, axis=-1) + shape_to = image.shape[:2] + colored_mask = cv2.resize( + colored_mask.astype(np.uint8), shape_to, interpolation=cv2.INTER_NEAREST + ) + overlayed_image = image.copy() + overlayed_image[colored_mask.astype(bool)] = ( + overlayed_image[colored_mask.astype(bool)] * (1 - alpha) + alpha * color + ) + return overlayed_image + + +def calculate_metrics(raw, contrasted): + + # PSNR + psnr = peak_signal_noise_ratio(raw, contrasted) + + # SSIM + ssim = structural_similarity(raw, contrasted) + + return {"Peak Signal-to-Noise Ratio": psnr, "Structural Similarity Index": ssim} + + +def hemorrhage_contrast_metrics(input_path: str): + try: + input_path = str(input_path) + except TypeError: + raise Exception(f"wrong path {input_path}") + + assert os.path.exists(input_path), f"wrong path {input_path}" + files = os.listdir(input_path) + uniqes = [x.rsplit("_", 1)[0] for x in files] + + for f in uniqes: + mask = cv2.imread(os.path.join(input_path, f + "_mask.png"), 0) + raw_image = np.load(os.path.join(input_path, f + "_raw.npy")) + image = cv2.imread(os.path.join(input_path, f + "_image.png"), 0) + mask = np.expand_dims(mask, 2) + contrasted_image_with_mask = overlay_mask_on_image(image, mask) + + f, ax = plt.subplots(1, 2) + ax[0].imshow(raw_image, cmap="Greys_r") + ax[1].imshow(contrasted_image_with_mask) + + metrics = 
def setup_parser(parser):
    """Register CLI arguments for the contrast-metrics tool.

    The original registration passed "-i", "--input", "-o" and "--output"
    to a single ``add_argument`` call, which makes argparse treat all four
    strings as aliases of one argument (dest ``input``) — "-o" silently
    became an alias for the input path and ``args.output`` never existed.
    Only ``--input`` is consumed by this tool (see ``callback``), so it is
    registered alone.
    """
    parser.add_argument(
        "-i",
        "--input",
        required=True,
        help="path to dataset to load, default path is %(default)s",
    )
{"source": input_path, "target": input_path} + dm = DicomCocoDataModuleRTK(infer=path, transform=transform) + dm.setup_infer() + dataloader = dm.predict_dataloader() + dataset = dataloader.dataset + dataset.transform = transform + + if len(dataset) == 0: + raise Warning(f"empty dataset with the directory {input_path}") + else: + for x in (pbar := tqdm(dataset)): + path = x["path"] + mask = x.get("mask", None) + contrasted_image = x["image"] + raw_image = x.get("raw_image", None) + + basename = Path(path).stem + os.makedirs(output_folder, exist_ok=True) + + output_path = os.path.join(output_folder, basename + "_raw.npy") + np.save(output_path, raw_image) + + output_path = os.path.join(output_folder, basename + "_mask.png") + cv2.imwrite(output_path, mask) + + output_path = os.path.join(output_folder, basename + "_image.png") + cv2.imwrite(output_path, contrasted_image) + + +def callback(arguments): + """Callback function for arguments""" + try: + hemorrhage_contrast(arguments.input, arguments.output) + except KeyboardInterrupt: + print("You exited") + + +def default_output_path() -> str: + from innofw.utils.getters import get_log_dir + from uuid import uuid4 + + log_root = os.path.join(os.getcwd(), "logs") + project = "contrast" + stage = "infer" + experiment_name = str(uuid4()).split("-")[0] + return str(get_log_dir(project, stage, experiment_name, log_root)) + + +def setup_parser(parser): + """The function to setup parser arguments""" + parser.add_argument( + "-i", + "--input", + required=True, + help="path to dataset to load, default path is %(default)s", + ) + parser.add_argument( + "-o", + "--output", + default=None, + help="path to dataset to save", + ) + + +def main(): + """Main module function""" + parser = ArgumentParser( + prog="hemorrhage_contrast", + description="A tool to contrast", + formatter_class=ArgumentDefaultsHelpFormatter, + ) + setup_parser(parser) + arguments = parser.parse_args() + callback(arguments) + + +if __name__ == "__main__": + main() 
# Viewer state: indices of the currently displayed MRT (left) and CT (right)
# slices; mutated by the button callbacks below.
current_index_mrt = 0
current_index_ct = 0


def update_images():
    """Redraw both panels from the current MRT/CT slice indices."""
    # ``left_images``/``right_images`` are generator factories installed by
    # ``show_complexing_metrics``; each call yields exactly one overlaid frame.
    ax_left.imshow(list(left_images(current_index_mrt))[0])
    ax_right.imshow(list(right_images(current_index_ct))[0])
    fig.canvas.draw()


def next_image(event):
    """Matplotlib button callback: advance to the next MRT slice (wraps around)."""
    global current_index_mrt, left_max
    current_index_mrt = (current_index_mrt + 1) % left_max
    update_images()


def prev_image(event):
    """Matplotlib button callback: go to the previous MRT slice (wraps around)."""
    global current_index_mrt, left_max
    current_index_mrt = (current_index_mrt - 1) % left_max
    update_images()


def next_image_ct(event):
    """Matplotlib button callback: advance to the next CT slice (wraps around)."""
    global current_index_ct, right_max
    current_index_ct = (current_index_ct + 1) % right_max
    update_images()
def prev_image_ct(event):
    """Matplotlib button callback: go to the previous CT slice (wraps around)."""
    global current_index_ct, right_max
    current_index_ct = (current_index_ct - 1) % right_max
    update_images()


def show_result():
    """Build the side-by-side MRT/CT viewer window with navigation buttons."""
    global fig, ax_left, ax_right, left_images, right_images
    fig, (ax_left, ax_right) = plt.subplots(1, 2)
    ax_left.imshow(list(left_images(current_index_mrt))[0])
    ax_right.imshow(list(right_images(current_index_ct))[0])
    ax_left.axis("off")
    ax_right.axis("off")

    # Button axes are [left, bottom, width, height] in figure coordinates.
    ax_prev = plt.axes([0.3, 0.05, 0.1, 0.075])
    ax_prev_ct = plt.axes([0.6, 0.05, 0.1, 0.075])

    ax_next = plt.axes([0.3, 0.15, 0.1, 0.075])
    ax_next_ct = plt.axes([0.6, 0.15, 0.1, 0.075])

    # Button labels are user-facing Russian strings; left intact on purpose.
    btn_prev = Button(ax_prev, "Назад MRT")
    btn_next = Button(ax_next, "Вперед MRT")
    btn_prev.on_clicked(prev_image)
    btn_next.on_clicked(next_image)

    btn_prev_ct = Button(ax_prev_ct, "Назад CT")
    btn_next_ct = Button(ax_next_ct, "Вперед CT")
    btn_prev_ct.on_clicked(prev_image_ct)
    btn_next_ct.on_clicked(next_image_ct)
    plt.show()


def show_complexing_metrics(input_path, out_path):
    """Interactively compare MRT and CT predictions stored in *out_path*.

    Args:
        input_path: dataset root; assumed to contain ``mrt`` and ``ct``
            sub-folders readable by DicomCocoDatasetRTK.
        out_path: folder with prediction files; filenames containing
            ``_mrt`` / ``_ct`` are routed to the matching panel.
    """
    outs = os.listdir(out_path)
    # Sorted so prediction files pair up with dataset order by index.
    outs.sort()
    dataset_mrt = DicomCocoDatasetRTK(
        data_dir=os.path.join(input_path, "mrt"), transform=transform
    )
    out_mrt = [x for x in outs if "_mrt" in x]

    dataset_ct = DicomCocoDatasetRTK(
        data_dir=os.path.join(input_path, "ct"), transform=transform
    )
    out_ct = [x for x in outs if "_ct" in x]

    # Viewer state lives in module globals consumed by the button callbacks.
    global left_images, right_images, left_max, right_max
    left_max = len(out_mrt)
    right_max = len(out_ct)

    left_images = partial(data_gen, ds=dataset_mrt, folder=out_path, outs=out_mrt)
    right_images = partial(data_gen, ds=dataset_ct, folder=out_path, outs=out_ct)

    show_result()


def data_gen(i, ds, folder, outs):
    """Yield a single frame: the i-th image with its prediction mask overlaid."""
    x = ds[i]
    pr_mask = np.load(os.path.join(folder, outs[i]))
    image = x["image"]
    with_mask = overlay_mask_on_image(image, pr_mask)
    yield with_mask
def compute_metrics(gt_boxes, pr_boxes, iou_threshold=0.5):
    """Greedy detection metrics for axis-aligned bounding boxes.

    Each predicted box is matched against its best-overlapping ground-truth
    box.  A prediction is a true positive when that best IoU reaches
    *iou_threshold* and the ground-truth box is not already matched;
    otherwise it is a false positive.  Unmatched ground-truth boxes count
    as false negatives.

    Args:
        gt_boxes: iterable of (x_min, y_min, x_max, y_max) ground-truth boxes.
        pr_boxes: iterable of predicted boxes in the same format.
        iou_threshold: minimum IoU for a prediction to match.

    Returns:
        dict: "precision", "recall" and "mean_iou" (mean IoU over the TPs).
    """

    def pair_iou(box_a, box_b):
        # Plain-arithmetic IoU: avoids constructing torch tensors for every
        # (prediction, gt) pair, and the local name no longer shadows the
        # module-level ``jaccard_index as iou`` import.
        ax1, ay1, ax2, ay2 = box_a
        bx1, by1, bx2, by2 = box_b
        inter_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
        inter_h = max(0.0, min(ay2, by2) - max(ay1, by1))
        inter = inter_w * inter_h
        union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - inter
        return inter / union if union > 0 else 0.0

    tp, fp = 0, 0
    iou_sum = 0.0
    matched_gt = set()

    for pr_box in pr_boxes:
        best_iou = 0.0
        best_gt_idx = -1

        for idx, gt_box in enumerate(gt_boxes):
            overlap = pair_iou(pr_box, gt_box)
            if overlap > best_iou:
                best_iou = overlap
                best_gt_idx = idx

        if best_iou >= iou_threshold and best_gt_idx not in matched_gt:
            tp += 1
            matched_gt.add(best_gt_idx)
            iou_sum += best_iou
        else:
            fp += 1

    fn = len(gt_boxes) - len(matched_gt)

    # Precision, Recall, IoU (guarded against empty denominators).
    precision = tp / (tp + fp) if tp + fp > 0 else 0
    recall = tp / (tp + fn) if tp + fn > 0 else 0
    mean_iou = iou_sum / tp if tp > 0 else 0

    metrics = {
        "precision": precision,
        "recall": recall,
        "mean_iou": mean_iou,
    }
    return metrics
def mask_to_bbox(mask_image: np.ndarray, target_size: int = 256):
    """Convert a multi-channel mask into a list of bounding boxes.

    Per-class channels are collapsed into one binary mask, external contours
    are extracted, and each contour's bounding rectangle is rescaled from
    the mask resolution to *target_size*.

    Args:
        mask_image: mask of shape (H, W, C); H must equal W.
        target_size: side of the square coordinate system boxes are scaled
            to.  Defaults to 256, matching the module-level ``transform``
            (this was previously a hard-coded constant flagged with a TODO).

    Returns:
        List[Tuple[int, int, int, int]]: (x_min, y_min, x_max, y_max) per
        detected region.
    """
    all_bboxes = []
    mask = mask_image.sum(axis=-1).astype(np.uint8)
    assert mask.shape[0] == mask.shape[1]
    # Scale factor from the mask grid to the target coordinate system.
    d = target_size / mask.shape[0]

    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 3 returns (image, contours, hierarchy); OpenCV 4 returns
    # (contours, hierarchy) — pick the contour list either way.
    contours = contours[0] if len(contours) == 2 else contours[1]

    for contour in contours:
        x_min, y_min, w, h = cv2.boundingRect(contour)
        x_max = x_min + w
        y_max = y_min + h
        bbox = (int(d * x_min), int(d * y_min), int(d * x_max), int(d * y_max))
        all_bboxes.append(bbox)
    return all_bboxes
+ """ + color = [255, 0, 0] + + for bbox in bboxes: + if not bbox: + break + x_min, y_min, x_max, y_max = bbox + cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color, 2) + + return image + + +def callback(arguments): + """Callback function for arguments""" + try: + process_metrics(arguments.input, arguments.output, arguments.task) + except KeyboardInterrupt: + print("You exited") + + +def setup_parser(parser): + """The function to setup parser arguments""" + parser.add_argument( + "-i", + "--input", + help="path to dataset to load", + ) + + parser.add_argument( + "-o", + "--output", + help="path to dataset to save", + ) + + parser.add_argument( + "-t", + "--task", + help="segmentation or detection", + ) + + +def main(): + """Main module function""" + parser = ArgumentParser( + prog="hemorrhage_contrast", + description="A tool to contrast", + formatter_class=ArgumentDefaultsHelpFormatter, + ) + setup_parser(parser) + arguments = parser.parse_args() + callback(arguments) + + +if __name__ == "__main__": + main() diff --git a/innofw/utils/data_utils/rtk/lungs_description_metrics.py b/innofw/utils/data_utils/rtk/lungs_description_metrics.py new file mode 100644 index 00000000..441f8079 --- /dev/null +++ b/innofw/utils/data_utils/rtk/lungs_description_metrics.py @@ -0,0 +1,80 @@ +from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser + +from sklearn.metrics import classification_report, confusion_matrix +import pandas as pd + + +def calculate_lungs_metrics(gt_path, pred_path): + + y_col = "decision" + + y_pred = pd.read_csv(pred_path) + if y_col in y_pred: + y_pred = y_pred[y_col] + elif "y" in y_pred: + y_pred = y_pred["y"] + else: + raise ValueError("not proper csv") + + y_gt = pd.read_csv(gt_path) + if y_col in y_gt: + y_gt = y_gt[y_col] + elif "y" in y_gt: + y_gt = y_gt["y"] + else: + raise ValueError("not proper csv") + + y_gt = y_gt.str.strip() + + report = classification_report(y_gt, y_pred, zero_division=0) + + classes = ["Патология", "Норма", 
"необходимо дообследование"] + conf_matrix = confusion_matrix(y_pred=y_pred, y_true=y_gt, labels=classes) + + print("Отчет по метрикам") + print(report) + + print("\nМатрица ошибок:") + inds = ["Истинно " + x.lower() for x in classes] + cols = ["Предсказано " + x.lower() for x in classes] + print(pd.DataFrame(conf_matrix, index=inds, columns=cols)) + return True + + +def callback(arguments): + """Callback function for arguments""" + try: + calculate_lungs_metrics(arguments.input, arguments.output) + except KeyboardInterrupt: + print("You exited") + + +def setup_parser(parser): + """The function to setup parser arguments""" + parser.add_argument( + "-i", + "--input", + help="path to dataset to load", + ) + + parser.add_argument( + "-o", + "--output", + help="path to dataset to save", + ) + + +def main(): + """Main module function""" + parser = ArgumentParser( + prog="innofw", + description="Tool to calc lung decision metrcis", + formatter_class=ArgumentDefaultsHelpFormatter, + ) + setup_parser(parser) + arguments = parser.parse_args() + callback(arguments) + + +if __name__ == "__main__": + main() diff --git a/tests/unit/datamodules/lightning_datamodules/test_rtk.py b/tests/unit/datamodules/lightning_datamodules/test_rtk.py new file mode 100644 index 00000000..58a2f20e --- /dev/null +++ b/tests/unit/datamodules/lightning_datamodules/test_rtk.py @@ -0,0 +1,239 @@ +from unittest.mock import patch, MagicMock +from pathlib import Path +import os +import shutil + +import pytest +import numpy as np +import torch + + +from innofw.constants import Stages +from innofw.core.datamodules.lightning_datamodules.coco_rtk import ( + DEFAULT_TRANSFORM, + DicomCocoComplexingDataModule, + DicomCocoDataModuleRTK, + CustomNormalize, +) +from innofw.core.datamodules.pandas_datamodules.lung_description_decision_datamodule import ( + LungDescriptionDecisionPandasDataModule, +) +from innofw.core.datasets.coco_rtk import DicomCocoDatasetRTK +from 
@pytest.fixture()
def rtk_data(tmp_path_factory):
    """Provision the RTK segmentation inference dataset in a temp directory.

    NOTE(review): ``mktemp`` creates the directory and it is immediately
    removed — presumably so the datamodule sees a non-existing target and
    performs the download itself; confirm against DicomCocoDataModuleRTK.
    """
    target_dir = tmp_path_factory.mktemp("infer")
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    path = {"source": rtk_segm, "target": target_dir}
    dm = DicomCocoDataModuleRTK(infer=path, transform=resize_transform)
    dm.setup_infer()
    return target_dir
def test_normalize():
    """CustomNormalize must rescale an arbitrary array exactly into [0, 1]."""
    raw = np.array([[1, 2, 3], [4, 5, 6]])
    scaled = CustomNormalize()(raw)

    assert np.allclose(scaled.min(), 0.0)
    assert np.allclose(scaled.max(), 1.0)
@patch("numpy.save")
def test_RTKDataModule_save_preds(mock_save, rtk_data, rtk_downloader, tmp_path):
    """save_preds must write one .npy file per slice in channel-last layout."""
    path = {"source": rtk_segm, "target": rtk_data}
    dm = DicomCocoDataModuleRTK(infer=path, transform=resize_transform)
    dm.setup_infer()
    size = 32
    # Fake predictions shaped (batches, batch_size, channels, H, W).
    preds = torch.tensor(np.random.rand(2, 2, 1, size, size))
    dst_path = tmp_path
    dm.save_preds(preds, Stages.predict, dst_path)
    # One np.save call per sample = batches * batch_size.
    assert mock_save.call_count == preds.shape[0] * preds.shape[1]
    args, kwargs = mock_save.call_args_list[0]
    # np.save(path, array): the saved array must be (H, W, 1).
    assert args[1].shape == (size, size, 1)
{"source": lungs, "target": target_dir} + dm = LungDescriptionDecisionPandasDataModule(infer=path) + dm.setup_infer() + ds = dm.predict_dataloader() + for key in "x", "y": + assert key in ds + + +@patch("matplotlib.pyplot.show") +def test_hemor_contrast(mock_show, rtk_data, tmp_path_factory): + out_ = str(tmp_path_factory.mktemp("out")) + hemorrhage_contrast(input_path=str(rtk_data), output_folder=out_) + content = os.listdir(out_) + assert len(content) > 0 + assert len(content) % 3 == 0 + assert np.any([x.endswith("npy") for x in content]) + assert np.any([x.endswith("png") for x in content]) + + hemorrhage_contrast_metrics(out_) + assert mock_show.call_count > 0 + + +@pytest.mark.parametrize("task", ["segmentation", "detection"]) +@patch("matplotlib.pyplot.show") +def test_segm_detection_pipeline_metrics( + mock_show, tmp_path_factory, task, rtk_data, rtk_downloader +): + path = {"source": rtk_segm, "target": rtk_data} + dm = DicomCocoDataModuleRTK(infer=path, transform=resize_transform) + dm.setup_infer() + ds = dm.predict_dataloader() + ds.transform = resize_transform + + samples_number = len(ds) + out_dir = tmp_path_factory.mktemp("out") + for i in range(samples_number): + random_numpy = np.random.randint(0, 1, [256, 256, 1]) + np.save(os.path.join(out_dir, f"{i}.npy"), random_numpy) + process_metrics(input_path=rtk_data, output_folder=out_dir) + assert mock_show.call_count > 0 + assert rtk_downloader.call_count > 0 diff --git a/tests/unit/rtk/test_hemorrhage_contrast.py b/tests/unit/rtk/test_hemorrhage_contrast.py new file mode 100644 index 00000000..13339b49 --- /dev/null +++ b/tests/unit/rtk/test_hemorrhage_contrast.py @@ -0,0 +1,35 @@ +from unittest.mock import patch +import argparse + +from innofw.utils.data_utils.preprocessing.CT_hemorrhage_contrast_rtk import ( + callback, + default_output_path, + setup_parser, +) + + +def test_setup_parser(): + parser = argparse.ArgumentParser() + setup_parser(parser) + args = parser.parse_args(["-i", "input.txt"]) + 
def test_default_output_path():
    """default_output_path must return get_log_dir's result as a string."""
    # Patching the source module attribute works here because
    # ``default_output_path`` executes its
    # ``from innofw.utils.getters import get_log_dir`` at call time,
    # i.e. after the patch is in place.
    with patch("innofw.utils.getters.get_log_dir", return_value="mock_log_dir"):
        output_path = default_output_path()
        assert output_path == "mock_log_dir"


@patch(
    "innofw.utils.data_utils.preprocessing.CT_hemorrhage_contrast_rtk.hemorrhage_contrast"
)
def test_callback_success(mock_contrast):
    """callback must forward (input, output) to hemorrhage_contrast."""
    arguments = argparse.Namespace(input="input.txt", output="output.txt")
    callback(arguments)
    mock_contrast.assert_called_once_with("input.txt", "output.txt")
@pytest.fixture
def generate_test_data(tmpdir):
    """Write prediction / ground-truth CSVs and return their paths."""
    # data generation for y_pred and y_gt
    pred_data = pd.DataFrame(
        {"y": ["Патология", "Норма", "Норма", "Патология", "необходимо дообследование"]}
    )

    gt_data = pd.DataFrame(
        {
            "decision": [
                "Патология ",
                "Норма",
                "необходимо дообследование",
                "Патология",
                "Норма",
            ]
        }
    )

    pred_path = os.path.join(tmpdir, "predictions.csv")
    pred_data.to_csv(pred_path, index=False)

    gt_path = os.path.join(tmpdir, "ground_truth.csv")
    gt_data.to_csv(gt_path, index=False)

    return pred_path, gt_path


def test_lungs_metrics(generate_test_data, capsys):
    """calculate_lungs_metrics must print a report and a confusion matrix."""
    pred_path, gt_path = generate_test_data

    # calculate_lungs_metrics takes (gt_path, pred_path); the original call
    # passed (pred_path, gt_path), so ground truth and predictions were
    # swapped and the reported metrics were meaningless.
    calculate_lungs_metrics(gt_path, pred_path)

    captured = capsys.readouterr()

    assert "Отчет по метрикам" in captured.out
    assert "Матрица ошибок" in captured.out
    assert "Патология" in captured.out
    assert "Норма" in captured.out
    assert "необходимо дообследование" in captured.out