From 8739639bd9d684b6cdd9b1f62d0246535d4927cb Mon Sep 17 00:00:00 2001 From: plbenveniste Date: Thu, 22 Jun 2023 15:24:31 -0400 Subject: [PATCH 01/26] Added folder for nnunet experiment --- nnunet/convert_bids_to_nnunet.py | 199 +++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 nnunet/convert_bids_to_nnunet.py diff --git a/nnunet/convert_bids_to_nnunet.py b/nnunet/convert_bids_to_nnunet.py new file mode 100644 index 0000000..71c5536 --- /dev/null +++ b/nnunet/convert_bids_to_nnunet.py @@ -0,0 +1,199 @@ +# This scripts was adapted from a script by the following authors : Julian McGinnis, Naga Karthik +# +# This script is specifically designed to CanProCo dataset. It converts the dataset from a BIDS format to a nnU-Net format. +# +import argparse +import pathlib +from pathlib import Path +import json +import os +import shutil +from collections import OrderedDict +from tqdm import tqdm + +import nibabel as nib +import numpy as np + +def label_encoding(subject_path, label_path, label_gm_path, label_wm_path, threshold=1e-12): + #Encoding of the label images with background=0 , GM=1 and WM=2 + #We consider that GM and WM don't overlap (maybe need to change that ?) + + #For grey matter + label_gm_npy = nib.load(label_gm_path).get_fdata() + label_gm_npy = np.where(label_gm_npy > threshold, 1, 0) + #For white matter + label_wm_npy = nib.load(label_wm_path).get_fdata() + label_wm_npy = np.where(label_wm_npy > threshold, 2, 0) + + #Addition of both + label_npy = label_gm_npy + label_wm_npy + + ref = nib.load(subject_path) + label_bin = nib.Nifti1Image(label_npy, ref.affine, ref.header) + # overwrite the original label file with the binarized version + nib.save(label_bin, label_path) + + +# parse command line arguments +parser = argparse.ArgumentParser(description='Convert BIDS-structured database to nnUNet format.') +parser.add_argument('--path-data', required=True, + help='Path to BIDS structured dataset. Accepts both cross-sectional and longitudinal datasets') +parser.add_argument('--path-out', help='Path to output directory.', required=True) +parser.add_argument('--taskname', default='MSSpineLesion', type=str, + help='Specify the task name - usually the anatomy to be segmented, e.g. Hippocampus',) +parser.add_argument('--tasknumber', default=501,type=int, + help='Specify the task number, has to be greater than 500 but less than 999. 
e.g 502') +parser.add_argument('--label-folder', help='Path to the label folders in derivatives', default='labels', type=str) +#parser.add_argument('--mri-protocols', help='protocols to select', default='STIR,PSIR', type=str) + +args = parser.parse_args() + +path_in_images = Path(args.path_data) +label_folder = args.label_folder +path_in_labels = Path(os.path.join(args.path_data, 'derivatives', label_folder)) +path_out = Path(os.path.join(os.path.abspath(args.path_out), f'Dataset{args.tasknumber}_{args.taskname}')) +#mri_protocols = args.mri_protocols.split(',') + +# define paths for train and test folders +path_out_imagesTr = Path(os.path.join(path_out, 'imagesTr')) +path_out_imagesTs = Path(os.path.join(path_out, 'imagesTs')) +path_out_labelsTr = Path(os.path.join(path_out, 'labelsTr')) +path_out_labelsTs = Path(os.path.join(path_out, 'labelsTs')) + +# we load both train and validation set into the train images as nnunet uses cross-fold-validation +train_images, train_labels = [], [] +test_images, test_labels = [], [] + +if __name__ == '__main__': + + # make the directories + pathlib.Path(path_out).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_imagesTr).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_imagesTs).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_labelsTr).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_labelsTs).mkdir(parents=True, exist_ok=True) + + conversion_dict = {} + + + #------------- EXTRACTION OF THE LABELED IMAGES NAMES-------------------------- + labelled_imgs = [] + + # We first extract all the label files' names + + #label_dirs = sorted(list(path_in_labels.glob('*/'))) + + label_files = sorted(list(path_in_labels.rglob('*_STIR_lesion-manual.nii.gz')) + list(path_in_labels.rglob('*PSIR_lesion-manual.nii.gz') )) + labelled_imgs += [str(k) for k in label_files] + + #--------------- DISPACTH OF LABELLED IMAGES AND UNLABELED IMAGES ------------------- + + #Initialise the number of scans in train and in test folder + scan_cnt_train, scan_cnt_test = 0, 0 + + valid_train_imgs = [] + valid_test_imgs = [] + + #The image folders + #dirs = sorted(list(path_in_images.glob('*/'))) + #dirs = [str(x) for x in dirs] + + image_files = sorted(list(path_in_images.rglob('*_STIR.nii.gz')) + list(path_in_images.rglob('*_PSIR.nii.gz'))) + + for image_file in image_files: + + #Identify common data path + common = os.path.relpath(os.path.dirname(image_file), args.path_data) + + file_id = str(image_file).rsplit('_',1)[0].split('/')[-1] + "_" + + if (file_id in str(labelled_imgs)): + label_file = [k for k in labelled_imgs if file_id in k][0] + + scan_cnt_train+= 1 + + image_file_nnunet = os.path.join(path_out_imagesTr,f'{args.taskname}_{scan_cnt_train:03d}_0000.nii.gz') + label_file_nnunet = os.path.join(path_out_labelsTr,f'{args.taskname}_{scan_cnt_train:03d}.nii.gz') + + train_images.append(str(image_file_nnunet)) + train_labels.append(str(label_file_nnunet)) + + # copy the files to new structure + shutil.copyfile(image_file, image_file_nnunet) + shutil.copyfile(label_file, label_file_nnunet) + + conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet + conversion_dict[str(os.path.abspath(label_file))] = label_file_nnunet + else: + + # Repeat the above procedure for testing + scan_cnt_test += 1 + # create the new convention names + image_file_nnunet = os.path.join(path_out_imagesTs,f'{args.taskname}_{scan_cnt_test:03d}_0000.nii.gz') + label_file_nnunet = 
os.path.join(path_out_labelsTs,f'{args.taskname}_{scan_cnt_test:03d}.nii.gz') + + test_images.append(str(image_file_nnunet)) + test_labels.append(str(label_file_nnunet)) + + # copy the files to new structure + shutil.copyfile(image_file, image_file_nnunet) + + conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet + + #Display of number of training and number of testing images + print("Number of images for training: " + str(scan_cnt_train)) + print("Number of images for testing: " + str(scan_cnt_test)) + + #----------------- CREATION OF THE DICTIONNARY----------------------------------- + # create dataset_description.json + json_object = json.dumps(conversion_dict, indent=4) + # write to dataset description + conversion_dict_name = f"conversion_dict.json" + with open(os.path.join(path_out, conversion_dict_name), "w") as outfile: + outfile.write(json_object) + + + # c.f. dataset json generation. This contains the metadata for the dataset that nnUNet uses during preprocessing and training + # general info : https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/dataset_conversion/utils.py + # example: https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/dataset_conversion/Task055_SegTHOR.py + + json_dict = OrderedDict() + json_dict['name'] = args.taskname + json_dict['description'] = args.taskname + json_dict['tensorImageSize'] = "3D" + json_dict['reference'] = "TBD" + json_dict['licence'] = "TBD" + json_dict['release'] = "0.0" + + + # Because only using one modality + ## was changed from 'modality' to 'channel_names' + json_dict['channel_names'] = { + "0": "T1w", + } + + # 0 is always the background. Any class labels should start from 1. + json_dict['labels'] = { + "background" : "0", + "MS Lesion" : "1" , + } + + json_dict['numTraining'] = scan_cnt_train + json_dict['numTest'] = scan_cnt_test + #Newly required field in the json file with v2 + json_dict["file_ending"] = ".nii.gz" + + json_dict['training'] = [{'image': str(train_labels[i]).replace("labelsTr", "imagesTr") , "label": train_labels[i] } + for i in range(len(train_images))] + # Note: See https://github.com/MIC-DKFZ/nnUNet/issues/407 for how this should be described + + #Removed because useless in this case + json_dict['test'] = [str(test_labels[i]).replace("labelsTs", "imagesTs") for i in range(len(test_images))] + + # create dataset_description.json + json_object = json.dumps(json_dict, indent=4) + # write to dataset description + # nn-unet requires it to be "dataset.json" + dataset_dict_name = f"dataset.json" + with open(os.path.join(path_out, dataset_dict_name), "w") as outfile: + outfile.write(json_object) \ No newline at end of file From c495a267ecae0e19f57fa6a6699b5757c8d8b291 Mon Sep 17 00:00:00 2001 From: plbenveniste Date: Thu, 22 Jun 2023 16:17:24 -0400 Subject: [PATCH 02/26] Formatting --- nnunet/convert_bids_to_nnunet.py | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/nnunet/convert_bids_to_nnunet.py b/nnunet/convert_bids_to_nnunet.py index 71c5536..17a16ce 100644 --- a/nnunet/convert_bids_to_nnunet.py +++ b/nnunet/convert_bids_to_nnunet.py @@ -14,26 +14,6 @@ import nibabel as nib import numpy as np -def label_encoding(subject_path, label_path, label_gm_path, label_wm_path, threshold=1e-12): - #Encoding of the label images with background=0 , GM=1 and WM=2 - #We consider that GM and WM don't overlap (maybe need to change that ?) 
- - #For grey matter - label_gm_npy = nib.load(label_gm_path).get_fdata() - label_gm_npy = np.where(label_gm_npy > threshold, 1, 0) - #For white matter - label_wm_npy = nib.load(label_wm_path).get_fdata() - label_wm_npy = np.where(label_wm_npy > threshold, 2, 0) - - #Addition of both - label_npy = label_gm_npy + label_wm_npy - - ref = nib.load(subject_path) - label_bin = nib.Nifti1Image(label_npy, ref.affine, ref.header) - # overwrite the original label file with the binarized version - nib.save(label_bin, label_path) - - # parse command line arguments parser = argparse.ArgumentParser(description='Convert BIDS-structured database to nnUNet format.') parser.add_argument('--path-data', required=True, @@ -44,7 +24,6 @@ def label_encoding(subject_path, label_path, label_gm_path, label_wm_path, thres parser.add_argument('--tasknumber', default=501,type=int, help='Specify the task number, has to be greater than 500 but less than 999. e.g 502') parser.add_argument('--label-folder', help='Path to the label folders in derivatives', default='labels', type=str) -#parser.add_argument('--mri-protocols', help='protocols to select', default='STIR,PSIR', type=str) args = parser.parse_args() @@ -52,7 +31,6 @@ def label_encoding(subject_path, label_path, label_gm_path, label_wm_path, thres label_folder = args.label_folder path_in_labels = Path(os.path.join(args.path_data, 'derivatives', label_folder)) path_out = Path(os.path.join(os.path.abspath(args.path_out), f'Dataset{args.tasknumber}_{args.taskname}')) -#mri_protocols = args.mri_protocols.split(',') # define paths for train and test folders path_out_imagesTr = Path(os.path.join(path_out, 'imagesTr')) @@ -74,15 +52,11 @@ def label_encoding(subject_path, label_path, label_gm_path, label_wm_path, thres pathlib.Path(path_out_labelsTs).mkdir(parents=True, exist_ok=True) conversion_dict = {} - #------------- EXTRACTION OF THE LABELED IMAGES NAMES-------------------------- labelled_imgs = [] - # We first extract all the label files' names - - #label_dirs = sorted(list(path_in_labels.glob('*/'))) - + # We first extract all the label files' names label_files = sorted(list(path_in_labels.rglob('*_STIR_lesion-manual.nii.gz')) + list(path_in_labels.rglob('*PSIR_lesion-manual.nii.gz') )) labelled_imgs += [str(k) for k in label_files] @@ -95,9 +69,6 @@ def label_encoding(subject_path, label_path, label_gm_path, label_wm_path, thres valid_test_imgs = [] #The image folders - #dirs = sorted(list(path_in_images.glob('*/'))) - #dirs = [str(x) for x in dirs] - image_files = sorted(list(path_in_images.rglob('*_STIR.nii.gz')) + list(path_in_images.rglob('*_PSIR.nii.gz'))) for image_file in image_files: From cfe61fdf18073e01f97d45f379f7188095392c8a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Tue, 18 Jul 2023 16:14:19 -0400 Subject: [PATCH 03/26] added code to crop img along sc --- nnunet/convert_bids_to_nnunet.py | 76 ++++++++++++++++++++++++++++---- nnunet/crop_sc.py | 28 ++++++++++++ nnunet/seg_sc.py | 26 +++++++++++ 3 files changed, 121 insertions(+), 9 deletions(-) create mode 100644 nnunet/crop_sc.py create mode 100644 nnunet/seg_sc.py diff --git a/nnunet/convert_bids_to_nnunet.py b/nnunet/convert_bids_to_nnunet.py index 17a16ce..88a9982 100644 --- a/nnunet/convert_bids_to_nnunet.py +++ b/nnunet/convert_bids_to_nnunet.py @@ -1,7 +1,30 @@ -# This scripts was adapted from a script by the following authors : Julian McGinnis, Naga Karthik -# -# This script is specifically designed to CanProCo dataset. 
It converts the dataset from a BIDS format to a nnU-Net format. -# + +"""Convert data from BIDS to nnU-Net format + +This scripts was adapted from a script by the following authors : Julian McGinnis, Naga Karthik +This python script converts data from the BIDS format to the nnU-Net format in order to be able to perform pre-processing, training and inference. +This script is specifically designed to the canproco dataset. It converts the dataset from a BIDS format to a nnU-Net format. +This script is specific in that it takes all labeled data for training and validation. Because, very little labelled data is available, no labelled data is used for testing. + +Example of run: + + $ python convert_bids_to_nnunet.py --path-data /path/to/data_extracted --path-out /path/to/nnUNet_raw --taskname TASK-NAME --tasknumber DATASET-ID + +Arguments: + + --path-data : Path to BIDS structured dataset. Accepts both cross-sectional and longitudinal datasets + --path-out : Path to output directory. + --taskname: Specify the task name - usually the anatomy to be segmented, e.g. Hippocampus + --tasknumber : Specify the task number, has to be greater than 100 but less than 999 + --label-folder : Path to the label folders in derivatives (default='manual_masks') + --crop-training : Crop images to spinal cord mask (default=False) + --crop-testing : Crop images to spinal cord mask (default=False) + +Todo: + * + +Pierre-Louis Benveniste +""" import argparse import pathlib from pathlib import Path @@ -10,6 +33,9 @@ import shutil from collections import OrderedDict from tqdm import tqdm +from seg_sc import segment_sc +from crop_sc import crop_to_mask + import nibabel as nib import numpy as np @@ -24,6 +50,8 @@ parser.add_argument('--tasknumber', default=501,type=int, help='Specify the task number, has to be greater than 500 but less than 999. 
e.g 502') parser.add_argument('--label-folder', help='Path to the label folders in derivatives', default='labels', type=str) +parser.add_argument('--crop-training', help='Crop images to spinal cord mask', action='store_true', default=False) +parser.add_argument('--crop-testing', help='Crop images to spinal cord mask', action='store_true', default=False) args = parser.parse_args() @@ -88,10 +116,26 @@ train_images.append(str(image_file_nnunet)) train_labels.append(str(label_file_nnunet)) + + if args.crop_training: + # segment the spinal cord + temp_seg = os.path.join(path_out, f'temp_seg.nii.gz') + if 'STIR' in str(image_file): + contrast = 't2' + else: + contrast = 't1' + segment_sc(image_file, temp_seg, contrast=contrast) + # crop the image to the spinal cord + crop_to_mask(image_file, temp_seg, image_file_nnunet) + # crop the label to the spinal cord + crop_to_mask(label_file, temp_seg, label_file_nnunet) + # remove the temporary segmentation file + os.remove(temp_seg) - # copy the files to new structure - shutil.copyfile(image_file, image_file_nnunet) - shutil.copyfile(label_file, label_file_nnunet) + else: + # copy the files to new structure + shutil.copyfile(image_file, image_file_nnunet) + shutil.copyfile(label_file, label_file_nnunet) conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet conversion_dict[str(os.path.abspath(label_file))] = label_file_nnunet @@ -106,8 +150,22 @@ test_images.append(str(image_file_nnunet)) test_labels.append(str(label_file_nnunet)) - # copy the files to new structure - shutil.copyfile(image_file, image_file_nnunet) + if args.crop_testing: + # segment the spinal cord + temp_seg = os.path.join(path_out, f'temp_seg.nii.gz') + if 'STIR' in str(image_file): + contrast = 't2' + else: + contrast = 't1' + segment_sc(image_file, temp_seg) + # crop the image to the spinal cord + crop_to_mask(image_file, temp_seg, image_file_nnunet) + # remove the temporary segmentation file + os.remove(temp_seg) + + else: + # copy the files to new structure + shutil.copyfile(image_file, image_file_nnunet) conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet diff --git a/nnunet/crop_sc.py b/nnunet/crop_sc.py new file mode 100644 index 0000000..9c876ac --- /dev/null +++ b/nnunet/crop_sc.py @@ -0,0 +1,28 @@ +""" +This function is used to crop the images at the same size as the mask given + +Args: + image: image to crop + mask: mask to crop + dilate_size: size of the dilation + output_path: output folder path + +Returns: + None + +Todo: + * + +Pierre-Louis Benveniste +""" + +import os + +def crop_to_mask(image, mask, output_path, dilate_size=2): + #This function uses sct_crop_image to crop the image to the mask + #The mask is dilated to avoid cutting the edges of the mask + #The image is cropped to the mask + + os.system(f'sct_crop_image -i {image} -m {mask} -o {output_path} -dilate {dilate_size} ') + + return None diff --git a/nnunet/seg_sc.py b/nnunet/seg_sc.py new file mode 100644 index 0000000..f7aa854 --- /dev/null +++ b/nnunet/seg_sc.py @@ -0,0 +1,26 @@ +""" +This functions is used to segment the spinal cord using the spinal cord toolbox + +Args: + image: image to segment + output_path: output folder path + contrast: contrast used for the segmentation + +Returns: + None + +Todo: + * + +Pierre-Louis Benveniste +""" + +import os + +def segment_sc(image, output_path, contrast='t2'): + #This function uses sct_deepseg_sc to segment the spinal cord + #The image is segmented and the segmentation is saved in the output folder + + 
os.system(f'sct_deepseg_sc -i {image} -o {output_path} -c {contrast}') + + return None From 24bcb0c41d7e4a48c47cfe4dbdfcf11cf1938c2a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Wed, 19 Jul 2023 15:33:02 -0400 Subject: [PATCH 04/26] script for seg of full dataset --- nnunet/seg_dataset.py | 100 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 nnunet/seg_dataset.py diff --git a/nnunet/seg_dataset.py b/nnunet/seg_dataset.py new file mode 100644 index 0000000..55fc247 --- /dev/null +++ b/nnunet/seg_dataset.py @@ -0,0 +1,100 @@ +""" +This script adds segmentation of the spinal cord to the dataset using the spinal cord toolbox. + +Args: + input_folder: path to the folder containing the images + contrast: contrast used for the segmentation + qc_folder: path to the quality control folder + +Returns: + None + +Todo: + * + +Pierre-Louis Benveniste +""" + +import os +import argparse +from pathlib import Path + +def seg_sc(image, contrast, output_path, qc_folder): + """ + This function is used to segment the spinal cord using the spinal cord toolbox. Is + + Args: + image: image to segment + contrast: contrast used for the segmentation + output_path: output folder path + + Returns: + None + """ + + contrast_dict = {'STIR': 't2', 'PSIR': 't1', 'T2star':'t2s', 'T2w': 't2', 'T1w': 't1', 'MT-T1': 't1', 'MTon': 't2s' } + + if contrast=='PSIR': + os.system(f'sct_propseg_sc -i {image} -o {output_path} -c {contrast_dict[contrast]} -qc {qc_folder}') + else: + os.system(f'sct_deepseg_sc -i {image} -o {output_path} -c {contrast_dict[contrast]} -qc {qc_folder}') + + return None + +def get_parser(): + """ + This function parses the arguments given to the script + + Args: + None + + Returns: + parser: parser containing the arguments + """ + + parser = argparse.ArgumentParser(description='This script adds segmentation of the spinal cord to the dataset using the spinal cord toolbox.') + parser.add_argument('-i', '--input_folder', type=str, help='path to the folder containing the images', required=True) + parser.add_argument('-c', '--contrast', type=str, help='contrast used for the segmentation (if multiple: do the following: PSIR,STIR (no space))', required=True) + parser.add_argument('-q', '--qc_folder', type=str, help='path to the quality control folder', required=True) + + return parser + +def main(): + """ + This function is the main function of the script. It calls the other functions to segment the spinal cord. 
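+    For every image whose name matches one of the requested contrasts, the output
+    mask is written next to the image with the '_seg.nii.gz' suffix.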
+ + Args: + None + + Returns: + None + """ + + parser = get_parser() + args = parser.parse_args() + + #get the path to the folder containing the images + path_in_images = Path(args.input_folder) + #get the contrast used for the segmentation + contrasts = list(args.contrast.split(',')) + + #Get the list of segmentations with the right contrast + seg_files = [] + for contrast in contrasts: + seg_files += sorted(list(path_in_images.rglob(f'*{contrast}.nii.gz'))) + seg_files = sorted(list(set(seg_files))) + + for seg_file in seg_files: + #get the mouse number + mouse_nb = seg_file.parts[-4] + #get session number + session_nb = seg_file.parts[-3] + #get the contrast + contrast = seg_file.parts[-1].split('.')[0].split('_')[-1] + #get the output path + seg_out_path = str(seg_file).split('.')[0] + '_seg.nii.gz' + #segment the spinal cord + seg_sc(seg_file, contrast, seg_out_path, args.qc_folder) + +if __name__ == '__main__': + main() \ No newline at end of file From 919477f64bb546785cc6ab0fe982a617e9a9249b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Wed, 19 Jul 2023 20:14:11 -0400 Subject: [PATCH 05/26] modified seg and crop algo --- nnunet/crop_dataset.py | 108 +++++++++++++++++++++++++++++++++++++++++ nnunet/seg_dataset.py | 9 ++-- 2 files changed, 112 insertions(+), 5 deletions(-) create mode 100644 nnunet/crop_dataset.py diff --git a/nnunet/crop_dataset.py b/nnunet/crop_dataset.py new file mode 100644 index 0000000..602af4c --- /dev/null +++ b/nnunet/crop_dataset.py @@ -0,0 +1,108 @@ +""" +This script crops the images and their label (if it exists) to the same size as the mask given. +The mask given is the segmentation of the spinal cord. + +Args: + input_folder: path to the folder containing the images + contrast: contrast used for the segmentation + +Returns: + None + +Todo: + * + +Pierre-Louis Benveniste +""" + +import os +import argparse +from pathlib import Path + +def crop_to_mask(image, mask, output_path, dilate_size=2): + """ + This function is used to crop the images at the same size as the mask given. + It dilates the mask to avoid cutting the edges of the mask. + + Args: + image: image to crop + mask: mask to crop + dilate_size: size of the dilation + output_path: output folder path + + Returns: + None + """ + + os.system(f'sct_crop_image -i {image} -m {mask} -o {output_path} -dilate {dilate_size} ') + + return None + + +def get_parser(): + """ + This function parses the arguments given to the script + + Args: + None + + Returns: + parser: parser containing the arguments + """ + + parser = argparse.ArgumentParser(description='This script crops the images and their label to the same size as the mask given. The mask given is the segmentation of the spinal cord.') + parser.add_argument('-i', '--input_folder', type=str, help='path to the folder containing the images', required=True) + parser.add_argument('-c', '--contrast', type=str, help='contrast used for the segmentation (if multiple: do the following: PSIR,STIR (no space))', required=True) + + return parser + +def main(): + """ + This function is the main function of the script. It calls the other functions to crop the images and its label (if it exists). 
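+    Each image is cropped to its '_seg.nii.gz' spinal cord mask and saved with the
+    '_crop.nii.gz' suffix; the matching label in derivatives/labels, if it exists,
+    is cropped with the same mask.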
+ It performs the cropping for all the images in the BIDS format dataset + + Args: + None + + Returns: + None + """ + + #Get the arguments + parser = get_parser() + args = parser.parse_args() + input_folder = Path(args.input_folder) + contrasts = list(args.contrast.split(',')) + + #Get the list of images + image_files = [] + for contrast in contrasts: + image_files += list(input_folder.rglob(f'*{contrast}.nii.gz')) + + #Crop the images and their label + for image in image_files: + #get the mouse number + mouse_nb = str(image.parts[-4]) + #get session number + session_nb = str(image.parts[-3]) + #Get the mask + mask = str(image).split('.')[0] + '_seg.nii.gz' + #output image + output_image = str(image).split('.')[0] + '_crop.nii.gz' + #Crop the image + crop_to_mask(image, mask, output_image) + + #get the label + label = str(input_folder) + '/derivatives/labels/' + mouse_nb + '/' + session_nb + '/anat/' + str(image).split('/')[-1].split('.')[0] + '_lesion-manual.nii.gz' + #if the label exists + if os.path.exists(label): + #output label + output_label = label.split('.')[0] + '_crop.nii.gz' + #Crop the label + crop_to_mask(label, mask, output_label) + + + return None + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/nnunet/seg_dataset.py b/nnunet/seg_dataset.py index 55fc247..92c33fc 100644 --- a/nnunet/seg_dataset.py +++ b/nnunet/seg_dataset.py @@ -35,12 +35,13 @@ def seg_sc(image, contrast, output_path, qc_folder): contrast_dict = {'STIR': 't2', 'PSIR': 't1', 'T2star':'t2s', 'T2w': 't2', 'T1w': 't1', 'MT-T1': 't1', 'MTon': 't2s' } if contrast=='PSIR': - os.system(f'sct_propseg_sc -i {image} -o {output_path} -c {contrast_dict[contrast]} -qc {qc_folder}') + os.system(f'sct_propseg -i {image} -o {output_path} -c {contrast_dict[contrast]} -qc {qc_folder}') else: os.system(f'sct_deepseg_sc -i {image} -o {output_path} -c {contrast_dict[contrast]} -qc {qc_folder}') return None + def get_parser(): """ This function parses the arguments given to the script @@ -59,6 +60,7 @@ def get_parser(): return parser + def main(): """ This function is the main function of the script. It calls the other functions to segment the spinal cord. 
@@ -85,10 +87,6 @@ def main(): seg_files = sorted(list(set(seg_files))) for seg_file in seg_files: - #get the mouse number - mouse_nb = seg_file.parts[-4] - #get session number - session_nb = seg_file.parts[-3] #get the contrast contrast = seg_file.parts[-1].split('.')[0].split('_')[-1] #get the output path @@ -96,5 +94,6 @@ def main(): #segment the spinal cord seg_sc(seg_file, contrast, seg_out_path, args.qc_folder) + if __name__ == '__main__': main() \ No newline at end of file From 926078d4f7f69ef02f4eb1b7d102da2c54052d46 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Thu, 20 Jul 2023 18:03:43 -0400 Subject: [PATCH 06/26] edited qc for seg and crop --- nnunet/convert_bids_to_nnunet.py | 58 +---- nnunet/convert_bids_to_nnunet_crop_and_seg.py | 229 ++++++++++++++++++ nnunet/crop_dataset.py | 60 ++++- nnunet/seg_dataset.py | 13 +- 4 files changed, 303 insertions(+), 57 deletions(-) create mode 100644 nnunet/convert_bids_to_nnunet_crop_and_seg.py diff --git a/nnunet/convert_bids_to_nnunet.py b/nnunet/convert_bids_to_nnunet.py index 88a9982..03db7e9 100644 --- a/nnunet/convert_bids_to_nnunet.py +++ b/nnunet/convert_bids_to_nnunet.py @@ -1,30 +1,27 @@ - """Convert data from BIDS to nnU-Net format - This scripts was adapted from a script by the following authors : Julian McGinnis, Naga Karthik This python script converts data from the BIDS format to the nnU-Net format in order to be able to perform pre-processing, training and inference. This script is specifically designed to the canproco dataset. It converts the dataset from a BIDS format to a nnU-Net format. This script is specific in that it takes all labeled data for training and validation. Because, very little labelled data is available, no labelled data is used for testing. Example of run: - $ python convert_bids_to_nnunet.py --path-data /path/to/data_extracted --path-out /path/to/nnUNet_raw --taskname TASK-NAME --tasknumber DATASET-ID Arguments: - --path-data : Path to BIDS structured dataset. Accepts both cross-sectional and longitudinal datasets --path-out : Path to output directory. --taskname: Specify the task name - usually the anatomy to be segmented, e.g. Hippocampus --tasknumber : Specify the task number, has to be greater than 100 but less than 999 - --label-folder : Path to the label folders in derivatives (default='manual_masks') - --crop-training : Crop images to spinal cord mask (default=False) - --crop-testing : Crop images to spinal cord mask (default=False) + --label-folder : Path to the label folders in derivatives (default='labels') + +Returns: + None Todo: * - Pierre-Louis Benveniste """ + import argparse import pathlib from pathlib import Path @@ -33,9 +30,6 @@ import shutil from collections import OrderedDict from tqdm import tqdm -from seg_sc import segment_sc -from crop_sc import crop_to_mask - import nibabel as nib import numpy as np @@ -50,8 +44,6 @@ parser.add_argument('--tasknumber', default=501,type=int, help='Specify the task number, has to be greater than 500 but less than 999. 
e.g 502') parser.add_argument('--label-folder', help='Path to the label folders in derivatives', default='labels', type=str) -parser.add_argument('--crop-training', help='Crop images to spinal cord mask', action='store_true', default=False) -parser.add_argument('--crop-testing', help='Crop images to spinal cord mask', action='store_true', default=False) args = parser.parse_args() @@ -116,26 +108,10 @@ train_images.append(str(image_file_nnunet)) train_labels.append(str(label_file_nnunet)) - - if args.crop_training: - # segment the spinal cord - temp_seg = os.path.join(path_out, f'temp_seg.nii.gz') - if 'STIR' in str(image_file): - contrast = 't2' - else: - contrast = 't1' - segment_sc(image_file, temp_seg, contrast=contrast) - # crop the image to the spinal cord - crop_to_mask(image_file, temp_seg, image_file_nnunet) - # crop the label to the spinal cord - crop_to_mask(label_file, temp_seg, label_file_nnunet) - # remove the temporary segmentation file - os.remove(temp_seg) - else: - # copy the files to new structure - shutil.copyfile(image_file, image_file_nnunet) - shutil.copyfile(label_file, label_file_nnunet) + # copy the files to new structure + shutil.copyfile(image_file, image_file_nnunet) + shutil.copyfile(label_file, label_file_nnunet) conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet conversion_dict[str(os.path.abspath(label_file))] = label_file_nnunet @@ -150,22 +126,8 @@ test_images.append(str(image_file_nnunet)) test_labels.append(str(label_file_nnunet)) - if args.crop_testing: - # segment the spinal cord - temp_seg = os.path.join(path_out, f'temp_seg.nii.gz') - if 'STIR' in str(image_file): - contrast = 't2' - else: - contrast = 't1' - segment_sc(image_file, temp_seg) - # crop the image to the spinal cord - crop_to_mask(image_file, temp_seg, image_file_nnunet) - # remove the temporary segmentation file - os.remove(temp_seg) - - else: - # copy the files to new structure - shutil.copyfile(image_file, image_file_nnunet) + # copy the files to new structure + shutil.copyfile(image_file, image_file_nnunet) conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet diff --git a/nnunet/convert_bids_to_nnunet_crop_and_seg.py b/nnunet/convert_bids_to_nnunet_crop_and_seg.py new file mode 100644 index 0000000..7aade81 --- /dev/null +++ b/nnunet/convert_bids_to_nnunet_crop_and_seg.py @@ -0,0 +1,229 @@ + +"""Convert data from BIDS to nnU-Net format + +This scripts was adapted from a script by the following authors : Julian McGinnis, Naga Karthik +This python script converts data from the BIDS format to the nnU-Net format in order to be able to perform pre-processing, training and inference. +This script is specifically designed to the canproco dataset. It converts the dataset from a BIDS format to a nnU-Net format. +This script is specific in that it takes all labeled data for training and validation. Because, very little labelled data is available, no labelled data is used for testing. +This script is also specific because it only takes the cropped images and their labels. +Also, it is specific because you can chose which contrast you want to use for the segmentation. + +Example of run: + + $ python convert_bids_to_nnunet.py --path-data /path/to/data_extracted --path-out /path/to/nnUNet_raw --taskname TASK-NAME --tasknumber DATASET-ID + +Arguments: + + --path-data : Path to BIDS structured dataset. Accepts both cross-sectional and longitudinal datasets + --path-out : Path to output directory. 
+ --taskname: Specify the task name - usually the anatomy to be segmented, e.g. Hippocampus + --tasknumber : Specify the task number, has to be greater than 100 but less than 999 + --label-folder : Path to the label folders in derivatives (default='labels') + --contrast : Contrast used for the segmentation (if multiple: do the following: PSIR,STIR (no space)) + +Todo: + * + +Pierre-Louis Benveniste +""" +import argparse +import pathlib +from pathlib import Path +import json +import os +import shutil +from collections import OrderedDict +from tqdm import tqdm +from seg_sc import segment_sc +from crop_sc import crop_to_mask + + +import nibabel as nib +import numpy as np + +# parse command line arguments +parser = argparse.ArgumentParser(description='Convert BIDS-structured database to nnUNet format.') +parser.add_argument('--path-data', required=True, + help='Path to BIDS structured dataset. Accepts both cross-sectional and longitudinal datasets') +parser.add_argument('--path-out', help='Path to output directory.', required=True) +parser.add_argument('--taskname', default='MSSpineLesion', type=str, + help='Specify the task name - usually the anatomy to be segmented, e.g. Hippocampus',) +parser.add_argument('--tasknumber', default=501,type=int, + help='Specify the task number, has to be greater than 500 but less than 999. e.g 502') +parser.add_argument('--label-folder', help='Path to the label folders in derivatives', default='labels', type=str) +parser.add_argument('--crop-training', help='Crop images to spinal cord mask', action='store_true', default=False) +parser.add_argument('--crop-testing', help='Crop images to spinal cord mask', action='store_true', default=False) + +args = parser.parse_args() + +path_in_images = Path(args.path_data) +label_folder = args.label_folder +path_in_labels = Path(os.path.join(args.path_data, 'derivatives', label_folder)) +path_out = Path(os.path.join(os.path.abspath(args.path_out), f'Dataset{args.tasknumber}_{args.taskname}')) + +# define paths for train and test folders +path_out_imagesTr = Path(os.path.join(path_out, 'imagesTr')) +path_out_imagesTs = Path(os.path.join(path_out, 'imagesTs')) +path_out_labelsTr = Path(os.path.join(path_out, 'labelsTr')) +path_out_labelsTs = Path(os.path.join(path_out, 'labelsTs')) + +# we load both train and validation set into the train images as nnunet uses cross-fold-validation +train_images, train_labels = [], [] +test_images, test_labels = [], [] + +if __name__ == '__main__': + + # make the directories + pathlib.Path(path_out).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_imagesTr).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_imagesTs).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_labelsTr).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_labelsTs).mkdir(parents=True, exist_ok=True) + + conversion_dict = {} + + #------------- EXTRACTION OF THE LABELED IMAGES NAMES-------------------------- + labelled_imgs = [] + + # We first extract all the label files' names + label_files = sorted(list(path_in_labels.rglob('*_STIR_lesion-manual.nii.gz')) + list(path_in_labels.rglob('*PSIR_lesion-manual.nii.gz') )) + labelled_imgs += [str(k) for k in label_files] + + #--------------- DISPACTH OF LABELLED IMAGES AND UNLABELED IMAGES ------------------- + + #Initialise the number of scans in train and in test folder + scan_cnt_train, scan_cnt_test = 0, 0 + + valid_train_imgs = [] + valid_test_imgs = [] + + #The image folders + image_files = 
sorted(list(path_in_images.rglob('*_STIR.nii.gz')) + list(path_in_images.rglob('*_PSIR.nii.gz'))) + + for image_file in image_files: + + #Identify common data path + common = os.path.relpath(os.path.dirname(image_file), args.path_data) + + file_id = str(image_file).rsplit('_',1)[0].split('/')[-1] + "_" + + if (file_id in str(labelled_imgs)): + label_file = [k for k in labelled_imgs if file_id in k][0] + + scan_cnt_train+= 1 + + image_file_nnunet = os.path.join(path_out_imagesTr,f'{args.taskname}_{scan_cnt_train:03d}_0000.nii.gz') + label_file_nnunet = os.path.join(path_out_labelsTr,f'{args.taskname}_{scan_cnt_train:03d}.nii.gz') + + train_images.append(str(image_file_nnunet)) + train_labels.append(str(label_file_nnunet)) + + if args.crop_training: + # segment the spinal cord + temp_seg = os.path.join(path_out, f'temp_seg.nii.gz') + if 'STIR' in str(image_file): + contrast = 't2' + else: + contrast = 't1' + segment_sc(image_file, temp_seg, contrast=contrast) + # crop the image to the spinal cord + crop_to_mask(image_file, temp_seg, image_file_nnunet) + # crop the label to the spinal cord + crop_to_mask(label_file, temp_seg, label_file_nnunet) + # remove the temporary segmentation file + os.remove(temp_seg) + + else: + # copy the files to new structure + shutil.copyfile(image_file, image_file_nnunet) + shutil.copyfile(label_file, label_file_nnunet) + + conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet + conversion_dict[str(os.path.abspath(label_file))] = label_file_nnunet + else: + + # Repeat the above procedure for testing + scan_cnt_test += 1 + # create the new convention names + image_file_nnunet = os.path.join(path_out_imagesTs,f'{args.taskname}_{scan_cnt_test:03d}_0000.nii.gz') + label_file_nnunet = os.path.join(path_out_labelsTs,f'{args.taskname}_{scan_cnt_test:03d}.nii.gz') + + test_images.append(str(image_file_nnunet)) + test_labels.append(str(label_file_nnunet)) + + if args.crop_testing: + # segment the spinal cord + temp_seg = os.path.join(path_out, f'temp_seg.nii.gz') + if 'STIR' in str(image_file): + contrast = 't2' + else: + contrast = 't1' + segment_sc(image_file, temp_seg) + # crop the image to the spinal cord + crop_to_mask(image_file, temp_seg, image_file_nnunet) + # remove the temporary segmentation file + os.remove(temp_seg) + + else: + # copy the files to new structure + shutil.copyfile(image_file, image_file_nnunet) + + conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet + + #Display of number of training and number of testing images + print("Number of images for training: " + str(scan_cnt_train)) + print("Number of images for testing: " + str(scan_cnt_test)) + + #----------------- CREATION OF THE DICTIONNARY----------------------------------- + # create dataset_description.json + json_object = json.dumps(conversion_dict, indent=4) + # write to dataset description + conversion_dict_name = f"conversion_dict.json" + with open(os.path.join(path_out, conversion_dict_name), "w") as outfile: + outfile.write(json_object) + + + # c.f. dataset json generation. 
This contains the metadata for the dataset that nnUNet uses during preprocessing and training + # general info : https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/dataset_conversion/utils.py + # example: https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/dataset_conversion/Task055_SegTHOR.py + + json_dict = OrderedDict() + json_dict['name'] = args.taskname + json_dict['description'] = args.taskname + json_dict['tensorImageSize'] = "3D" + json_dict['reference'] = "TBD" + json_dict['licence'] = "TBD" + json_dict['release'] = "0.0" + + + # Because only using one modality + ## was changed from 'modality' to 'channel_names' + json_dict['channel_names'] = { + "0": "T1w", + } + + # 0 is always the background. Any class labels should start from 1. + json_dict['labels'] = { + "background" : "0", + "MS Lesion" : "1" , + } + + json_dict['numTraining'] = scan_cnt_train + json_dict['numTest'] = scan_cnt_test + #Newly required field in the json file with v2 + json_dict["file_ending"] = ".nii.gz" + + json_dict['training'] = [{'image': str(train_labels[i]).replace("labelsTr", "imagesTr") , "label": train_labels[i] } + for i in range(len(train_images))] + # Note: See https://github.com/MIC-DKFZ/nnUNet/issues/407 for how this should be described + + #Removed because useless in this case + json_dict['test'] = [str(test_labels[i]).replace("labelsTs", "imagesTs") for i in range(len(test_images))] + + # create dataset_description.json + json_object = json.dumps(json_dict, indent=4) + # write to dataset description + # nn-unet requires it to be "dataset.json" + dataset_dict_name = f"dataset.json" + with open(os.path.join(path_out, dataset_dict_name), "w") as outfile: + outfile.write(json_object) \ No newline at end of file diff --git a/nnunet/crop_dataset.py b/nnunet/crop_dataset.py index 602af4c..d0ba823 100644 --- a/nnunet/crop_dataset.py +++ b/nnunet/crop_dataset.py @@ -5,10 +5,15 @@ Args: input_folder: path to the folder containing the images contrast: contrast used for the segmentation + qc_folder: path to the quality control folder + replacing: if True, the images will be replaced by the cropped images and the segmentations will be removed. If False, the cropped images will be saved with the suffix '_crop'. Returns: None +Example: + python crop_dataset.py -i /path/to/dataset -c PSIR,STIR -q /path/to/qc/folder + Todo: * @@ -19,10 +24,11 @@ import argparse from pathlib import Path -def crop_to_mask(image, mask, output_path, dilate_size=2): +def crop_to_mask(image, mask, output_path, qc_folder, dilate_size='2x1000x2',label=False): """ This function is used to crop the images at the same size as the mask given. It dilates the mask to avoid cutting the edges of the mask. + It also creates the quality control on the sagittal plane. Args: image: image to crop @@ -34,8 +40,40 @@ def crop_to_mask(image, mask, output_path, dilate_size=2): None """ - os.system(f'sct_crop_image -i {image} -m {mask} -o {output_path} -dilate {dilate_size} ') + os.system(f'sct_crop_image -i {image} -m {mask} -o {output_path} -dilate {dilate_size} -b 0') + + if not label: + os.system(f'sct_qc -i {image} -d {output_path} -p sct_image_stitch -plane sagittal -qc {qc_folder}') + + return None + + +def replacing_fc(image, mask, cropped,label=False): + """ + This function replaces the image by the cropped image and removes the segmentation. + It also deleted the _centerline.nii.gz file if it exists. 
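+    (The '_centerline.nii.gz' file is presumably an intermediate output left next
+    to the image by the spinal cord segmentation step.)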
+ + + Args: + image: image to replace + mask: mask to replace + cropped: cropped image + Returns: + None + """ + if label: + os.system(f'rm {image}') + os.system(f'mv {cropped} {image}') + + else: + os.system(f'rm {image}') + os.system(f'rm {mask}') + os.system(f'mv {cropped} {image}') + + if os.path.exists(str(image).split('.')[0] + '_centerline.nii.gz'): + os.system(f'rm {str(image).split(".")[0] + "_centerline.nii.gz"}') + return None @@ -53,6 +91,8 @@ def get_parser(): parser = argparse.ArgumentParser(description='This script crops the images and their label to the same size as the mask given. The mask given is the segmentation of the spinal cord.') parser.add_argument('-i', '--input_folder', type=str, help='path to the folder containing the images', required=True) parser.add_argument('-c', '--contrast', type=str, help='contrast used for the segmentation (if multiple: do the following: PSIR,STIR (no space))', required=True) + parser.add_argument('-q', '--qc_folder', type=str, help='path to the quality control folder', required=True) + parser.add_argument('-r', '--replacing', type=bool, help='if True, the images will be replaced by the cropped images and the segmentations will be removed. If False, the cropped images will be saved with the suffix _crop', required=False, default=False) return parser @@ -90,8 +130,8 @@ def main(): #output image output_image = str(image).split('.')[0] + '_crop.nii.gz' #Crop the image - crop_to_mask(image, mask, output_image) - + crop_to_mask(image, mask, output_image, args.qc_folder) + #get the label label = str(input_folder) + '/derivatives/labels/' + mouse_nb + '/' + session_nb + '/anat/' + str(image).split('/')[-1].split('.')[0] + '_lesion-manual.nii.gz' #if the label exists @@ -99,8 +139,16 @@ def main(): #output label output_label = label.split('.')[0] + '_crop.nii.gz' #Crop the label - crop_to_mask(label, mask, output_label) - + crop_to_mask(label, mask, output_label, args.qc_folder,label=True) + #if replacing is True + if args.replacing: + #replace the label by the cropped label + replacing_fc(label, mask, output_label,label=True) + + #if replacing is True + if args.replacing: + #replace the image by the cropped image + replacing_fc(image, mask, output_image) return None diff --git a/nnunet/seg_dataset.py b/nnunet/seg_dataset.py index 92c33fc..9d6ba71 100644 --- a/nnunet/seg_dataset.py +++ b/nnunet/seg_dataset.py @@ -9,6 +9,9 @@ Returns: None +Example: + python seg_dataset.py -i /path/to/dataset -c PSIR,STIR -q /path/to/qc/folder + Todo: * @@ -21,7 +24,8 @@ def seg_sc(image, contrast, output_path, qc_folder): """ - This function is used to segment the spinal cord using the spinal cord toolbox. Is + This function is used to segment the spinal cord using the spinal cord toolbox. + It also generates the quality control on the sagittal plane. 
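+    (The QC report is produced by a separate sct_qc call below, rather than by
+    passing -qc to the segmentation commands.)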
 
     Args:
         image: image to segment
         output_path: output folder path
         contrast: contrast used for the segmentation
@@ -35,12 +39,12 @@ def seg_sc(image, contrast, output_path, qc_folder):
 
     contrast_dict = {'STIR': 't2', 'PSIR': 't1', 'T2star':'t2s', 'T2w': 't2', 'T1w': 't1', 'MT-T1': 't1', 'MTon': 't2s' }
 
     if contrast=='PSIR':
-        os.system(f'sct_propseg -i {image} -o {output_path} -c {contrast_dict[contrast]} -qc {qc_folder}')
+        os.system(f'sct_propseg -i {image} -o {output_path} -c {contrast_dict[contrast]}')
     else:
-        os.system(f'sct_deepseg_sc -i {image} -o {output_path} -c {contrast_dict[contrast]} -qc {qc_folder}')
+        os.system(f'sct_deepseg_sc -i {image} -o {output_path} -c {contrast_dict[contrast]}')
+
+    #To generate the quality control
+    os.system(f'sct_qc -i {image} -s {output_path} -d {output_path} -p sct_deepseg_lesion -plane sagittal -qc {qc_folder}')
 
     return None

From fc331828fc30a054435215665bee7cb248eda2b8 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Benveniste
Date: Tue, 25 Jul 2023 16:39:47 -0400
Subject: [PATCH 07/26] lesion clustering across slices

---
 lesion_analysis/lesion_seg_analysis.py | 150 +++++++++++++++++++++++++
 1 file changed, 150 insertions(+)
 create mode 100644 lesion_analysis/lesion_seg_analysis.py

diff --git a/lesion_analysis/lesion_seg_analysis.py b/lesion_analysis/lesion_seg_analysis.py
new file mode 100644
index 0000000..eaf8430
--- /dev/null
+++ b/lesion_analysis/lesion_seg_analysis.py
@@ -0,0 +1,150 @@
+"""
+In this file we analyse the results of the segmentation of the MS lesions on the spinal cord.
+The objective is to output the number of lesions segmented on the spinal cord for each patient.
+Also, we want to output a segmentation file with each lesion in a different color.
+
+Usage:
+    python3 lesion_seg_analysis.py -i -seg -o 
+    python3 lesion_seg_analysis.py -i ./data/rosenberg/nnUNet_raw/Dataset301_canproco/imagesTs/canproco_003_0000.nii.gz -seg ./data/rosenberg/seg_test/canproco_003.nii.gz -o ./data/rosenberg/analysis
+
+Args:
+    -i/--input_image: path to the image
+    -seg/--segmentation: path to the segmentation
+    -o/--output_folder: path to the output folder
+    --plot: whether to plot the results or not
+
+Returns:
+    None
+
+Todo:
+    *
+
+Pierre-Louis Benveniste
+"""
+
+import os
+import argparse
+from pathlib import Path
+import nibabel as nib
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.cluster import DBSCAN
+
+def get_parser():
+    """
+    This function parses the arguments given to the script.
+
+    Args:
+        None
+
+    Returns:
+        parser: parser containing the arguments
+    """
+
+    parser = argparse.ArgumentParser(description='Analyse the results of the segmentation of the MS lesions on the spinal cord.')
+    parser.add_argument('-i', '--input_image', required=True,
+                        help='Path to the image')
+    parser.add_argument('-seg', '--segmentation', required=True,
+                        help='Path to the segmentation')
+    parser.add_argument('-o', '--output_folder', required=True,
+                        help='Path to the output folder')
+    parser.add_argument('--plot', action='store_true',
+                        help='Whether to plot the results or not')
+    return parser
+
+def main():
+    """
+    This function is the main function of the script. 
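+    It clusters the lesion voxels in 3D with DBSCAN, saves a relabelled lesion mask,
+    and writes the volume and center of each lesion to a text file.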
+
+    Args:
+        None
+
+    Returns:
+        None
+    """
+    #get the parser
+    parser = get_parser()
+    args = parser.parse_args()
+
+    #load image
+    img = nib.load(args.input_image)
+    img_data = img.get_fdata()
+
+    #load segmentation
+    seg = nib.load(args.segmentation)
+    seg_data = seg.get_fdata()
+
+    #perform clustering on the entire segmentation volume
+    ##first we gather the coordinates of every non-zero voxel
+    X = []
+    Y = []
+    Z = []
+    for x in range(seg_data.shape[0]):
+        for y in range(seg_data.shape[1]):
+            for z in range(seg_data.shape[2]):
+                if seg_data[x,y,z] != 0:
+                    X.append(x)
+                    Y.append(y)
+                    Z.append(z)
+    coords = np.stack((X,Y,Z), axis=1)
+
+    ##then we perform the clustering using DBSCAN
+    db = DBSCAN(eps=10, min_samples=5).fit(coords)
+    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
+    core_samples_mask[db.core_sample_indices_] = True
+    labels = db.labels_
+    # Number of clusters in labels, ignoring noise if present.
+    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
+
+    print('Estimated number of clusters: %d' % n_clusters_)
+
+    plot = args.plot
+    if plot:
+        #build a color dictionary
+        colors = {}
+        for i in range(n_clusters_):
+            colors[i] = np.random.rand(3,)
+        colors[-1] = [0,0,0]
+
+        #plot the clusters for each slice
+        fig, axs = plt.subplots(ncols=seg_data.shape[2], nrows=1,figsize=(20,3))
+        for i in range(seg_data.shape[2]):
+            slice_coords = coords[coords[:,2] == i]
+            slice_labels = labels[coords[:,2] == i]
+            axs[i].scatter(slice_coords[:,0], slice_coords[:,1], color=[colors[x] for x in slice_labels])
+            # plt.savefig(os.path.join(args.output_folder, f'cluster_slice_{i}.png'))
+            # plt.close()
+            axs[i].set_xlim(0,seg_data.shape[0])
+            axs[i].set_ylim(0,seg_data.shape[1])
+        plt.show()
+
+    #saving the results in a nifti file
+    #first we create the new segmentation
+    new_seg_data = np.zeros_like(seg_data)
+    for i in range(len(labels)):
+        new_seg_data[coords[i,0], coords[i,1], coords[i,2]] = labels[i] + 1
+    #then we save it
+    new_seg = nib.Nifti1Image(new_seg_data, seg.affine, seg.header)
+    nib.save(new_seg, os.path.join(args.output_folder, 'clustered_seg.nii.gz'))
+
+    #for each lesion, compute its volume and center
+    ##first we get the volume of one voxel
+    voxel_volume = np.prod(seg.header.get_zooms())
+    ##then we get the volume of each lesion and its center
+    lesion_volumes = []
+    lesion_centers = []
+    for i in range(n_clusters_):
+        lesion_volumes.append(len(labels[labels == i])*voxel_volume)
+        lesion_centers.append(np.mean(coords[labels == i], axis=0))
+
+    #save the results in a text file
+    with open(os.path.join(args.output_folder, 'lesion_analysis.txt'), 'w') as f:
+        f.write(f'Number of lesions: {n_clusters_}\n')
+        f.write('Volume and center of each lesion (mm3):\n')
+        for i in range(n_clusters_):
+            f.write(f'Lesion {i+1} : volume: {round(lesion_volumes[i],2)} mm3, center: {lesion_centers[i]}\n')
+    return None
+
+if __name__ == '__main__':
+    main()
+

From 65138b0dae7cd3f2bbf352ce26236e4be50f622b Mon Sep 17 00:00:00 2001
From: Pierre-Louis Benveniste
Date: Tue, 25 Jul 2023 16:46:01 -0400
Subject: [PATCH 08/26] removed unused files for seg and crop of lesion

---
 nnunet/convert_bids_to_nnunet_crop_and_seg.py | 229 ------------------
 nnunet/crop_sc.py                             |  28 ---
 nnunet/seg_sc.py                              |  26 --
 3 files changed, 283 deletions(-)
 delete mode 100644 nnunet/convert_bids_to_nnunet_crop_and_seg.py
 delete mode 100644 nnunet/crop_sc.py
 delete mode 100644 nnunet/seg_sc.py

diff --git a/nnunet/convert_bids_to_nnunet_crop_and_seg.py b/nnunet/convert_bids_to_nnunet_crop_and_seg.py
deleted file mode 100644
index 
7aade81..0000000 --- a/nnunet/convert_bids_to_nnunet_crop_and_seg.py +++ /dev/null @@ -1,229 +0,0 @@ - -"""Convert data from BIDS to nnU-Net format - -This scripts was adapted from a script by the following authors : Julian McGinnis, Naga Karthik -This python script converts data from the BIDS format to the nnU-Net format in order to be able to perform pre-processing, training and inference. -This script is specifically designed to the canproco dataset. It converts the dataset from a BIDS format to a nnU-Net format. -This script is specific in that it takes all labeled data for training and validation. Because, very little labelled data is available, no labelled data is used for testing. -This script is also specific because it only takes the cropped images and their labels. -Also, it is specific because you can chose which contrast you want to use for the segmentation. - -Example of run: - - $ python convert_bids_to_nnunet.py --path-data /path/to/data_extracted --path-out /path/to/nnUNet_raw --taskname TASK-NAME --tasknumber DATASET-ID - -Arguments: - - --path-data : Path to BIDS structured dataset. Accepts both cross-sectional and longitudinal datasets - --path-out : Path to output directory. - --taskname: Specify the task name - usually the anatomy to be segmented, e.g. Hippocampus - --tasknumber : Specify the task number, has to be greater than 100 but less than 999 - --label-folder : Path to the label folders in derivatives (default='labels') - --contrast : Contrast used for the segmentation (if multiple: do the following: PSIR,STIR (no space)) - -Todo: - * - -Pierre-Louis Benveniste -""" -import argparse -import pathlib -from pathlib import Path -import json -import os -import shutil -from collections import OrderedDict -from tqdm import tqdm -from seg_sc import segment_sc -from crop_sc import crop_to_mask - - -import nibabel as nib -import numpy as np - -# parse command line arguments -parser = argparse.ArgumentParser(description='Convert BIDS-structured database to nnUNet format.') -parser.add_argument('--path-data', required=True, - help='Path to BIDS structured dataset. Accepts both cross-sectional and longitudinal datasets') -parser.add_argument('--path-out', help='Path to output directory.', required=True) -parser.add_argument('--taskname', default='MSSpineLesion', type=str, - help='Specify the task name - usually the anatomy to be segmented, e.g. Hippocampus',) -parser.add_argument('--tasknumber', default=501,type=int, - help='Specify the task number, has to be greater than 500 but less than 999. 
e.g 502') -parser.add_argument('--label-folder', help='Path to the label folders in derivatives', default='labels', type=str) -parser.add_argument('--crop-training', help='Crop images to spinal cord mask', action='store_true', default=False) -parser.add_argument('--crop-testing', help='Crop images to spinal cord mask', action='store_true', default=False) - -args = parser.parse_args() - -path_in_images = Path(args.path_data) -label_folder = args.label_folder -path_in_labels = Path(os.path.join(args.path_data, 'derivatives', label_folder)) -path_out = Path(os.path.join(os.path.abspath(args.path_out), f'Dataset{args.tasknumber}_{args.taskname}')) - -# define paths for train and test folders -path_out_imagesTr = Path(os.path.join(path_out, 'imagesTr')) -path_out_imagesTs = Path(os.path.join(path_out, 'imagesTs')) -path_out_labelsTr = Path(os.path.join(path_out, 'labelsTr')) -path_out_labelsTs = Path(os.path.join(path_out, 'labelsTs')) - -# we load both train and validation set into the train images as nnunet uses cross-fold-validation -train_images, train_labels = [], [] -test_images, test_labels = [], [] - -if __name__ == '__main__': - - # make the directories - pathlib.Path(path_out).mkdir(parents=True, exist_ok=True) - pathlib.Path(path_out_imagesTr).mkdir(parents=True, exist_ok=True) - pathlib.Path(path_out_imagesTs).mkdir(parents=True, exist_ok=True) - pathlib.Path(path_out_labelsTr).mkdir(parents=True, exist_ok=True) - pathlib.Path(path_out_labelsTs).mkdir(parents=True, exist_ok=True) - - conversion_dict = {} - - #------------- EXTRACTION OF THE LABELED IMAGES NAMES-------------------------- - labelled_imgs = [] - - # We first extract all the label files' names - label_files = sorted(list(path_in_labels.rglob('*_STIR_lesion-manual.nii.gz')) + list(path_in_labels.rglob('*PSIR_lesion-manual.nii.gz') )) - labelled_imgs += [str(k) for k in label_files] - - #--------------- DISPACTH OF LABELLED IMAGES AND UNLABELED IMAGES ------------------- - - #Initialise the number of scans in train and in test folder - scan_cnt_train, scan_cnt_test = 0, 0 - - valid_train_imgs = [] - valid_test_imgs = [] - - #The image folders - image_files = sorted(list(path_in_images.rglob('*_STIR.nii.gz')) + list(path_in_images.rglob('*_PSIR.nii.gz'))) - - for image_file in image_files: - - #Identify common data path - common = os.path.relpath(os.path.dirname(image_file), args.path_data) - - file_id = str(image_file).rsplit('_',1)[0].split('/')[-1] + "_" - - if (file_id in str(labelled_imgs)): - label_file = [k for k in labelled_imgs if file_id in k][0] - - scan_cnt_train+= 1 - - image_file_nnunet = os.path.join(path_out_imagesTr,f'{args.taskname}_{scan_cnt_train:03d}_0000.nii.gz') - label_file_nnunet = os.path.join(path_out_labelsTr,f'{args.taskname}_{scan_cnt_train:03d}.nii.gz') - - train_images.append(str(image_file_nnunet)) - train_labels.append(str(label_file_nnunet)) - - if args.crop_training: - # segment the spinal cord - temp_seg = os.path.join(path_out, f'temp_seg.nii.gz') - if 'STIR' in str(image_file): - contrast = 't2' - else: - contrast = 't1' - segment_sc(image_file, temp_seg, contrast=contrast) - # crop the image to the spinal cord - crop_to_mask(image_file, temp_seg, image_file_nnunet) - # crop the label to the spinal cord - crop_to_mask(label_file, temp_seg, label_file_nnunet) - # remove the temporary segmentation file - os.remove(temp_seg) - - else: - # copy the files to new structure - shutil.copyfile(image_file, image_file_nnunet) - shutil.copyfile(label_file, label_file_nnunet) - - 
-            conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet
-            conversion_dict[str(os.path.abspath(label_file))] = label_file_nnunet
-        else:
-
-            # Repeat the above procedure for testing
-            scan_cnt_test += 1
-            # create the new convention names
-            image_file_nnunet = os.path.join(path_out_imagesTs, f'{args.taskname}_{scan_cnt_test:03d}_0000.nii.gz')
-            label_file_nnunet = os.path.join(path_out_labelsTs, f'{args.taskname}_{scan_cnt_test:03d}.nii.gz')
-
-            test_images.append(str(image_file_nnunet))
-            test_labels.append(str(label_file_nnunet))
-
-            if args.crop_testing:
-                # segment the spinal cord
-                temp_seg = os.path.join(path_out, f'temp_seg.nii.gz')
-                if 'STIR' in str(image_file):
-                    contrast = 't2'
-                else:
-                    contrast = 't1'
-                segment_sc(image_file, temp_seg, contrast=contrast)
-                # crop the image to the spinal cord
-                crop_to_mask(image_file, temp_seg, image_file_nnunet)
-                # remove the temporary segmentation file
-                os.remove(temp_seg)
-
-            else:
-                # copy the files to new structure
-                shutil.copyfile(image_file, image_file_nnunet)
-
-            conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet
-
-    #Display of number of training and number of testing images
-    print("Number of images for training: " + str(scan_cnt_train))
-    print("Number of images for testing: " + str(scan_cnt_test))
-
-    #----------------- CREATION OF THE DICTIONARY -----------------------------------
-    # create conversion_dict.json
-    json_object = json.dumps(conversion_dict, indent=4)
-    # write the mapping from original to converted file names
-    conversion_dict_name = f"conversion_dict.json"
-    with open(os.path.join(path_out, conversion_dict_name), "w") as outfile:
-        outfile.write(json_object)
-
-
-    # c.f. dataset json generation. This contains the metadata for the dataset that nnUNet uses during preprocessing and training
-    # general info : https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/dataset_conversion/utils.py
-    # example: https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/dataset_conversion/Task055_SegTHOR.py
-
-    json_dict = OrderedDict()
-    json_dict['name'] = args.taskname
-    json_dict['description'] = args.taskname
-    json_dict['tensorImageSize'] = "3D"
-    json_dict['reference'] = "TBD"
-    json_dict['licence'] = "TBD"
-    json_dict['release'] = "0.0"
-
-
-    # Because only using one modality
-    ## was changed from 'modality' to 'channel_names'
-    json_dict['channel_names'] = {
-        "0": "T1w",
-    }
-
-    # 0 is always the background. Any class labels should start from 1.
-    json_dict['labels'] = {
-        "background" : "0",
-        "MS Lesion" : "1",
-    }
-
-    json_dict['numTraining'] = scan_cnt_train
-    json_dict['numTest'] = scan_cnt_test
-    #Newly required field in the json file with v2
-    json_dict["file_ending"] = ".nii.gz"
-
-    json_dict['training'] = [{'image': str(train_labels[i]).replace("labelsTr", "imagesTr"), "label": train_labels[i]}
-                             for i in range(len(train_images))]
-    # Note: See https://github.com/MIC-DKFZ/nnUNet/issues/407 for how this should be described
-
-    #Removed because useless in this case
-    json_dict['test'] = [str(test_labels[i]).replace("labelsTs", "imagesTs") for i in range(len(test_images))]
-
-    # create dataset.json
-    json_object = json.dumps(json_dict, indent=4)
-    # write to dataset description
-    # nn-unet requires it to be "dataset.json"
-    dataset_dict_name = f"dataset.json"
-    with open(os.path.join(path_out, dataset_dict_name), "w") as outfile:
-        outfile.write(json_object)
\ No newline at end of file
diff --git a/nnunet/crop_sc.py b/nnunet/crop_sc.py
deleted file mode 100644
index 9c876ac..0000000
--- a/nnunet/crop_sc.py
+++ /dev/null
@@ -1,28 +0,0 @@
-"""
-This function is used to crop an image to the size of a given mask
-
-Args:
-    image: image to crop
-    mask: mask to crop to
-    dilate_size: size of the dilation
-    output_path: output file path
-
-Returns:
-    None
-
-Todo:
-    *
-
-Pierre-Louis Benveniste
-"""
-
-import os
-
-def crop_to_mask(image, mask, output_path, dilate_size=2):
-    #This function uses sct_crop_image to crop the image to the mask
-    #The mask is dilated to avoid cutting the edges of the mask
-    #The image is cropped to the mask
-
-    os.system(f'sct_crop_image -i {image} -m {mask} -o {output_path} -dilate {dilate_size} ')
-
-    return None
diff --git a/nnunet/seg_sc.py b/nnunet/seg_sc.py
deleted file mode 100644
index f7aa854..0000000
--- a/nnunet/seg_sc.py
+++ /dev/null
@@ -1,26 +0,0 @@
-"""
-This function is used to segment the spinal cord using the spinal cord toolbox
-
-Args:
-    image: image to segment
-    output_path: output file path
-    contrast: contrast used for the segmentation
-
-Returns:
-    None
-
-Todo:
-    *
-
-Pierre-Louis Benveniste
-"""
-
-import os
-
-def segment_sc(image, output_path, contrast='t2'):
-    #This function uses sct_deepseg_sc to segment the spinal cord
-    #The image is segmented and the segmentation is saved in the output folder
-
-    os.system(f'sct_deepseg_sc -i {image} -o {output_path} -c {contrast}')
-
-    return None
From fc331828fc30a054435215665bee7cb248eda2b8 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Benveniste
Date: Tue, 25 Jul 2023 18:10:01 -0400
Subject: [PATCH 09/26] created file to compare two time points

---
 lesion_analysis/lesion_seg_analysis.py         | 2 +-
 lesion_analysis/time_point_lesion_evolution.py | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 lesion_analysis/time_point_lesion_evolution.py

diff --git a/lesion_analysis/lesion_seg_analysis.py b/lesion_analysis/lesion_seg_analysis.py
index eaf8430..cc21366 100644
--- a/lesion_analysis/lesion_seg_analysis.py
+++ b/lesion_analysis/lesion_seg_analysis.py
@@ -5,7 +5,7 @@
 Usage:
     python3 lesion_seg_analysis.py -i <input_image> -seg <segmentation> -o <output_folder>
-    python3 lesion_seg_analysis.py -i ./data/rosenberg/nnUNet_raw/Dataset301_canproco/imagesTs/canproco_003_0000.nii.gz -seg ./data/rosenberg/seg_test/canproco_003.nii.gz -o ./data/rosenberg/analysis
+    python3 canproco/lesion_analysis/lesion_seg_analysis.py -i ./data/lesion_comparison/sub-cal072_ses-M12_STIR.nii.gz -seg ./data/lesion_comparison/canproco_003.nii.gz -o
./data/lesion_comparison/output Args: -i/--input_image: path to the image diff --git a/lesion_analysis/time_point_lesion_evolution.py b/lesion_analysis/time_point_lesion_evolution.py new file mode 100644 index 0000000..e69de29 From db106a429ee42e318c612ed131c899e5dc9f45d8 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Tue, 25 Jul 2023 18:28:01 -0400 Subject: [PATCH 10/26] modified lesion time point comparison --- .../time_point_lesion_evolution.py | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/lesion_analysis/time_point_lesion_evolution.py b/lesion_analysis/time_point_lesion_evolution.py index e69de29..c83c049 100644 --- a/lesion_analysis/time_point_lesion_evolution.py +++ b/lesion_analysis/time_point_lesion_evolution.py @@ -0,0 +1,88 @@ +""" +This python file is used to analyse the evolution of the lesions over time. +It first performs a registration of the images and their segmentation on the two time points. +Then, it performs clustering of the lesions on each image and computes the volume and the center of each lesion. +Finally, it compares the lesions between the two time points and computes the evolution of the lesions. + +Args: + -i-1, --input-first_image: path to the first image + -i-2, --input-second_image: path to the second image + -seg-1, --segmentation-first_image: path to the segmentation of the first image + -seg-2, --segmentation-second_image: path to the segmentation of the second image + -o, --output_folder: path to the output folder + --plot: whether to plot the results or not + +Returns: + None + +Example: + python time_point_lesion_evolution.py -i-1 path/to/first/image -i-2 path/to/second/image -seg-1 path/to/first/segmentation -seg-2 path/to/second/segmentation -o path/to/output/folder + +To do: + * + +Pierre-Louis Benveniste +""" + +import os +import argparse +from pathlib import Path +import nibabel as nib +import numpy as np +import matplotlib.pyplot as plt +from sklearn.cluster import DBSCAN + +def get_parser(): + """ + This function parses the arguments given to the script. + + Args: + None + + Returns: + parser: parser containing the arguments + """ + + parser = argparse.ArgumentParser(description='Analyse the results of the segmentation of the MS lesion on the spinal cord.') + parser.add_argument('-i-1', '--input-first_image', required=True, + help='Path to the first image') + parser.add_argument('-i-2', '--input-second_image', required=True, + help='Path to the second image') + parser.add_argument('-seg-1', '--segmentation-first_image', required=True, + help='Path to the segmentation of the first image') + parser.add_argument('-seg-2', '--segmentation-second_image', required=True, + help='Path to the segmentation of the second image') + parser.add_argument('-o', '--output_folder', required=True, + help='Path to the output folder') + parser.add_argument('--plot', action='store_true', + help='Whether to plot the results or not') + return parser + +def main(): + """ + This function is the main function of the script. 
+ + Args: + None + + Returns: + None + """ + #get the parser + parser = get_parser() + args = parser.parse_args() + + #get the images and the segmentations + first_image = nib.load(args.input_first_image) + second_image = nib.load(args.input_second_image) + first_segmentation = nib.load(args.segmentation_first_image) + second_segmentation = nib.load(args.segmentation_second_image) + + #first we perform a registration of the images and the segmentations + #we use the first image and the first segmentation as the reference + + + return None + +if __name__ == '__main__': + main() \ No newline at end of file From 511c168f8bc9f5350f4d0f97bba275dbb8b59778 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Wed, 2 Aug 2023 09:47:59 -0400 Subject: [PATCH 11/26] trying to register M0 to M12 --- .../time_point_lesion_evolution.py | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/lesion_analysis/time_point_lesion_evolution.py b/lesion_analysis/time_point_lesion_evolution.py index c83c049..216cc64 100644 --- a/lesion_analysis/time_point_lesion_evolution.py +++ b/lesion_analysis/time_point_lesion_evolution.py @@ -7,8 +7,8 @@ Args: -i-1, --input-first_image: path to the first image -i-2, --input-second_image: path to the second image - -seg-1, --segmentation-first_image: path to the segmentation of the first image - -seg-2, --segmentation-second_image: path to the segmentation of the second image + -seg-1, --segmentation-first_image: path to the lesion segmentation of the first image + -seg-2, --segmentation-second_image: path to the lesion segmentation of the second image -o, --output_folder: path to the output folder --plot: whether to plot the results or not @@ -17,6 +17,7 @@ Example: python time_point_lesion_evolution.py -i-1 path/to/first/image -i-2 path/to/second/image -seg-1 path/to/first/segmentation -seg-2 path/to/second/segmentation -o path/to/output/folder + python3 lesion_analysis/time_point_lesion_evolution.py -i-1 /Users/plbenveniste/Desktop/lesion_comparison_copy/sub-cal072_ses-M0_STIR.nii.gz -i-2 /Users/plbenveniste/Desktop/lesion_comparison_copy/sub-cal072_ses-M12_STIR.nii.gz -seg-1 /Users/plbenveniste/Desktop/lesion_comparison_copy/M0_inference_results.nii.gz -seg-2 /Users/plbenveniste/Desktop/lesion_comparison_copy/M12_inference_results.nii.gz -o /Users/plbenveniste/Desktop/lesion_comparison_copy/output To do: * @@ -49,13 +50,13 @@ def get_parser(): parser.add_argument('-i-2', '--input-second_image', required=True, help='Path to the second image') parser.add_argument('-seg-1', '--segmentation-first_image', required=True, - help='Path to the segmentation of the first image') + help='Path to the lesion segmentation of the first image') parser.add_argument('-seg-2', '--segmentation-second_image', required=True, - help='Path to the segmentation of the second image') + help='Path to the lesion segmentation of the second image') parser.add_argument('-o', '--output_folder', required=True, help='Path to the output folder') - parser.add_argument('--plot', action='store_true', - help='Whether to plot the results or not') + parser.add_argument('--show', action='store_true', + help='Whether to show the results or not') return parser def main(): @@ -75,12 +76,18 @@ def main(): #get the images and the segmentations first_image = nib.load(args.input_first_image) second_image = nib.load(args.input_second_image) - first_segmentation = nib.load(args.segmentation_first_image) - second_segmentation = nib.load(args.segmentation_second_image) + 
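Registration is only meaningful if each image and its lesion mask live on the same voxel grid; a small sanity check along these lines (hypothetical helper, plain nibabel calls) can catch mismatched pairs before any sct command runs:

import numpy as np
import nibabel as nib

def check_image_mask_pair(image_path, mask_path, atol=1e-3):
    # hypothetical sanity check: image and mask must share shape and affine
    img, mask = nib.load(image_path), nib.load(mask_path)
    assert img.shape == mask.shape, f"shape mismatch: {img.shape} vs {mask.shape}"
    assert np.allclose(img.affine, mask.affine, atol=atol), "affine mismatch"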
first_lesion_segmentation = nib.load(args.segmentation_first_image) + second_lesion_segmentation = nib.load(args.segmentation_second_image) - #first we perform a registration of the images and the segmentations - #we use the first image and the first segmentation as the reference - + #first we segment the spinal cord on the two images using sct_deepseg_sc + os.system('sct_deepseg_sc -i ' + args.input_first_image + ' -c t2 -o ' + args.output_folder + '/first_image_sc_segmentation.nii.gz') + os.system('sct_deepseg_sc -i ' + args.input_second_image + ' -c t2 -o' + args.output_folder + '/second_image_sc_segmentation.nii.gz') + + #then we register the images and the segmentations with the first image as reference using sct_register_multimodal + os.system('sct_register_multimodal -i ' + args.input_second_image + ' -d ' + args.input_first_image + ' -iseg ' + args.output_folder + '/second_image_sc_segmentation.nii.gz' + ' -dseg ' + args.output_folder + '/first_image_sc_segmentation.nii.gz' + ' -o ' + args.output_folder + '/second_image_registered.nii.gz' + ' -owarp ' + args.output_folder +'/warping_M0_to_M12.nii.gz') + + #then we apply the warping field to the segmentation of the lesions on the second images using sct_apply_transfo + os.system('sct_apply_transfo -i ' + args.segmentation_second_image + ' -d ' + args.segmentation_first_image + ' -w ' + args.output_folder + '/warping_M0_to_M12.nii.gz' + ' -o ' + args.output_folder + '/second_image_lesion_segmentation_registered.nii.gz' + ' -x linear') return None From 6e70cb111a9d139dd180e395783772facce4c8f1 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Thu, 3 Aug 2023 10:31:50 -0400 Subject: [PATCH 12/26] problem with registration with vert levels --- .../time_point_lesion_evolution.py | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/lesion_analysis/time_point_lesion_evolution.py b/lesion_analysis/time_point_lesion_evolution.py index 216cc64..65c1b23 100644 --- a/lesion_analysis/time_point_lesion_evolution.py +++ b/lesion_analysis/time_point_lesion_evolution.py @@ -80,14 +80,28 @@ def main(): second_lesion_segmentation = nib.load(args.segmentation_second_image) #first we segment the spinal cord on the two images using sct_deepseg_sc - os.system('sct_deepseg_sc -i ' + args.input_first_image + ' -c t2 -o ' + args.output_folder + '/first_image_sc_segmentation.nii.gz') - os.system('sct_deepseg_sc -i ' + args.input_second_image + ' -c t2 -o' + args.output_folder + '/second_image_sc_segmentation.nii.gz') + #os.system('sct_deepseg_sc -i ' + args.input_first_image + ' -c t2 -o ' + args.output_folder + '/first_image_sc_segmentation.nii.gz') + #os.system('sct_deepseg_sc -i ' + args.input_second_image + ' -c t2 -o' + args.output_folder + '/second_image_sc_segmentation.nii.gz') - #then we register the images and the segmentations with the first image as reference using sct_register_multimodal - os.system('sct_register_multimodal -i ' + args.input_second_image + ' -d ' + args.input_first_image + ' -iseg ' + args.output_folder + '/second_image_sc_segmentation.nii.gz' + ' -dseg ' + args.output_folder + '/first_image_sc_segmentation.nii.gz' + ' -o ' + args.output_folder + '/second_image_registered.nii.gz' + ' -owarp ' + args.output_folder +'/warping_M0_to_M12.nii.gz') + #then we compute the vertebral levels of the images using sct_label_vertebrae + #os.system('sct_label_vertebrae -i ' + args.input_first_image + ' -s ' + args.output_folder + '/first_image_sc_segmentation.nii.gz' + ' -c t2 -qc ' + 
args.output_folder + '/qc' + ' -ofolder ' + args.output_folder) + #os.system('sct_label_vertebrae -i ' + args.input_second_image + ' -s ' + args.output_folder + '/second_image_sc_segmentation.nii.gz' + ' -c t2 -qc ' + args.output_folder + '/qc' + ' -ofolder ' + args.output_folder) - #then we apply the warping field to the segmentation of the lesions on the second images using sct_apply_transfo - os.system('sct_apply_transfo -i ' + args.segmentation_second_image + ' -d ' + args.segmentation_first_image + ' -w ' + args.output_folder + '/warping_M0_to_M12.nii.gz' + ' -o ' + args.output_folder + '/second_image_lesion_segmentation_registered.nii.gz' + ' -x linear') + + #then we register the images and the segmentations with the first image as reference using sct_register_multimodal using both the spinal cord segmentation and the vertebral levels + parameters = 'step=0,type=label,dof=Tx_Ty_Tz:step=1,type=seg,algo=slicereg,poly=5:step=2,type=im,algo=syn,metric=MI,deformation=1x1x1,smooth=3' + os.system('sct_register_multimodal -i ' + args.input_second_image + ' -d ' + args.input_first_image + ' -iseg ' + args.output_folder + '/second_image_sc_segmentation.nii.gz' + + ' -dseg ' + args.output_folder + '/first_image_sc_segmentation.nii.gz' + ' -ilabel ' + args.output_folder + '/second_image_sc_segmentation_labeled.nii.gz' + + ' -dlabel ' + args.output_folder + '/first_image_sc_segmentation_labeled.nii.gz' + ' -o ' + args.output_folder + '/first_image_registered.nii.gz' + + ' -owarp ' + args.output_folder +'/warping_M0_to_M0.nii.gz' + ' -param ' + parameters + ' -x linear -qc ' + args.output_folder + '/qc') + + + # parameters = 'step=1,type=seg,algo=slicereg,poly=5:step=2,type=im,algo=syn,metric=MI,deformation=1x1x1,smooth=3' + # os.system('sct_register_multimodal -i ' + args.input_second_image + ' -d ' + args.input_first_image + ' -iseg ' + args.output_folder + '/second_image_sc_segmentation.nii.gz' + # + ' -dseg ' + args.output_folder + '/first_image_sc_segmentation.nii.gz' + ' -o ' + args.output_folder + '/second_image_registered.nii.gz' + # + ' -owarp ' + args.output_folder +'/warping_M0_to_M12.nii.gz' + ' -param ' + parameters + ' -x linear -qc ' + args.output_folder + '/qc') + # #then we apply the warping field to the segmentation of the lesions on the second images using sct_apply_transfo + # os.system('sct_apply_transfo -i ' + args.segmentation_second_image + ' -d ' + args.segmentation_first_image + ' -w ' + args.output_folder + '/warping_M0_to_M12.nii.gz' + ' -o ' + args.output_folder + '/second_image_lesion_segmentation_registered.nii.gz' + ' -x linear') return None From 86a77d9c82939780817fdbd499537fb2dc48ee7a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Thu, 3 Aug 2023 18:03:36 -0400 Subject: [PATCH 13/26] registration and lesion matching --- .../time_point_lesion_evolution.py | 171 +++++++++++++++--- 1 file changed, 150 insertions(+), 21 deletions(-) diff --git a/lesion_analysis/time_point_lesion_evolution.py b/lesion_analysis/time_point_lesion_evolution.py index 65c1b23..2df0601 100644 --- a/lesion_analysis/time_point_lesion_evolution.py +++ b/lesion_analysis/time_point_lesion_evolution.py @@ -17,7 +17,7 @@ Example: python time_point_lesion_evolution.py -i-1 path/to/first/image -i-2 path/to/second/image -seg-1 path/to/first/segmentation -seg-2 path/to/second/segmentation -o path/to/output/folder - python3 lesion_analysis/time_point_lesion_evolution.py -i-1 /Users/plbenveniste/Desktop/lesion_comparison_copy/sub-cal072_ses-M0_STIR.nii.gz -i-2 
/Users/plbenveniste/Desktop/lesion_comparison_copy/sub-cal072_ses-M12_STIR.nii.gz -seg-1 /Users/plbenveniste/Desktop/lesion_comparison_copy/M0_inference_results.nii.gz -seg-2 /Users/plbenveniste/Desktop/lesion_comparison_copy/M12_inference_results.nii.gz -o /Users/plbenveniste/Desktop/lesion_comparison_copy/output + python3 lesion_analysis/time_point_lesion_evolution.py -i-1 /Users/plbenveniste/Desktop/lesion_comparison_copy/sub-cal072_ses-M0_STIR.nii.gz -i-2 /Users/plbenveniste/Desktop/lesion_comparison_copy/sub-cal072_ses-M12_STIR.nii.gz -seg-1 /Users/plbenveniste/Desktop/lesion_comparison_copy/sub-cal072_ses-M0_STIR_lesion-manual.nii.gz -seg-2 /Users/plbenveniste/Desktop/lesion_comparison_copy/M12_inference_results.nii.gz -o /Users/plbenveniste/Desktop/lesion_comparison_copy/output To do: * @@ -32,6 +32,8 @@ import numpy as np import matplotlib.pyplot as plt from sklearn.cluster import DBSCAN +import networkx as nx +from scipy.optimize import linear_sum_assignment def get_parser(): """ @@ -79,31 +81,158 @@ def main(): first_lesion_segmentation = nib.load(args.segmentation_first_image) second_lesion_segmentation = nib.load(args.segmentation_second_image) + """ #first we segment the spinal cord on the two images using sct_deepseg_sc - #os.system('sct_deepseg_sc -i ' + args.input_first_image + ' -c t2 -o ' + args.output_folder + '/first_image_sc_segmentation.nii.gz') - #os.system('sct_deepseg_sc -i ' + args.input_second_image + ' -c t2 -o' + args.output_folder + '/second_image_sc_segmentation.nii.gz') + os.system('sct_deepseg_sc -i ' + args.input_first_image + ' -c t2 -o ' + args.output_folder + '/first_image_sc_segmentation.nii.gz') + os.system('sct_deepseg_sc -i ' + args.input_second_image + ' -c t2 -o' + args.output_folder + '/second_image_sc_segmentation.nii.gz') #then we compute the vertebral levels of the images using sct_label_vertebrae - #os.system('sct_label_vertebrae -i ' + args.input_first_image + ' -s ' + args.output_folder + '/first_image_sc_segmentation.nii.gz' + ' -c t2 -qc ' + args.output_folder + '/qc' + ' -ofolder ' + args.output_folder) - #os.system('sct_label_vertebrae -i ' + args.input_second_image + ' -s ' + args.output_folder + '/second_image_sc_segmentation.nii.gz' + ' -c t2 -qc ' + args.output_folder + '/qc' + ' -ofolder ' + args.output_folder) - + os.system('sct_label_vertebrae -i ' + args.input_first_image + ' -s ' + args.output_folder + '/first_image_sc_segmentation.nii.gz' + ' -c t2 -ofolder ' + args.output_folder) + os.system('sct_label_vertebrae -i ' + args.input_second_image + ' -s ' + args.output_folder + '/second_image_sc_segmentation.nii.gz' + ' -c t2 -ofolder ' + args.output_folder) + + first_label = nib.load(args.output_folder + '/first_image_sc_segmentation_labeled.nii.gz') + second_label = nib.load(args.output_folder + '/second_image_sc_segmentation_labeled.nii.gz') + first_label_data = first_label.get_fdata() + second_label_data = second_label.get_fdata() + common_labels = np.intersect1d(np.unique(first_label_data), np.unique(second_label_data)) + + #we remove labels that are not in common in both images + for label in np.unique(first_label_data): + if label not in common_labels: + first_label_data[first_label_data == label] = 0 + #we save the new segmentation + first_label = nib.Nifti1Image(first_label_data, first_label.affine, first_label.header) + nib.save(first_label, args.output_folder + '/first_image_sc_segmentation_labeled.nii.gz') + #same for the second image + for label in np.unique(second_label_data): + if label not in common_labels: + 
second_label_data[second_label_data == label] = 0 + second_label = nib.Nifti1Image(second_label_data, second_label.affine, second_label.header) + nib.save(second_label, args.output_folder + '/second_image_sc_segmentation_labeled.nii.gz') #then we register the images and the segmentations with the first image as reference using sct_register_multimodal using both the spinal cord segmentation and the vertebral levels - parameters = 'step=0,type=label,dof=Tx_Ty_Tz:step=1,type=seg,algo=slicereg,poly=5:step=2,type=im,algo=syn,metric=MI,deformation=1x1x1,smooth=3' - os.system('sct_register_multimodal -i ' + args.input_second_image + ' -d ' + args.input_first_image + ' -iseg ' + args.output_folder + '/second_image_sc_segmentation.nii.gz' - + ' -dseg ' + args.output_folder + '/first_image_sc_segmentation.nii.gz' + ' -ilabel ' + args.output_folder + '/second_image_sc_segmentation_labeled.nii.gz' - + ' -dlabel ' + args.output_folder + '/first_image_sc_segmentation_labeled.nii.gz' + ' -o ' + args.output_folder + '/first_image_registered.nii.gz' - + ' -owarp ' + args.output_folder +'/warping_M0_to_M0.nii.gz' + ' -param ' + parameters + ' -x linear -qc ' + args.output_folder + '/qc') - - - # parameters = 'step=1,type=seg,algo=slicereg,poly=5:step=2,type=im,algo=syn,metric=MI,deformation=1x1x1,smooth=3' - # os.system('sct_register_multimodal -i ' + args.input_second_image + ' -d ' + args.input_first_image + ' -iseg ' + args.output_folder + '/second_image_sc_segmentation.nii.gz' - # + ' -dseg ' + args.output_folder + '/first_image_sc_segmentation.nii.gz' + ' -o ' + args.output_folder + '/second_image_registered.nii.gz' - # + ' -owarp ' + args.output_folder +'/warping_M0_to_M12.nii.gz' + ' -param ' + parameters + ' -x linear -qc ' + args.output_folder + '/qc') - # #then we apply the warping field to the segmentation of the lesions on the second images using sct_apply_transfo - # os.system('sct_apply_transfo -i ' + args.segmentation_second_image + ' -d ' + args.segmentation_first_image + ' -w ' + args.output_folder + '/warping_M0_to_M12.nii.gz' + ' -o ' + args.output_folder + '/second_image_lesion_segmentation_registered.nii.gz' + ' -x linear') - - return None + parameters = 'step=0,type=label,dof=Tx_Ty_Tz:step=1,type=im,algo=dl' + os.system('sct_register_multimodal -i ' + args.input_second_image + ' -d ' + args.input_first_image + ' -ilabel ' + args.output_folder + '/second_image_sc_segmentation_labeled.nii.gz' + + ' -dlabel ' + args.output_folder + '/first_image_sc_segmentation_labeled.nii.gz' + ' -ofolder ' + args.output_folder + ' -owarp ' + args.output_folder + '/warp_M12_to_M0.nii.gz' + + ' -owarpinv ' + args.output_folder + '/warp_M0_to_M12.nii.gz' + ' -param ' + parameters + ' -x linear ') + + #we apply the warping to the segmentation of the second image + os.system('sct_apply_transfo -i ' + args.segmentation_second_image + ' -d ' + args.segmentation_first_image + ' -w ' + args.output_folder + '/warp_M12_to_M0.nii.gz' + ' -o ' + args.output_folder + '/second_lesion_seg_registered.nii.gz' + ' -x linear') + """ + + #we first look at the values in the reg lesion segmentation + second_lesion_seg = nib.load(args.output_folder + '/second_lesion_seg_registered.nii.gz') + second_lesion_seg_data = second_lesion_seg.get_fdata() + #we replace all values above a threshold by 1 + second_lesion_seg_data[second_lesion_seg_data > 1e-5] = 1 + + #we then load data from the first lesion segmentation + first_lesion_seg = nib.load(args.segmentation_first_image) + first_lesion_seg_data = first_lesion_seg.get_fdata() + + 
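The clustering step that follows groups lesion voxels into individual lesions with DBSCAN. A self-contained toy example (synthetic mask, not CanProCo data) showing the behaviour of the eps=10 / min_samples=5 settings and the +1 label shift used below:

import numpy as np
from sklearn.cluster import DBSCAN

# synthetic 3D binary mask with two well-separated blobs (stand-ins for lesions)
mask = np.zeros((40, 40, 40))
mask[5:8, 5:8, 5:8] = 1        # blob A, 27 voxels
mask[30:33, 30:33, 30:33] = 1  # blob B, 27 voxels

coords = np.argwhere(mask == 1)
labels = DBSCAN(eps=10, min_samples=5).fit(coords).labels_

# DBSCAN labels start at 0 (-1 would mean noise), hence the +1 shift
labeled = np.zeros(mask.shape)
for voxel, lab in zip(coords, labels):
    labeled[tuple(voxel)] = lab + 1

print(np.unique(labeled))  # -> [0. 1. 2.]: background plus two lesion IDs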
# we then perform clustering on the two images
+    # we first get the coordinates of the lesions
+    first_lesion_seg_coordinates = np.argwhere(first_lesion_seg_data == 1)
+    second_lesion_seg_coordinates = np.argwhere(second_lesion_seg_data == 1)
+    # we then perform clustering on the coordinates
+    clustering = DBSCAN(eps=10, min_samples=5).fit(first_lesion_seg_coordinates)
+    first_lesion_seg_labels = clustering.labels_
+    clustering = DBSCAN(eps=10, min_samples=5).fit(second_lesion_seg_coordinates)
+    second_lesion_seg_labels = clustering.labels_
+
+    #output the nifti images of the lesions where each lesion has a different value
+    first_lesion_seg_data2 = np.zeros(first_lesion_seg_data.shape)
+    second_lesion_seg_data2 = np.zeros(second_lesion_seg_data.shape)
+    for i,voxel in enumerate(first_lesion_seg_coordinates):
+        first_lesion_seg_data2[voxel[0], voxel[1], voxel[2]] = first_lesion_seg_labels[i]+1
+    for i,voxel in enumerate(second_lesion_seg_coordinates):
+        second_lesion_seg_data2[voxel[0], voxel[1], voxel[2]] = second_lesion_seg_labels[i]+1
+    first_lesion_seg2 = nib.Nifti1Image(first_lesion_seg_data2, first_lesion_seg.affine, first_lesion_seg.header)
+    second_lesion_seg2 = nib.Nifti1Image(second_lesion_seg_data2, second_lesion_seg.affine, second_lesion_seg.header)
+    nib.save(first_lesion_seg2, args.output_folder + '/first_lesion_seg_clustered.nii.gz')
+    nib.save(second_lesion_seg2, args.output_folder + '/second_lesion_seg_clustered.nii.gz')
+
+    #for each lesion of the second time point we consider that it corresponds to the lesion of the first time point with which it overlaps more than 50%
+    matching_table = np.empty(len(np.unique(second_lesion_seg_data2))-1)
+    for lesion_2nd in np.unique(second_lesion_seg_data2):
+        if lesion_2nd != 0:
+            for lesion_1st in np.unique(first_lesion_seg_data2):
+                if lesion_1st != 0:
+                    #we get the coordinates of the lesion
+                    second_lesion_seg_coordinates2 = np.argwhere(second_lesion_seg_data2 == lesion_2nd)
+                    #we get the coordinates of the lesion in the first time point
+                    first_lesion_seg_coordinates = np.argwhere(first_lesion_seg_data2 == lesion_1st)
+                    #we compute the intersection of the two sets of coordinates
+                    second_set = set(tuple(points) for points in second_lesion_seg_coordinates2)
+                    first_set = set(tuple(points) for points in first_lesion_seg_coordinates)
+                    intersection = len(second_set.intersection(first_set))
+                    ratio = intersection/len(first_set)
+                    if ratio > 0.5:
+                        matching_table[int(lesion_2nd-1)] = int(lesion_1st)
+                    else:
+                        matching_table[int(lesion_2nd-1)] = None
+
+    #we then compute the center of each lesion
+    center_2nd = []
+    for lesion_2nd in np.unique(second_lesion_seg_data2):
+        if lesion_2nd != 0:
+            second_lesion_seg_coordinates2 = np.argwhere(second_lesion_seg_data2 == lesion_2nd)
+            center_2nd.append(np.mean(second_lesion_seg_coordinates2, axis=0))
+    center_2nd = np.array(center_2nd)
+    center_1st = []
+    for lesion_1st in np.unique(first_lesion_seg_data2):
+        if lesion_1st != 0:
+            first_lesion_seg_coordinates = np.argwhere(first_lesion_seg_data2 == lesion_1st)
+            center_1st.append(np.mean(first_lesion_seg_coordinates, axis=0))
+    center_1st = np.array(center_1st)
+
+    #we then perform lesion clustering on the original seg file from the second time point
+    real_second_lesion_seg = nib.load(args.segmentation_second_image)
+    real_second_lesion_seg_data = real_second_lesion_seg.get_fdata()
+    real_second_lesion_seg_coordinates = np.argwhere(real_second_lesion_seg_data == 1)
+    clustering = DBSCAN(eps=10, min_samples=5).fit(real_second_lesion_seg_coordinates)
+    real_second_lesion_seg_labels = clustering.labels_
+    #for these we compute the center
+    real_second_lesion_seg_data2 = np.zeros(real_second_lesion_seg_data.shape)
+    for i,voxel in enumerate(real_second_lesion_seg_coordinates):
+        real_second_lesion_seg_data2[voxel[0], voxel[1], voxel[2]] = real_second_lesion_seg_labels[i]+1
+    center_real_2nd = []
+    for lesion_2nd in np.unique(real_second_lesion_seg_data2):
+        if lesion_2nd != 0:
+            real_second_lesion_seg_coordinates2 = np.argwhere(real_second_lesion_seg_data2 == lesion_2nd)
+            center_real_2nd.append(np.mean(real_second_lesion_seg_coordinates2, axis=0))
+    center_real_2nd = np.array(center_real_2nd)
+
+    #we then perform matching between lesions of the second time point warped and real lesions of the second time point
+    matching_table2 = np.empty(len(np.unique(real_second_lesion_seg_data2))-1)
+
+    # Function to compute distance between two lesions (you can customize this based on your features)
+    def lesion_distance(lesion1, lesion2):
+        return np.sqrt((lesion1[0] - lesion2[0])**2 + (lesion1[1] - lesion2[1])**2 + (lesion1[2] - lesion2[2])**2)
+
+    # Create a graph for each time point
+    graph_warp = nx.Graph()
+    graph_t2 = nx.Graph()
+
+    # Add nodes (lesions) to each graph
+    graph_warp.add_nodes_from(range(len(center_2nd)))
+    graph_t2.add_nodes_from(range(len(center_real_2nd)))
+
+    # Add edges between nodes with similarity based on lesion distance
+    for i, lesion1 in enumerate(center_2nd):
+        for j, lesion2 in enumerate(center_real_2nd):
+            distance = lesion_distance(lesion1, lesion2)
+            graph_warp.add_edge(i, j, weight=distance)
+            graph_t2.add_edge(j, i, weight=distance)  # Adding bidirectional edges
+
+    # Use Hungarian algorithm for graph matching
+    cost_matrix = np.array(nx.to_numpy_array(graph_warp))
+    row_ind, col_ind = linear_sum_assignment(cost_matrix)
+
+    # Create temporal links based on matching results
+    warping_links = [(i, j) for i, j in zip(range(len(center_2nd)), col_ind) if cost_matrix[i, j] < np.inf]
+
+    print("Temporal Links:", warping_links)
 
 if __name__ == '__main__':
     main()
\ No newline at end of file
From 51374e17aa9c63cf977de8d62c3c4443bde197fa Mon Sep 17 00:00:00 2001
From: Pierre-Louis Benveniste
Date: Fri, 4 Aug 2023 17:44:39 -0400
Subject: [PATCH 14/26] need to fix the identification of lesions across files

---
 .../time_point_lesion_evolution.py | 351 +++++++++++++-----
 1 file changed, 254 insertions(+), 97 deletions(-)

diff --git a/lesion_analysis/time_point_lesion_evolution.py b/lesion_analysis/time_point_lesion_evolution.py
index 2df0601..53fe7da 100644
--- a/lesion_analysis/time_point_lesion_evolution.py
+++ b/lesion_analysis/time_point_lesion_evolution.py
@@ -61,178 +61,335 @@ def get_parser():
                         help='Whether to show the results or not')
     return parser
 
-def main():
+
+def perform_registration(path_first_image, path_second_image, path_first_lesion_seg, path_second_lesion_seg, path_output_folder):
     """
-    This function is the main function of the script.
- - Args: - None + This function performs the registration of the images and the segmentations using spinal cord toolbox + Args: + path_first_image: path to the first image + path_second_image: path to the second image + path_first_lesion_seg: path to the lesion segmentation of the first image + path_second_lesion_seg: path to the lesion segmentation of the second image + path_output_folder: path to the output folder + Returns: - None + path_warp: path to the warp file + path_warpinv: path to the inverse warp file + path_second_image_registered: path to the second image registered + path_second_lesion_seg_registered: path to the second lesion segmentation registered """ - #get the parser - parser = get_parser() - args = parser.parse_args() - #get the images and the segmentations - first_image = nib.load(args.input_first_image) - second_image = nib.load(args.input_second_image) - first_lesion_segmentation = nib.load(args.segmentation_first_image) - second_lesion_segmentation = nib.load(args.segmentation_second_image) - - """ #first we segment the spinal cord on the two images using sct_deepseg_sc - os.system('sct_deepseg_sc -i ' + args.input_first_image + ' -c t2 -o ' + args.output_folder + '/first_image_sc_segmentation.nii.gz') - os.system('sct_deepseg_sc -i ' + args.input_second_image + ' -c t2 -o' + args.output_folder + '/second_image_sc_segmentation.nii.gz') + os.system('sct_deepseg_sc -i ' + path_first_image + ' -c t2 -o ' + path_output_folder + '/first_image_sc_seg.nii.gz') + os.system('sct_deepseg_sc -i ' + path_second_image + ' -c t2 -o' + path_output_folder + '/second_image_sc_seg.nii.gz') #then we compute the vertebral levels of the images using sct_label_vertebrae - os.system('sct_label_vertebrae -i ' + args.input_first_image + ' -s ' + args.output_folder + '/first_image_sc_segmentation.nii.gz' + ' -c t2 -ofolder ' + args.output_folder) - os.system('sct_label_vertebrae -i ' + args.input_second_image + ' -s ' + args.output_folder + '/second_image_sc_segmentation.nii.gz' + ' -c t2 -ofolder ' + args.output_folder) + os.system('sct_label_vertebrae -i ' + path_first_image + ' -s ' + path_output_folder + '/first_image_sc_seg.nii.gz' + ' -c t2 -ofolder ' + path_output_folder) + os.system('sct_label_vertebrae -i ' + path_second_image + ' -s ' + path_output_folder + '/second_image_sc_seg.nii.gz' + ' -c t2 -ofolder ' + path_output_folder) - first_label = nib.load(args.output_folder + '/first_image_sc_segmentation_labeled.nii.gz') - second_label = nib.load(args.output_folder + '/second_image_sc_segmentation_labeled.nii.gz') + #we remove labels that are not in common in both images + ##we first get the labels of the two images + first_label = nib.load(path_output_folder + '/first_image_sc_seg_labeled.nii.gz') + second_label = nib.load(path_output_folder + '/second_image_sc_seg_labeled.nii.gz') first_label_data = first_label.get_fdata() second_label_data = second_label.get_fdata() common_labels = np.intersect1d(np.unique(first_label_data), np.unique(second_label_data)) - #we remove labels that are not in common in both images + #we then remove the labels that are not in common for label in np.unique(first_label_data): if label not in common_labels: first_label_data[first_label_data == label] = 0 - #we save the new segmentation + #we overwrite the segmentation to keep only common labels first_label = nib.Nifti1Image(first_label_data, first_label.affine, first_label.header) - nib.save(first_label, args.output_folder + '/first_image_sc_segmentation_labeled.nii.gz') + nib.save(first_label, 
path_output_folder + '/first_image_sc_seg_labeled.nii.gz') #same for the second image for label in np.unique(second_label_data): if label not in common_labels: second_label_data[second_label_data == label] = 0 second_label = nib.Nifti1Image(second_label_data, second_label.affine, second_label.header) - nib.save(second_label, args.output_folder + '/second_image_sc_segmentation_labeled.nii.gz') + nib.save(second_label, path_output_folder + '/second_image_sc_seg_labeled.nii.gz') #then we register the images and the segmentations with the first image as reference using sct_register_multimodal using both the spinal cord segmentation and the vertebral levels parameters = 'step=0,type=label,dof=Tx_Ty_Tz:step=1,type=im,algo=dl' - os.system('sct_register_multimodal -i ' + args.input_second_image + ' -d ' + args.input_first_image + ' -ilabel ' + args.output_folder + '/second_image_sc_segmentation_labeled.nii.gz' - + ' -dlabel ' + args.output_folder + '/first_image_sc_segmentation_labeled.nii.gz' + ' -ofolder ' + args.output_folder + ' -owarp ' + args.output_folder + '/warp_M12_to_M0.nii.gz' - + ' -owarpinv ' + args.output_folder + '/warp_M0_to_M12.nii.gz' + ' -param ' + parameters + ' -x linear ') + os.system('sct_register_multimodal -i ' + path_second_image + ' -d ' + path_first_image + ' -ilabel ' + path_output_folder + '/second_image_sc_seg_labeled.nii.gz' + + ' -dlabel ' + path_output_folder + '/first_image_sc_seg_labeled.nii.gz' + ' -ofolder ' + path_output_folder + ' -owarp ' + path_output_folder + '/warp_2nd_to_1st.nii.gz' + + ' -owarpinv ' + path_output_folder + '/warp_1st_to_2nd.nii.gz' + ' -param ' + parameters + ' -x linear ') #we apply the warping to the segmentation of the second image - os.system('sct_apply_transfo -i ' + args.segmentation_second_image + ' -d ' + args.segmentation_first_image + ' -w ' + args.output_folder + '/warp_M12_to_M0.nii.gz' + ' -o ' + args.output_folder + '/second_lesion_seg_registered.nii.gz' + ' -x linear') + os.system('sct_apply_transfo -i ' + path_second_lesion_seg + ' -d ' + path_first_lesion_seg + ' -w ' + path_output_folder + '/warp_2nd_to_1st.nii.gz' + + ' -o ' + path_output_folder + '/second_lesion_seg_registered.nii.gz' + ' -x linear') + + return path_output_folder + '/warp_2nd_to_1st.nii.gz', path_output_folder + '/warp_1st_to_2nd.nii.gz', path_output_folder + '/second_image_sc_seg_registered.nii.gz',path_output_folder + '/second_lesion_seg_registered.nii.gz' + + +def temporal_lesion_matching_on_reg_image(path_first_lesion_seg, path_second_lesion_seg_reg): + """ + This function performs temporal lesion matching using the 1st time point lesion segmentation and the 2nd time point lesion segmentation registered to the 1st time point. 
+ + Args: + path_first_lesion_seg: path to the lesion segmentation of the first image + path_second_lesion_seg_reg: path to the lesion segmentation of the second image registered to the first image + + Returns: + matching_table: table containing the matching between lesions of the first time point and lesions of the second time point + center_1st: coordinates of the center of the lesions of the first time point + center_2nd: coordinates of the center of the lesions of the second time point + 1st_data_labeled: segmentation of the first time point with each lesion in a different color + 2nd_data_labeled: segmentation of the second time point with each lesion in a different color """ - #we first look at the values in the reg lesion segmentation - second_lesion_seg = nib.load(args.output_folder + '/second_lesion_seg_registered.nii.gz') - second_lesion_seg_data = second_lesion_seg.get_fdata() - #we replace all values above a threshold by 1 - second_lesion_seg_data[second_lesion_seg_data > 1e-5] = 1 + #because registration modifies the values of the segmentation, we first replace all values above a threshold by 1 + second_lesion_seg_reg = nib.load(path_second_lesion_seg_reg) + second_lesion_seg_reg_data = second_lesion_seg_reg.get_fdata() + second_lesion_seg_reg_data[second_lesion_seg_reg_data > 1e-5] = 1 #we then load data from the first lesion segmentation - first_lesion_seg = nib.load(args.segmentation_first_image) + first_lesion_seg = nib.load(path_first_lesion_seg) first_lesion_seg_data = first_lesion_seg.get_fdata() # we then perform clustering on the two images # we first get the coordinates of the lesions first_lesion_seg_coordinates = np.argwhere(first_lesion_seg_data == 1) - second_lesion_seg_coordinates = np.argwhere(second_lesion_seg_data == 1) + second_lesion_seg_reg_coordinates = np.argwhere(second_lesion_seg_reg_data == 1) # we then perform clustering on the coordinates - clustering = DBSCAN(eps=10, min_samples=5).fit(first_lesion_seg_coordinates) - first_lesion_seg_labels = clustering.labels_ - clustering = DBSCAN(eps=10, min_samples=5).fit(second_lesion_seg_coordinates) - second_lesion_seg_labels = clustering.labels_ + clustering_1st = DBSCAN(eps=10, min_samples=5).fit(first_lesion_seg_coordinates) + first_lesion_seg_labels = clustering_1st.labels_ + clustering_2nd = DBSCAN(eps=10, min_samples=5).fit(second_lesion_seg_reg_coordinates) + second_lesion_seg_reg_labels = clustering_2nd.labels_ - #output the nifti images of the lesions where each lesion has a different value + #create the same tables with the lesions in different colors (voxel value = lesion ID) first_lesion_seg_data2 = np.zeros(first_lesion_seg_data.shape) - second_lesion_seg_data2 = np.zeros(second_lesion_seg_data.shape) + second_lesion_seg_reg_data2 = np.zeros(second_lesion_seg_reg_data.shape) for i,voxel in enumerate(first_lesion_seg_coordinates): first_lesion_seg_data2[voxel[0], voxel[1], voxel[2]] = first_lesion_seg_labels[i]+1 - for i,voxel in enumerate(second_lesion_seg_coordinates): - second_lesion_seg_data2[voxel[0], voxel[1], voxel[2]] = second_lesion_seg_labels[i]+1 - first_lesion_seg2 = nib.Nifti1Image(first_lesion_seg_data2, first_lesion_seg.affine, first_lesion_seg.header) - second_lesion_seg2 = nib.Nifti1Image(second_lesion_seg_data2, second_lesion_seg.affine, second_lesion_seg.header) - nib.save(first_lesion_seg2, args.output_folder + '/first_lesion_seg_clustered.nii.gz') - nib.save(second_lesion_seg2, args.output_folder + '/second_lesion_seg_clustered.nii.gz') + for i,voxel in 
enumerate(second_lesion_seg_reg_coordinates):
+        second_lesion_seg_reg_data2[voxel[0], voxel[1], voxel[2]] = second_lesion_seg_reg_labels[i]+1
 
     #for each lesion of the second time point we consider that it corresponds to the lesion of the first time point with which it overlaps more than 50%
-    matching_table = np.empty(len(np.unique(second_lesion_seg_data2))-1)
-    for lesion_2nd in np.unique(second_lesion_seg_data2):
+    matching_table = {}
+    for lesion_2nd in np.unique(second_lesion_seg_reg_data2):
         if lesion_2nd != 0:
             for lesion_1st in np.unique(first_lesion_seg_data2):
                 if lesion_1st != 0:
                     #we get the coordinates of the lesion
-                    second_lesion_seg_coordinates2 = np.argwhere(second_lesion_seg_data2 == lesion_2nd)
+                    second_lesion_seg_coordinates2 = np.argwhere(second_lesion_seg_reg_data2 == lesion_2nd)
                     #we get the coordinates of the lesion in the first time point
                     first_lesion_seg_coordinates = np.argwhere(first_lesion_seg_data2 == lesion_1st)
-                    #we compute the intersection of the two sets of coordinates
+                    #we compute the overlap of the two sets of coordinates
                     second_set = set(tuple(points) for points in second_lesion_seg_coordinates2)
                     first_set = set(tuple(points) for points in first_lesion_seg_coordinates)
                     intersection = len(second_set.intersection(first_set))
                     ratio = intersection/len(first_set)
+                    matching_table[int(lesion_2nd)] = {}
                     if ratio > 0.5:
-                        matching_table[int(lesion_2nd-1)] = int(lesion_1st)
+                        matching_table[int(lesion_2nd)]["Lesion_1st_timepoint"] = int(lesion_1st)
+                        matching_table[int(lesion_2nd)]["Overlapping_ratio"] = round(ratio,2)
                     else:
-                        matching_table[int(lesion_2nd-1)] = None
+                        matching_table[int(lesion_2nd)]["Lesion_1st_timepoint"] = None
+                        matching_table[int(lesion_2nd)]["Overlapping_ratio"] = None
 
     #we then compute the center of each lesion
-    center_2nd = []
-    for lesion_2nd in np.unique(second_lesion_seg_data2):
+    center_2nd = {}
+    for lesion_2nd in np.unique(second_lesion_seg_reg_data2):
         if lesion_2nd != 0:
-            second_lesion_seg_coordinates2 = np.argwhere(second_lesion_seg_data2 == lesion_2nd)
-            center_2nd.append(np.mean(second_lesion_seg_coordinates2, axis=0))
-    center_2nd = np.array(center_2nd)
-    center_1st = []
+            second_lesion_seg_reg_coordinates2 = np.argwhere(second_lesion_seg_reg_data2 == lesion_2nd)
+            center_2nd[int(lesion_2nd)] = np.mean(second_lesion_seg_reg_coordinates2, axis=0)
+    center_1st = {}
     for lesion_1st in np.unique(first_lesion_seg_data2):
         if lesion_1st != 0:
             first_lesion_seg_coordinates = np.argwhere(first_lesion_seg_data2 == lesion_1st)
-            center_1st.append(np.mean(first_lesion_seg_coordinates, axis=0))
-    center_1st = np.array(center_1st)
-
-    #we then perform lesion clustering on the original seg file from the second time point
-    real_second_lesion_seg = nib.load(args.segmentation_second_image)
-    real_second_lesion_seg_data = real_second_lesion_seg.get_fdata()
-    real_second_lesion_seg_coordinates = np.argwhere(real_second_lesion_seg_data == 1)
-    clustering = DBSCAN(eps=10, min_samples=5).fit(real_second_lesion_seg_coordinates)
-    real_second_lesion_seg_labels = clustering.labels_
-    #for these we compute the center
-    real_second_lesion_seg_data2 = np.zeros(real_second_lesion_seg_data.shape)
-    for i,voxel in enumerate(real_second_lesion_seg_coordinates):
-        real_second_lesion_seg_data2[voxel[0], voxel[1], voxel[2]] = real_second_lesion_seg_labels[i]+1
-    center_real_2nd = []
-    for lesion_2nd in np.unique(real_second_lesion_seg_data2):
-        if lesion_2nd != 0:
-            real_second_lesion_seg_coordinates2 = np.argwhere(real_second_lesion_seg_data2 == lesion_2nd)
-            center_real_2nd.append(np.mean(real_second_lesion_seg_coordinates2, axis=0))
-    center_real_2nd = np.array(center_real_2nd)
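The >50% overlap criterion above is easy to sanity-check on synthetic voxel sets; note that the ratio is normalised by the size of the first-time-point lesion, so a large new lesion can still match a small old one:

# lesion at the first time point: a 3x3x3 block of voxels
first_set = {(x, y, z) for x in range(3) for y in range(3) for z in range(3)}
# candidate lesion from the registered second time point, shifted by 2 voxels along x
second_set = {(x, y, z) for x in range(2, 5) for y in range(3) for z in range(3)}

intersection = len(second_set.intersection(first_set))
ratio = intersection / len(first_set)
print(intersection, round(ratio, 2))  # -> 9 0.33, below the 0.5 threshold: no match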
+            center_1st[int(lesion_1st)] = np.mean(first_lesion_seg_coordinates, axis=0)
+
+    #we first overwrite the segmentation files with the lesions in different colors
+    final_first_lesion_seg = np.zeros(first_lesion_seg_data2.shape)
+    final_second_lesion_seg_reg = np.zeros(second_lesion_seg_reg_data2.shape)
+    for i,voxel in enumerate(first_lesion_seg_coordinates):
+        final_first_lesion_seg[voxel[0], voxel[1], voxel[2]] = matching_table[first_lesion_seg_data2[voxel[0], voxel[1], voxel[2]]]["Lesion_1st_timepoint"]
+
+    for i,lesion in enumerate(matching_table):
+        print(i)
+        print(lesion)
+        print(matching_table[lesion])
+        first_lesion_seg_data2.replace(matching_table[lesion]["Lesion_1st_timepoint"], lesion)
+        #print(first_lesion_seg_data2)
+
+
+    #first_lesion_seg2 = nib.Nifti1Image(first_lesion_seg_data2, first_lesion_seg.affine, first_lesion_seg.header)
+    #second_lesion_seg2 = nib.Nifti1Image(second_lesion_seg_data2, second_lesion_seg.affine, second_lesion_seg.header)
+    #nib.save(first_lesion_seg2, args.output_folder + '/first_lesion_seg_clustered.nii.gz')
+    #nib.save(second_lesion_seg2, args.output_folder + '/second_lesion_seg_clustered.nii.gz')
+
+    return matching_table, center_1st, center_2nd, first_lesion_seg_data2, second_lesion_seg_reg_data2
+
+
+def lesion_distance(lesion1, lesion2, volume1, volume2):
+    """
+    This function computes the distance between two lesions for the graph matching.
+
+    Args:
+        lesion1: coordinates of the first lesion
+        lesion2: coordinates of the second lesion
-
-    #we then perform matching between lesions of the second time point warped and real lesions of the second time point
-    matching_table2 = np.empty(len(np.unique(real_second_lesion_seg_data2))-1)
+
+    Returns:
+        distance: distance between the two lesions
+    """
+    pos_distance = np.sqrt((lesion1[0] - lesion2[0])**2 + (lesion1[1] - lesion2[1])**2 + (lesion1[2] - lesion2[2])**2)
+    vol_distance = np.abs(volume1 - volume2)
+
+    return pos_distance + vol_distance
+
+
-    # Function to compute distance between two lesions (you can customize this based on your features)
-    def lesion_distance(lesion1, lesion2):
-        return np.sqrt((lesion1[0] - lesion2[0])**2 + (lesion1[1] - lesion2[1])**2 + (lesion1[2] - lesion2[2])**2)
+def volume_lesions(labeled_data, voxel_volume):
+    """
+    This function computes the volume of the lesions.
+
+    Args:
+        labeled_data: data with each lesion in a different color
+        voxel_volume: volume of a voxel
+
+    Returns:
+        volumes: volume of the lesions
+    """
+
+    volumes = []
+    for lesion in np.unique(labeled_data):
+        if lesion != 0:
+            volume = len(np.argwhere(labeled_data == lesion))*voxel_volume
+            volumes.append((lesion, volume))
+    return np.array(volumes)
+
-    # Create a graph for each time point
-    graph_warp = nx.Graph()
-    graph_t2 = nx.Graph()
+def graph_matching(center_2nd_orig, center_2nd_reg, second_orig_volumes, second_reg_volumes):
+    """
+    This function performs graph matching between the lesions of the second time point registered and the lesions of the second time point.
+    The node distances take into account the distance between the lesions and the volume of the lesions.
+ + Args: + center_2nd: coordinates of the center of the lesions of the second time point + center_2nd_reg: coordinates of the center of the lesions of the second time point registered + second_orig_volumes: volume of the lesions of the second time point + second_reg_volumes: volume of the lesions of the second time point registered + + Returns: + matching_table: table containing the matching between lesions of the second time point and lesions of the second time point registered + """ # Create a graph for each time point - graph_warp = nx.Graph() - graph_t2 = nx.Graph() + graph_t2_orig = nx.Graph() + graph_t2_reg = nx.Graph() # Add nodes (lesions) to each graph - graph_warp.add_nodes_from(range(len(center_2nd))) - graph_t2.add_nodes_from(range(len(center_real_2nd))) + graph_t2_orig.add_nodes_from(range(len(center_2nd_orig))) + graph_t2_reg.add_nodes_from(range(len(center_2nd_reg))) # Add edges between nodes with similarity based on lesion distance - for i, lesion1 in enumerate(center_2nd): - for j, lesion2 in enumerate(center_real_2nd): - distance = lesion_distance(lesion1, lesion2) - graph_warp.add_edge(i, j, weight=distance) - graph_t2.add_edge(j, i, weight=distance) # Adding bidirectional edges + for i, lesion_reg in enumerate(center_2nd_reg): + for j, lesion_orig in enumerate(center_2nd_orig): + distance = lesion_distance(center_2nd_reg[lesion_reg], center_2nd_orig[lesion_orig], second_reg_volumes[i][1], second_orig_volumes[j][1]) + graph_t2_reg.add_edge(i, j, weight=distance) + graph_t2_orig.add_edge(j, i, weight=distance) # Adding bidirectional edges # Use Hungarian algorithm for graph matching - cost_matrix = np.array(nx.to_numpy_array(graph_warp)) + cost_matrix = np.array(nx.to_numpy_array(graph_t2_reg)) row_ind, col_ind = linear_sum_assignment(cost_matrix) # Create temporal links based on matching results - warping_links = [(i, j) for i, j in zip(range(len(center_2nd)), col_ind) if cost_matrix[i, j] < np.inf] + matching_reg_to_orig = [(i, j) for i, j in zip(range(len(center_2nd_reg)), col_ind) if cost_matrix[i, j] < np.inf] + + return matching_reg_to_orig + + + +def main(): + """ + This function is the main function of the script. 
+ + Args: + None + + Returns: + None + """ + #get the parser + parser = get_parser() + args = parser.parse_args() + + #we first perform the registration of the images and the segmentations + # path_warp, path_warpinv, path_second_image_reg, path_second_lesion_seg_reg = perform_registration(args.input_first_image, args.input_second_image, args.segmentation_first_image, + # args.segmentation_second_image, args.output_folder) + path_second_lesion_seg_reg = args.output_folder + '/second_lesion_seg_registered.nii.gz' + + #we then perform temporal lesion matching using the 1st time point lesion segmentation and the 2nd time point lesion segmentation registered to the 1st time point + matching_table_1st_to_2nd_reg, center_1st, center_2nd_reg, first_data_labeled, second_data_reg_labeled = temporal_lesion_matching_on_reg_image(args.segmentation_first_image, path_second_lesion_seg_reg) + + #we then perfrom lesion clustering on the original lesion segmentation file from the second time point + orig_second_lesion_seg = nib.load(args.segmentation_second_image) + orig_second_lesion_seg_data = orig_second_lesion_seg.get_fdata() + orig_second_lesion_seg_coordinates = np.argwhere(orig_second_lesion_seg_data == 1) + clustering_2nd_orig = DBSCAN(eps=10, min_samples=5).fit(orig_second_lesion_seg_coordinates) + orig_second_lesion_seg_labels = clustering_2nd_orig.labels_ + #we label the data by cluster + orig_second_lesion_seg_data_labeled = np.zeros(orig_second_lesion_seg_data.shape) + for i,voxel in enumerate(orig_second_lesion_seg_coordinates): + orig_second_lesion_seg_data_labeled[voxel[0], voxel[1], voxel[2]] = orig_second_lesion_seg_labels[i]+1 + center_2nd_orig = {} + #we compute the centers + for lesion_2nd in np.unique(orig_second_lesion_seg_data_labeled): + if lesion_2nd != 0: + orig_second_lesion_seg_coordinates2 = np.argwhere(orig_second_lesion_seg_data_labeled == lesion_2nd) + center_2nd_orig[int(lesion_2nd)] = np.mean(orig_second_lesion_seg_coordinates2, axis=0) + + + """ + #we then perform lesion matching between the second time point lesion segmentation and the second time point lesion segmentation registered to the first time point + second_reg_voxel_volume = np.prod(nib.load(args.input_first_image).header.get_zooms()) + second_voxel_volume = np.prod(nib.load(args.input_second_image).header.get_zooms()) + second_reg_volumes = volume_lesions(second_data_reg_labeled, second_reg_voxel_volume) + second_orig_volumes = volume_lesions(orig_second_lesion_seg_data_labeled, second_voxel_volume) + matching_reg_to_orig = graph_matching(center_2nd_orig, center_2nd_reg, second_orig_volumes, second_reg_volumes) + + + + #we build a matching table between the lesions of the first time point and the lesions of the second time point + matching_table = np.empty(matching_table_1st_to_2nd_reg.shape) + for i in range(len(matching_table_1st_to_2nd_reg)): + matching_table[i][0] = matching_table_1st_to_2nd_reg[matching_reg_to_orig[i][1]][0] + matching_table[i][1] = matching_table_1st_to_2nd_reg[matching_reg_to_orig[i][1]][1] + + #compute the volume of the lesions + first_voxel_volume = np.prod(nib.load(args.input_first_image).header.get_zooms()) + second_voxel_volume = np.prod(nib.load(args.input_second_image).header.get_zooms()) + first_volumes = volume_lesions(first_data_labeled, first_voxel_volume) + second_volumes = volume_lesions(orig_second_lesion_seg_data_labeled, second_voxel_volume) + + #we save the results in nifti files + second_data_reg_labeled = nib.Nifti1Image(second_data_reg_labeled, 
orig_second_lesion_seg.affine, orig_second_lesion_seg.header) + nib.save(second_data_reg_labeled, args.output_folder + '/second_lesion_seg_registered_clustered222.nii.gz') + orig_second_lesion_seg_data_labeled = nib.Nifti1Image(orig_second_lesion_seg_data_labeled, orig_second_lesion_seg.affine, orig_second_lesion_seg.header) + nib.save(orig_second_lesion_seg_data_labeled, args.output_folder + '/second_lesion_seg_clustered333.nii.gz') + """ + + #print output file + """ + with open(args.output_folder + '/temporal_analysis_results.txt', 'w') as f: + f.write('TEMPORAL ANALYSIS OF MULTIPLE SCLEROSIS LESIONS:\n') + f.write('------------------------------------------------\n') + f.write('Subject Name: at t0 ' + args.input_first_image.split('/')[-1] + ' and at t1 ' + args.input_second_image.split('/')[-1] + ' \n') + f.write('------------------------------------------------\n') + f.write('Lesions at t0: \n') + f.write('------------------------------------------------\n') + f.write('Number of lesions: ' + str(len(np.unique(first_data_labeled))-1) + '\n') + for lesion in first_volumes: + f.write('Lesion ' + str(int(lesion[0])) + ' volume: ' + str(lesion[1]) + ' mm3\n') + """ + + + + + + - print("Temporal Links:",warping_links) if __name__ == '__main__': main() \ No newline at end of file From 71883491a7301447b15f1f9e68161c1c965c0b23 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Mon, 7 Aug 2023 12:42:28 -0400 Subject: [PATCH 15/26] first working version of lesion comparison --- .../time_point_lesion_evolution.py | 338 ++++++++++-------- 1 file changed, 180 insertions(+), 158 deletions(-) diff --git a/lesion_analysis/time_point_lesion_evolution.py b/lesion_analysis/time_point_lesion_evolution.py index 53fe7da..417ae54 100644 --- a/lesion_analysis/time_point_lesion_evolution.py +++ b/lesion_analysis/time_point_lesion_evolution.py @@ -20,7 +20,7 @@ python3 lesion_analysis/time_point_lesion_evolution.py -i-1 /Users/plbenveniste/Desktop/lesion_comparison_copy/sub-cal072_ses-M0_STIR.nii.gz -i-2 /Users/plbenveniste/Desktop/lesion_comparison_copy/sub-cal072_ses-M12_STIR.nii.gz -seg-1 /Users/plbenveniste/Desktop/lesion_comparison_copy/sub-cal072_ses-M0_STIR_lesion-manual.nii.gz -seg-2 /Users/plbenveniste/Desktop/lesion_comparison_copy/M12_inference_results.nii.gz -o /Users/plbenveniste/Desktop/lesion_comparison_copy/output To do: - * + * Remove some of the outputed files Pierre-Louis Benveniste """ @@ -118,18 +118,19 @@ def perform_registration(path_first_image, path_second_image, path_first_lesion_ #we apply the warping to the segmentation of the second image os.system('sct_apply_transfo -i ' + path_second_lesion_seg + ' -d ' + path_first_lesion_seg + ' -w ' + path_output_folder + '/warp_2nd_to_1st.nii.gz' - + ' -o ' + path_output_folder + '/second_lesion_seg_registered.nii.gz' + ' -x linear') + + ' -o ' + path_output_folder + '/second_lesion_seg_registered.nii.gz' + ' -x nn') return path_output_folder + '/warp_2nd_to_1st.nii.gz', path_output_folder + '/warp_1st_to_2nd.nii.gz', path_output_folder + '/second_image_sc_seg_registered.nii.gz',path_output_folder + '/second_lesion_seg_registered.nii.gz' -def temporal_lesion_matching_on_reg_image(path_first_lesion_seg, path_second_lesion_seg_reg): +def temporal_lesion_matching_on_reg_image(path_first_lesion_seg, path_second_lesion_seg_reg, path_output_folder): """ This function performs temporal lesion matching using the 1st time point lesion segmentation and the 2nd time point lesion segmentation registered to the 1st time point. 
Args: path_first_lesion_seg: path to the lesion segmentation of the first image path_second_lesion_seg_reg: path to the lesion segmentation of the second image registered to the first image + path_output_folder: path to the output folder Returns: matching_table: table containing the matching between lesions of the first time point and lesions of the second time point @@ -167,10 +168,25 @@ def temporal_lesion_matching_on_reg_image(path_first_lesion_seg, path_second_les second_lesion_seg_reg_data2[voxel[0], voxel[1], voxel[2]] = second_lesion_seg_reg_labels[i]+1 #for each lesion of the second time point we consider that it corresponds to the lesion of the first time point with which it overlaps more than 50% - matching_table = {} + list_lesion_2nd = [] + corresponding_lesion_1st = [] + overlapping_ratio = [] + center_2nd = [] + center_1st = [] + first_pass = True for lesion_2nd in np.unique(second_lesion_seg_reg_data2): if lesion_2nd != 0: + list_lesion_2nd.append(lesion_2nd) + #we compute the centers + second_lesion_iter_center = np.argwhere(second_lesion_seg_reg_data2 == lesion_2nd) + center_2nd.append(np.mean(second_lesion_iter_center, axis=0)) for lesion_1st in np.unique(first_lesion_seg_data2): + #we compute the centers + if first_pass: + first_lesion_iter_center = np.argwhere(first_lesion_seg_data2 == lesion_1st) + center_1st.append(np.mean(first_lesion_iter_center, axis=0)) + first_pass = False + #we compute the overlap if lesion_1st !=0: #we get the coordinates of the lesion second_lesion_seg_coordinates2 = np.argwhere(second_lesion_seg_reg_data2 == lesion_2nd) @@ -181,125 +197,153 @@ def temporal_lesion_matching_on_reg_image(path_first_lesion_seg, path_second_les first_set = set(tuple(points) for points in first_lesion_seg_coordinates) intersection = len(second_set.intersection(first_set)) ratio = intersection/len(first_set) - matching_table[int(lesion_2nd)] = {} if ratio > 0.5: - matching_table[int(lesion_2nd)]["Lesion_1st_timepoint"] = int(lesion_1st) - matching_table[int(lesion_2nd)]["Overlapping_ratio"] = round(ratio,2) + corresponding_lesion_1st.append(lesion_1st) + overlapping_ratio.append(ratio) else: - matching_table[int(lesion_2nd)]["Lesion_1st_timepoint"] = None - matching_table[int(lesion_2nd)]["Overlapping_ratio"] = None - - #we then compute the center of each lesion - center_2nd = {} - for lesion_2nd in np.unique(second_lesion_seg_reg_data2): - if lesion_2nd != 0: - second_lesion_seg_reg_coordinates2 = np.argwhere(second_lesion_seg_reg_data2 == lesion_2nd) - center_2nd[int(lesion_2nd)] = np.mean(second_lesion_seg_reg_coordinates2, axis=0) - center_1st = {} - for lesion_1st in np.unique(first_lesion_seg_data2): - if lesion_1st != 0: - first_lesion_seg_coordinates = np.argwhere(first_lesion_seg_data2 == lesion_1st) - center_1st[int(lesion_1st)] = np.mean(first_lesion_seg_coordinates, axis=0) - - #we first overwrite the segmentation files with the lesions in different colors - final_first_lesion_seg = np.zeros(first_lesion_seg_data2.shape) - final_second_lesion_seg_reg = np.zeros(second_lesion_seg_reg_data2.shape) - for i,voxel in enumerate(first_lesion_seg_coordinates): - final_first_lesion_seg[voxel[0], voxel[1], voxel[2]] = matching_table[first_lesion_seg_data2[voxel[0], voxel[1], voxel[2]]]["Lesion_1st_timepoint"] - - for i,lesion in enumerate(matching_table): - print(i) - print(lesion) - print(matching_table[lesion]) - first_lesion_seg_data2.replace(matching_table[lesion]["Lesion_1st_timepoint"], lesion) - #print(first_lesion_seg_data2) - + 
corresponding_lesion_1st.append(None) + overlapping_ratio.append(None) - #first_lesion_seg2 = nib.Nifti1Image(first_lesion_seg_data2, first_lesion_seg.affine, first_lesion_seg.header) - #second_lesion_seg2 = nib.Nifti1Image(second_lesion_seg_data2, second_lesion_seg.affine, second_lesion_seg.header) - #nib.save(first_lesion_seg2, args.output_folder + '/first_lesion_seg_clustered.nii.gz') - #nib.save(second_lesion_seg2, args.output_folder + '/second_lesion_seg_clustered.nii.gz') + lesion_dict = {"list_lesion_2nd": list_lesion_2nd, "corresponding_lesion_1st": corresponding_lesion_1st, "overlapping_ratio": overlapping_ratio, + "center_2nd": center_2nd, "center_1st": center_1st} - return matching_table, center_1st, center_2nd, first_lesion_seg_data2, second_lesion_seg_reg_data2 - - -def lesion_distance(lesion1, lesion2, volume1, volume2): - """ - This function computes the distance betweeen two lesions for the graph matching. + #finally we overwrite the segmentation files with the lesions in colors matching from t1 to t2 + final_first_lesion_seg = np.zeros(first_lesion_seg_data2.shape) + for lesion in np.unique(first_lesion_seg_data2): + if lesion!=0: + arg_where_lesion = np.argwhere(first_lesion_seg_data2 == lesion) + index_lesion = np.argwhere(lesion_dict['corresponding_lesion_1st'] == lesion)[0] + for coord in arg_where_lesion: + final_first_lesion_seg[coord[0], coord[1], coord[2]] = lesion_dict['list_lesion_2nd'][index_lesion[0]] + final_second_lesion_seg_reg = np.copy(second_lesion_seg_reg_data2) - Args: - lesion1: coordinates of the first lesion - lesion2: coordinates of the second lesion + #we save the results in nifti files + first_lesion_seg2 = nib.Nifti1Image(first_lesion_seg_data2, first_lesion_seg.affine, first_lesion_seg.header) + second_lesion_seg2 = nib.Nifti1Image(final_second_lesion_seg_reg, second_lesion_seg_reg.affine, second_lesion_seg_reg.header) + nib.save(first_lesion_seg2, path_output_folder + '/first_lesion_seg_clustered.nii.gz') + nib.save(second_lesion_seg2, path_output_folder + '/second_lesion_seg_clustered.nii.gz') - Returns: - distance: distance between the two lesions - """ - pos_distance = np.sqrt((lesion1[0] - lesion2[0])**2 + (lesion1[1] - lesion2[1])**2 + (lesion1[2] - lesion2[2])**2) - vol_distance = np.abs(volume1 - volume2) + return lesion_dict, final_first_lesion_seg, final_second_lesion_seg_reg - return pos_distance + vol_distance - -def volume_lesions(labeled_data, voxel_volume): +def lesion_matching(path_t2_seg, path_t2_inv_wrapped, path_output_folder): """ - This function computes the volume of the lesions. + This function performs lesion matching between the 2nd time point lesion segmentation and the 2nd time point lesion segmentation inversely wrapped. 
Args: - labeled_data: data with each lesion in a different color - voxel_volume: volume of a voxel - + path_t2_seg: path to the lesion segmentation of the second time point + path_t2_inv_wrapped: path to the lesion segmentation of the second time point inversely wrapped + path_output_folder: path to the output folder + Returns: - volumes: volume of the lesions + path_t2_seg: path to the lesion segmentation of the second time point with the lesions in same color as the lesions of the second time point inversely wrapped """ + #we first load data + t2_seg = nib.load(path_t2_seg) + t2_seg_data = t2_seg.get_fdata() + t2_inv_wrapped = nib.load(path_t2_inv_wrapped) + t2_inv_wrapped_data = t2_inv_wrapped.get_fdata() + + #we then perform clustering on t2_seg + t2_seg_coordinates = np.argwhere(t2_seg_data == 1) + clustering_t2 = DBSCAN(eps=10, min_samples=5).fit(t2_seg_coordinates) + t2_seg_labels = clustering_t2.labels_ + #from this we build a dataset with the lesions in different colors + t2_seg_data_labeled = np.zeros(t2_seg_data.shape) + for i,voxel in enumerate(t2_seg_coordinates): + t2_seg_data_labeled[voxel[0], voxel[1], voxel[2]] = t2_seg_labels[i]+1 + + #we now compute the centers of the lesions + center_t2 = [] + color_t2 = [] + for lesion_t2 in np.unique(t2_seg_data_labeled): + if lesion_t2 != 0: + t2_seg_coordinates2 = np.argwhere(t2_seg_data_labeled == lesion_t2) + center_t2.append(np.mean(t2_seg_coordinates2, axis=0)) + color_t2.append(lesion_t2) + center_t2_inv_wrapped = [] + color_t2_inv_wrapped = [] + for lesion_t2 in np.unique(t2_inv_wrapped_data): + if lesion_t2 != 0: + t2_inv_wrapped_coordinates = np.argwhere(t2_inv_wrapped_data == lesion_t2) + center_t2_inv_wrapped.append(np.mean(t2_inv_wrapped_coordinates, axis=0)) + color_t2_inv_wrapped.append(lesion_t2) + + if len(center_t2) != len(center_t2_inv_wrapped): + print('Error: the number of lesions is different between the two segmentations.') + return None + + #we then perform matching by closest distance of the two center lists + matching_table = np.zeros((len(center_t2), 2)) + for i, center in enumerate(center_t2): + min_distance = np.inf + for j, center_inv_wrapped in enumerate(center_t2_inv_wrapped): + distance = np.sqrt((center[0] - center_inv_wrapped[0])**2 + (center[1] - center_inv_wrapped[1])**2 + (center[2] - center_inv_wrapped[2])**2) + if distance < min_distance: + min_distance = distance + matching_table[i][0] = i + matching_table[i][1] = j + + #we then overwrite the segmentation file with the lesions in colors matching from t1 to t2 + final_t2_seg = np.zeros(t2_seg_data_labeled.shape) + for i,lesion_value in enumerate(color_t2): + final_t2_seg[t2_seg_data_labeled == lesion_value] = color_t2_inv_wrapped[int(matching_table[i][1])] + final_t2_seg = nib.Nifti1Image(final_t2_seg, t2_seg.affine, t2_seg.header) + out_path = path_output_folder + path_t2_seg.split('/')[-1].split('.')[0] + '_matched.nii.gz' + nib.save(final_t2_seg, out_path) + + return out_path + - volumes = [] - for lesion in np.unique(labeled_data): - if lesion != 0: - volume = len(np.argwhere(labeled_data == lesion))*voxel_volume - volumes.append((lesion,volume)) - return np.array(volumes) - - -def graph_matching(center_2nd_orig, center_2nd_reg, second_orig_volumes, second_reg_volumes): +def comparison_lesions_t1_t2(path_first_lesion_seg, path_second_lesion_seg): """ - This function performs graph matching between the lesions of the second time point registered and the lesions of the second time point. 
- The node distances take into account the distance between the lesions and the volume of the lesions. + This function performs comparison of lesions from t1 and t2. Args: - center_2nd: coordinates of the center of the lesions of the second time point - center_2nd_reg: coordinates of the center of the lesions of the second time point registered - second_orig_volumes: volume of the lesions of the second time point - second_reg_volumes: volume of the lesions of the second time point registered - + path_first_lesion_seg: path to the lesion segmentation of the first image + path_second_lesion_seg: path to the lesion segmentation of the second image + Returns: - matching_table: table containing the matching between lesions of the second time point and lesions of the second time point registered + t1_dict: dictionary containing the list of lesions, the volume of the lesions and the center of the lesions for the first time point + t2_dict: dictionary containing the list of lesions, the volume of the lesions and the center of the lesions for the second time point """ - # Create a graph for each time point - graph_t2_orig = nx.Graph() - graph_t2_reg = nx.Graph() - - # Add nodes (lesions) to each graph - graph_t2_orig.add_nodes_from(range(len(center_2nd_orig))) - graph_t2_reg.add_nodes_from(range(len(center_2nd_reg))) - - # Add edges between nodes with similarity based on lesion distance - for i, lesion_reg in enumerate(center_2nd_reg): - for j, lesion_orig in enumerate(center_2nd_orig): - distance = lesion_distance(center_2nd_reg[lesion_reg], center_2nd_orig[lesion_orig], second_reg_volumes[i][1], second_orig_volumes[j][1]) - graph_t2_reg.add_edge(i, j, weight=distance) - graph_t2_orig.add_edge(j, i, weight=distance) # Adding bidirectional edges - - # Use Hungarian algorithm for graph matching - cost_matrix = np.array(nx.to_numpy_array(graph_t2_reg)) - row_ind, col_ind = linear_sum_assignment(cost_matrix) - - # Create temporal links based on matching results - matching_reg_to_orig = [(i, j) for i, j in zip(range(len(center_2nd_reg)), col_ind) if cost_matrix[i, j] < np.inf] + #we first load data + first_lesion_seg = nib.load(path_first_lesion_seg) + first_lesion_seg_data = first_lesion_seg.get_fdata() + second_lesion_seg = nib.load(path_second_lesion_seg) + second_lesion_seg_data = second_lesion_seg.get_fdata() + + #we get voxel volume + voxel_volume_t1 = np.prod(first_lesion_seg.header.get_zooms()) + voxel_volume_t2 = np.prod(second_lesion_seg.header.get_zooms()) + + #we iterate over the lesions of the first time point + list_lesions_t1 = np.unique(first_lesion_seg_data) + list_lesions_t1 = list_lesions_t1[1:] #removed 0 + volume_lesions_t1 = [] + center_t1 = [] + for lesion in list_lesions_t1: + volume_lesions_t1.append(len(np.argwhere(first_lesion_seg_data == lesion))*voxel_volume_t1) + center_t1.append(np.mean(np.argwhere(first_lesion_seg_data == lesion), axis=0)) + + rounded_list_t1 = [round(elem) for elem in list_lesions_t1] + t1_dict = {"list_lesions": rounded_list_t1, "volume_lesions": volume_lesions_t1, "center": center_t1} - return matching_reg_to_orig + #we iterate over the lesions of the second time point + list_lesions_t2 = np.unique(second_lesion_seg_data) + list_lesions_t2 = list_lesions_t2[1:] #removed 0 + volume_lesions_t2 = [] + center_t2 = [] + for lesion in list_lesions_t2: + volume_lesions_t2.append(len(np.argwhere(second_lesion_seg_data == lesion))*voxel_volume_t2) + center_t2.append(np.mean(np.argwhere(second_lesion_seg_data == lesion), axis=0)) + + rounded_list_t2 = 
[round(elem) for elem in list_lesions_t2] + t2_dict = {"list_lesions": rounded_list_t2, "volume_lesions": volume_lesions_t2, "center": center_t2} - + return t1_dict, t2_dict + def main(): """ @@ -311,6 +355,7 @@ def main(): Returns: None """ + #get the parser parser = get_parser() args = parser.parse_args() @@ -321,74 +366,51 @@ def main(): path_second_lesion_seg_reg = args.output_folder + '/second_lesion_seg_registered.nii.gz' #we then perform temporal lesion matching using the 1st time point lesion segmentation and the 2nd time point lesion segmentation registered to the 1st time point - matching_table_1st_to_2nd_reg, center_1st, center_2nd_reg, first_data_labeled, second_data_reg_labeled = temporal_lesion_matching_on_reg_image(args.segmentation_first_image, path_second_lesion_seg_reg) - - #we then perfrom lesion clustering on the original lesion segmentation file from the second time point - orig_second_lesion_seg = nib.load(args.segmentation_second_image) - orig_second_lesion_seg_data = orig_second_lesion_seg.get_fdata() - orig_second_lesion_seg_coordinates = np.argwhere(orig_second_lesion_seg_data == 1) - clustering_2nd_orig = DBSCAN(eps=10, min_samples=5).fit(orig_second_lesion_seg_coordinates) - orig_second_lesion_seg_labels = clustering_2nd_orig.labels_ - #we label the data by cluster - orig_second_lesion_seg_data_labeled = np.zeros(orig_second_lesion_seg_data.shape) - for i,voxel in enumerate(orig_second_lesion_seg_coordinates): - orig_second_lesion_seg_data_labeled[voxel[0], voxel[1], voxel[2]] = orig_second_lesion_seg_labels[i]+1 - center_2nd_orig = {} - #we compute the centers - for lesion_2nd in np.unique(orig_second_lesion_seg_data_labeled): - if lesion_2nd != 0: - orig_second_lesion_seg_coordinates2 = np.argwhere(orig_second_lesion_seg_data_labeled == lesion_2nd) - center_2nd_orig[int(lesion_2nd)] = np.mean(orig_second_lesion_seg_coordinates2, axis=0) - - - """ - #we then perform lesion matching between the second time point lesion segmentation and the second time point lesion segmentation registered to the first time point - second_reg_voxel_volume = np.prod(nib.load(args.input_first_image).header.get_zooms()) - second_voxel_volume = np.prod(nib.load(args.input_second_image).header.get_zooms()) - second_reg_volumes = volume_lesions(second_data_reg_labeled, second_reg_voxel_volume) - second_orig_volumes = volume_lesions(orig_second_lesion_seg_data_labeled, second_voxel_volume) - matching_reg_to_orig = graph_matching(center_2nd_orig, center_2nd_reg, second_orig_volumes, second_reg_volumes) - - + lesion_dict, final_first_lesion_seg, final_second_lesion_seg_reg = temporal_lesion_matching_on_reg_image(args.segmentation_first_image, path_second_lesion_seg_reg, args.output_folder) - #we build a matching table between the lesions of the first time point and the lesions of the second time point - matching_table = np.empty(matching_table_1st_to_2nd_reg.shape) - for i in range(len(matching_table_1st_to_2nd_reg)): - matching_table[i][0] = matching_table_1st_to_2nd_reg[matching_reg_to_orig[i][1]][0] - matching_table[i][1] = matching_table_1st_to_2nd_reg[matching_reg_to_orig[i][1]][1] + #we then perform inverse warpping on the image + # os.system('sct_apply_transfo -i ' + args.output_folder +'/second_lesion_seg_clustered.nii.gz' + ' -d ' + args.segmentation_second_image + ' -w ' + args.output_folder + '/warp_1st_to_2nd.nii.gz' + # + ' -o ' + args.output_folder + '/second_lesion_seg_inv_wrapped.nii.gz' + ' -x nn') - #compute the volume of the lesions - first_voxel_volume = 
np.prod(nib.load(args.input_first_image).header.get_zooms()) - second_voxel_volume = np.prod(nib.load(args.input_second_image).header.get_zooms()) - first_volumes = volume_lesions(first_data_labeled, first_voxel_volume) - second_volumes = volume_lesions(orig_second_lesion_seg_data_labeled, second_voxel_volume) + #now we perform lesion matching between the 2nd time point lesion segmentation and the 2nd time point lesion segmentation inversely wrapped + t2_final_seg = lesion_matching(args.segmentation_second_image, args.output_folder + '/second_lesion_seg_inv_wrapped.nii.gz', args.output_folder) - #we save the results in nifti files - second_data_reg_labeled = nib.Nifti1Image(second_data_reg_labeled, orig_second_lesion_seg.affine, orig_second_lesion_seg.header) - nib.save(second_data_reg_labeled, args.output_folder + '/second_lesion_seg_registered_clustered222.nii.gz') - orig_second_lesion_seg_data_labeled = nib.Nifti1Image(orig_second_lesion_seg_data_labeled, orig_second_lesion_seg.affine, orig_second_lesion_seg.header) - nib.save(orig_second_lesion_seg_data_labeled, args.output_folder + '/second_lesion_seg_clustered333.nii.gz') - """ + #now we perform comparison of lesions from t1 and t2 + t1_dict, t2_dict = comparison_lesions_t1_t2(args.segmentation_first_image,t2_final_seg) - #print output file - """ + #we then print the output file + list_new_lesions = set(t2_dict['list_lesions']).difference(set(t1_dict['list_lesions'])) + common_lesions = list(set(t2_dict['list_lesions']).intersection(set(t1_dict['list_lesions']))) + with open(args.output_folder + '/temporal_analysis_results.txt', 'w') as f: f.write('TEMPORAL ANALYSIS OF MULTIPLE SCLEROSIS LESIONS:\n') f.write('------------------------------------------------\n') f.write('Subject Name: at t0 ' + args.input_first_image.split('/')[-1] + ' and at t1 ' + args.input_second_image.split('/')[-1] + ' \n') f.write('------------------------------------------------\n') + f.write('Lesions at t0: ' + str(len(t1_dict['list_lesions'])) + '\n') + f.write('Lesions at t1: ' + str(len(t2_dict['list_lesions'])) + '\n') + f.write('------------------------------------------------\n') f.write('Lesions at t0: \n') + for lesion in t1_dict['list_lesions']: + f.write('Lesion ' + str(int(lesion)) + ' \n') + f.write('Volume: ' + str(t1_dict['volume_lesions'][t1_dict['list_lesions'].index(lesion)]) + ' mm3\n') + f.write('Center: ' + str(t1_dict['center'][t1_dict['list_lesions'].index(lesion)]) + '\n') + f.write('\n') + f.write('------------------------------------------------\n') + f.write('Lesions at t1: \n') + for lesion in t2_dict['list_lesions']: + f.write('Lesion ' + str(int(lesion)) + ' \n') + f.write('Volume: ' + str(t2_dict['volume_lesions'][t2_dict['list_lesions'].index(lesion)]) + ' mm3\n') + f.write('Center: ' + str(t2_dict['center'][t2_dict['list_lesions'].index(lesion)]) + '\n') + f.write('\n') + f.write('------------------------------------------------\n') + f.write('EVOLUTION:') + for lesion in common_lesions: + f.write('Lesion ' + str(int(lesion)) + ' \n') + f.write('Volume at t0: ' + str(t1_dict['volume_lesions'][t1_dict['list_lesions'].index(lesion)]) + ' mm3\n') + f.write('Volume at t1: ' + str(t2_dict['volume_lesions'][t2_dict['list_lesions'].index(lesion)]) + ' mm3\n') + f.write('Volume increase in %: ' + str((t2_dict['volume_lesions'][t2_dict['list_lesions'].index(lesion)] - t1_dict['volume_lesions'][t1_dict['list_lesions'].index(lesion)])/t1_dict['volume_lesions'][t1_dict['list_lesions'].index(lesion)]*100) + ' %\n') 
f.write('------------------------------------------------\n') - f.write('Number of lesions: ' + str(len(np.unique(first_data_labeled))-1) + '\n') - for lesion in first_volumes: - f.write('Lesion ' + str(int(lesion[0])) + ' volume: ' + str(lesion[1]) + ' mm3\n') - """ - - - - - - if __name__ == '__main__': From c63ba5f37ab8d0cb5c0a9297ae59eb18ead13fc0 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Mon, 11 Sep 2023 17:49:11 -0400 Subject: [PATCH 16/26] formatting of script --- lesion_analysis/lesion_seg_analysis.py | 6 +- .../time_point_lesion_evolution.py | 249 ++++++++++-------- 2 files changed, 148 insertions(+), 107 deletions(-) diff --git a/lesion_analysis/lesion_seg_analysis.py b/lesion_analysis/lesion_seg_analysis.py index cc21366..14a9b62 100644 --- a/lesion_analysis/lesion_seg_analysis.py +++ b/lesion_analysis/lesion_seg_analysis.py @@ -4,8 +4,7 @@ Also, we want to output the segmentation file of the lesions in different color Usage: - python3 lesion_seg_analysis.py -i -seg -o - python3 canproco/lesion_analysis/lesion_seg_analysis.py -i ./data/lesion_comparison/sub-cal072_ses-M12_STIR.nii.gz -seg ./data/lesion_comparison/canproco_003.nii.gz -o ./data/lesion_comparison/output + python lesion_seg_analysis.py -i -seg -o Args: -i/--input_image: path to the image @@ -30,6 +29,7 @@ import matplotlib.pyplot as plt from sklearn.cluster import DBSCAN + def get_parser(): """ This function parses the arguments given to the script. @@ -52,6 +52,7 @@ def get_parser(): help='Whether to plot the results or not') return parser + def main(): """ This function is the main function of the script. @@ -145,6 +146,7 @@ def main(): f.write(f'Lesion {i+1} : volume: {round(lesion_volumes[i],2)} mm3, center: {lesion_centers[i]}\n') return None + if __name__ == '__main__': main() diff --git a/lesion_analysis/time_point_lesion_evolution.py b/lesion_analysis/time_point_lesion_evolution.py index 417ae54..adc65f1 100644 --- a/lesion_analysis/time_point_lesion_evolution.py +++ b/lesion_analysis/time_point_lesion_evolution.py @@ -3,21 +3,22 @@ It first performs a registration of the images and their segmentation on the two time points. Then, it performs clustering of the lesions on each image and computes the volume and the center of each lesion. Finally, it compares the lesions between the two time points and computes the evolution of the lesions. 
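+In short, the pipeline is: spinal cord segmentation (sct_deepseg_sc), vertebral labelling (sct_label_vertebrae), registration of the second image onto the first (sct_register_multimodal), warping of the second lesion mask (sct_apply_transfo), then DBSCAN clustering and overlap-based matching of the lesions.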
+This file relies on the Spinal Cord Toolbox (version 6.0).
 
 Args:
-    -i-1, --input-first_image: path to the first image
-    -i-2, --input-second_image: path to the second image
-    -seg-1, --segmentation-first_image: path to the lesion segmentation of the first image
-    -seg-2, --segmentation-second_image: path to the lesion segmentation of the second image
-    -o, --output_folder: path to the output folder
-    --plot: whether to plot the results or not
+    -i-1, --input-first-image: path to the first image
+    -i-2, --input-second-image: path to the second image
+    -seg-1, --segmentation-first-image: path to the lesion segmentation of the first image
+    -seg-2, --segmentation-second-image: path to the lesion segmentation of the second image
+    -sc-seg-1, --sc-segmentation-first-image: path to the spinal cord segmentation of the first image
+    -sc-seg-2, --sc-segmentation-second-image: path to the spinal cord segmentation of the second image
+    -o, --output-folder: path to the output folder
 
 Returns:
     None
 
 Example:
     python time_point_lesion_evolution.py -i-1 path/to/first/image -i-2 path/to/second/image -seg-1 path/to/first/segmentation -seg-2 path/to/second/segmentation -o path/to/output/folder
-    python3 lesion_analysis/time_point_lesion_evolution.py -i-1 /Users/plbenveniste/Desktop/lesion_comparison_copy/sub-cal072_ses-M0_STIR.nii.gz -i-2 /Users/plbenveniste/Desktop/lesion_comparison_copy/sub-cal072_ses-M12_STIR.nii.gz -seg-1 /Users/plbenveniste/Desktop/lesion_comparison_copy/sub-cal072_ses-M0_STIR_lesion-manual.nii.gz -seg-2 /Users/plbenveniste/Desktop/lesion_comparison_copy/M12_inference_results.nii.gz -o /Users/plbenveniste/Desktop/lesion_comparison_copy/output
 
 To do:
     * Remove some of the outputed files
 
 Pierre-Louis Benveniste
 """
@@ -33,7 +34,8 @@
 import matplotlib.pyplot as plt
 from sklearn.cluster import DBSCAN
 import networkx as nx
-from scipy.optimize import linear_sum_assignment
+from time import time
+
 
 def get_parser():
     """
@@ -46,23 +48,26 @@ def get_parser():
         parser: parser containing the arguments
     """
 
-    parser = argparse.ArgumentParser(description='Analyse the results of the segmentation of the MS lesion on the spinal cord.')
-    parser.add_argument('-i-1', '--input-first_image', required=True,
+    parser = argparse.ArgumentParser(description='Analyse and compare the results of the segmentation of the MS lesion on the spinal cord at two time points.')
+    parser.add_argument('-i-1', '--input-first-image', required=True,
                         help='Path to the first image')
-    parser.add_argument('-i-2', '--input-second_image', required=True,
+    parser.add_argument('-i-2', '--input-second-image', required=True,
                         help='Path to the second image')
-    parser.add_argument('-seg-1', '--segmentation-first_image', required=True,
+    parser.add_argument('-seg-1', '--segmentation-first-image', required=True,
                         help='Path to the lesion segmentation of the first image')
-    parser.add_argument('-seg-2', '--segmentation-second_image', required=True,
+    parser.add_argument('-seg-2', '--segmentation-second-image', required=True,
                         help='Path to the lesion segmentation of the second image')
-    parser.add_argument('-o', '--output_folder', required=True,
+    parser.add_argument('-sc-seg-1', '--sc-segmentation-first-image', required=False, default=None,
+                        help='Path to the spinal cord segmentation of the first image')
+    parser.add_argument('-sc-seg-2', '--sc-segmentation-second-image', required=False, default=None,
+                        help='Path to the spinal cord segmentation of the second image')
+    parser.add_argument('-o', '--output-folder', required=True,
                         help='Path to the output folder')
-    
parser.add_argument('--show', action='store_true', - help='Whether to show the results or not') + return parser -def perform_registration(path_first_image, path_second_image, path_first_lesion_seg, path_second_lesion_seg, path_output_folder): +def perform_registration(path_first_image, path_second_image, path_first_lesion_seg, path_second_lesion_seg, path_sc_seg_first_image, path_sc_seg_second_image, path_output_folder): """ This function performs the registration of the images and the segmentations using spinal cord toolbox @@ -71,6 +76,8 @@ def perform_registration(path_first_image, path_second_image, path_first_lesion_ path_second_image: path to the second image path_first_lesion_seg: path to the lesion segmentation of the first image path_second_lesion_seg: path to the lesion segmentation of the second image + path_sc_seg_first_image: path to the spinal cord segmentation of the first image + path_sc_seg_second_image: path to the spinal cord segmentation of the second image path_output_folder: path to the output folder Returns: @@ -80,52 +87,82 @@ def perform_registration(path_first_image, path_second_image, path_first_lesion_ path_second_lesion_seg_registered: path to the second lesion segmentation registered """ - #first we segment the spinal cord on the two images using sct_deepseg_sc - os.system('sct_deepseg_sc -i ' + path_first_image + ' -c t2 -o ' + path_output_folder + '/first_image_sc_seg.nii.gz') - os.system('sct_deepseg_sc -i ' + path_second_image + ' -c t2 -o' + path_output_folder + '/second_image_sc_seg.nii.gz') + # Segmentation of the spinal cord on the two images + ## If the spinal cord segmentation is given, we use it: + if path_sc_seg_first_image is not None: + first_sc_output_path = path_sc_seg_first_image + ## Otherwise, we compute it using sct_deepseg_sc + else: + first_sc_output_path = os.path.join(path_output_folder, 'first_image_sc_seg.nii.gz') + os.system('sct_deepseg_sc -i ' + path_first_image + ' -c t2 -o ' + first_sc_output_path + ' -qc ' + os.path.join(path_output_folder, 'QC_folder')) + + ## Same for the second image + if path_sc_seg_second_image is not None: + second_sc_output_path = path_sc_seg_second_image + else: + second_sc_output_path = os.path.join(path_output_folder, 'second_image_sc_seg.nii.gz') + os.system('sct_deepseg_sc -i ' + path_second_image + ' -c t2 -o ' + second_sc_output_path + ' -qc ' + os.path.join(path_output_folder, 'QC_folder')) + + # Then we compute the vertebral levels of the images using sct_label_vertebrae + first_vert_seg_path = os.path.join(path_output_folder, 'first_image_sc_seg.nii.gz') + second_vert_seg_path = os.path.join(path_output_folder, 'second_image_sc_seg.nii.gz') + os.system('sct_label_vertebrae -i ' + path_first_image + ' -s ' + first_vert_seg_path + ' -c t2 -ofolder ' + path_output_folder + ' -qc ' + os.path.join(path_output_folder, 'QC_folder')) + os.system('sct_label_vertebrae -i ' + path_second_image + ' -s ' + second_vert_seg_path + ' -c t2 -ofolder ' + path_output_folder + ' -qc ' + os.path.join(path_output_folder, 'QC_folder')) + + # We remove the vertebral levels that are not in common in both images using 'sct_label_utils -remove-reference' + first_vert_output_path = os.path.join(path_output_folder, 'first_image_sc_seg_labeled.nii.gz') + second_vert_output_path = os.path.join(path_output_folder, 'second_image_sc_seg_labeled.nii.gz') + t0=time() + print(t0) + os.system('sct_label_utils -i ' + first_vert_output_path + ' -remove-reference ' + second_vert_output_path + ' -o ' + first_vert_output_path) + t1 = 
time() + print("time to remove-reference",t1-t0) + os.system('sct_label_utils -i ' + second_vert_output_path + ' -remove-reference ' + first_vert_output_path + ' -o ' + second_vert_output_path) + t2 = time() + print("time to remove-reference",t2-t1) - #then we compute the vertebral levels of the images using sct_label_vertebrae - os.system('sct_label_vertebrae -i ' + path_first_image + ' -s ' + path_output_folder + '/first_image_sc_seg.nii.gz' + ' -c t2 -ofolder ' + path_output_folder) - os.system('sct_label_vertebrae -i ' + path_second_image + ' -s ' + path_output_folder + '/second_image_sc_seg.nii.gz' + ' -c t2 -ofolder ' + path_output_folder) - - #we remove labels that are not in common in both images - ##we first get the labels of the two images - first_label = nib.load(path_output_folder + '/first_image_sc_seg_labeled.nii.gz') - second_label = nib.load(path_output_folder + '/second_image_sc_seg_labeled.nii.gz') + """ + ## We first get the labels of the two images + first_label = nib.load(os.path.join(path_output_folder, 'first_image_sc_seg_labeled.nii.gz')) + second_label = nib.load(os.path.join(path_output_folder, 'second_image_sc_seg_labeled.nii.gz')) first_label_data = first_label.get_fdata() second_label_data = second_label.get_fdata() common_labels = np.intersect1d(np.unique(first_label_data), np.unique(second_label_data)) - #we then remove the labels that are not in common + # We then remove the labels that are not in common for label in np.unique(first_label_data): if label not in common_labels: first_label_data[first_label_data == label] = 0 - #we overwrite the segmentation to keep only common labels + # We overwrite the segmentation to keep only common labels first_label = nib.Nifti1Image(first_label_data, first_label.affine, first_label.header) - nib.save(first_label, path_output_folder + '/first_image_sc_seg_labeled.nii.gz') - #same for the second image + nib.save(first_label, os.path.join(path_output_folder, 'first_image_sc_seg_labeled.nii.gz')) + # Same for the second image for label in np.unique(second_label_data): if label not in common_labels: second_label_data[second_label_data == label] = 0 second_label = nib.Nifti1Image(second_label_data, second_label.affine, second_label.header) - nib.save(second_label, path_output_folder + '/second_image_sc_seg_labeled.nii.gz') + nib.save(second_label, os.path.join(path_output_folder, 'second_image_sc_seg_labeled.nii.gz')) - #then we register the images and the segmentations with the first image as reference using sct_register_multimodal using both the spinal cord segmentation and the vertebral levels + """ + + # Then we register the images and the segmentations with the first image as reference using sct_register_multimodal using both the spinal cord segmentation and the vertebral levels parameters = 'step=0,type=label,dof=Tx_Ty_Tz:step=1,type=im,algo=dl' - os.system('sct_register_multimodal -i ' + path_second_image + ' -d ' + path_first_image + ' -ilabel ' + path_output_folder + '/second_image_sc_seg_labeled.nii.gz' - + ' -dlabel ' + path_output_folder + '/first_image_sc_seg_labeled.nii.gz' + ' -ofolder ' + path_output_folder + ' -owarp ' + path_output_folder + '/warp_2nd_to_1st.nii.gz' - + ' -owarpinv ' + path_output_folder + '/warp_1st_to_2nd.nii.gz' + ' -param ' + parameters + ' -x linear ') + os.system('sct_register_multimodal -i ' + path_second_image + ' -d ' + path_first_image + ' -ilabel ' + os.path.join(path_output_folder,'second_image_sc_seg_labeled.nii.gz') + + ' -dlabel ' + 
os.path.join(path_output_folder,'first_image_sc_seg_labeled.nii.gz') + ' -dseg ' + first_sc_output_path + ' -ofolder ' + path_output_folder + ' -owarp ' + os.path.join(path_output_folder,'warp_2nd_to_1st.nii.gz') + + ' -owarpinv ' + os.path.join(path_output_folder, 'warp_1st_to_2nd.nii.gz') + ' -param ' + parameters + ' -x linear -qc ' + os.path.join(path_output_folder, 'QC_folder')) - #we apply the warping to the segmentation of the second image - os.system('sct_apply_transfo -i ' + path_second_lesion_seg + ' -d ' + path_first_lesion_seg + ' -w ' + path_output_folder + '/warp_2nd_to_1st.nii.gz' - + ' -o ' + path_output_folder + '/second_lesion_seg_registered.nii.gz' + ' -x nn') + # We apply the warping to the segmentation of the second image + os.system('sct_apply_transfo -i ' + path_second_lesion_seg + ' -d ' + path_first_lesion_seg + ' -w ' + os.path.join(path_output_folder,'warp_2nd_to_1st.nii.gz') + + ' -o ' + os.path.join(path_output_folder, 'second_lesion_seg_registered.nii.gz') + ' -x nn') - return path_output_folder + '/warp_2nd_to_1st.nii.gz', path_output_folder + '/warp_1st_to_2nd.nii.gz', path_output_folder + '/second_image_sc_seg_registered.nii.gz',path_output_folder + '/second_lesion_seg_registered.nii.gz' + return os.path.join(path_output_folder,'warp_2nd_to_1st.nii.gz'), os.path.join(path_output_folder, 'warp_1st_to_2nd.nii.gz'), os.path.join(path_output_folder, 'second_image_sc_seg_registered.nii.gz'), os.path.join(path_output_folder, 'second_lesion_seg_registered.nii.gz') def temporal_lesion_matching_on_reg_image(path_first_lesion_seg, path_second_lesion_seg_reg, path_output_folder): """ This function performs temporal lesion matching using the 1st time point lesion segmentation and the 2nd time point lesion segmentation registered to the 1st time point. + The matching is done using overlapping. If more than 50% of the lesion of the 1st time point is overlapping with a lesion of the 2nd time point, + then we consider that the two lesions are the same. 
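+    For example, if a lesion of the 1st time point counts 100 voxels and 60 of them are covered by a lesion of the registered 2nd time point mask, the overlap ratio is 0.6 > 0.5 and the two lesions are matched.
+    Each binary mask is first turned into a labeled volume (one integer ID per lesion) with DBSCAN. A minimal sketch of that recurring idiom, assuming a generic binary numpy array called mask (names are illustrative, not the variables below):
+
+        coords = np.argwhere(mask == 1)                              # voxel coordinates of the mask
+        labels = DBSCAN(eps=10, min_samples=5).fit(coords).labels_   # one cluster label per voxel
+        labeled = np.zeros(mask.shape)
+        labeled[tuple(coords.T)] = labels + 1                        # lesion IDs start at 1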
Args: path_first_lesion_seg: path to the lesion segmentation of the first image @@ -140,34 +177,36 @@ def temporal_lesion_matching_on_reg_image(path_first_lesion_seg, path_second_les 2nd_data_labeled: segmentation of the second time point with each lesion in a different color """ - #because registration modifies the values of the segmentation, we first replace all values above a threshold by 1 + # Just in case, we binarize the second lesion segmentation registered to the first image to avoid any problem second_lesion_seg_reg = nib.load(path_second_lesion_seg_reg) second_lesion_seg_reg_data = second_lesion_seg_reg.get_fdata() second_lesion_seg_reg_data[second_lesion_seg_reg_data > 1e-5] = 1 - #we then load data from the first lesion segmentation + # We then load data from the first lesion segmentation first_lesion_seg = nib.load(path_first_lesion_seg) first_lesion_seg_data = first_lesion_seg.get_fdata() - # we then perform clustering on the two images - # we first get the coordinates of the lesions + # We then perform clustering on the two images + # We first get the coordinates of the lesions first_lesion_seg_coordinates = np.argwhere(first_lesion_seg_data == 1) second_lesion_seg_reg_coordinates = np.argwhere(second_lesion_seg_reg_data == 1) - # we then perform clustering on the coordinates + # We then perform clustering on the coordinates clustering_1st = DBSCAN(eps=10, min_samples=5).fit(first_lesion_seg_coordinates) first_lesion_seg_labels = clustering_1st.labels_ clustering_2nd = DBSCAN(eps=10, min_samples=5).fit(second_lesion_seg_reg_coordinates) second_lesion_seg_reg_labels = clustering_2nd.labels_ - #create the same tables with the lesions in different colors (voxel value = lesion ID) + # Create the same tables with the lesions with unique voxel value (voxel value = lesion ID) first_lesion_seg_data2 = np.zeros(first_lesion_seg_data.shape) second_lesion_seg_reg_data2 = np.zeros(second_lesion_seg_reg_data.shape) for i,voxel in enumerate(first_lesion_seg_coordinates): + # Adding "1" to first_lesion_seg_labels[i] because the first label is 0. 
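+        # Note: DBSCAN labels noise voxels -1, so the shift by 1 maps them to 0 (background).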
first_lesion_seg_data2[voxel[0], voxel[1], voxel[2]] = first_lesion_seg_labels[i]+1 for i,voxel in enumerate(second_lesion_seg_reg_coordinates): + # Same here second_lesion_seg_reg_data2[voxel[0], voxel[1], voxel[2]] = second_lesion_seg_reg_labels[i]+1 - #for each lesion of the second time point we consider that it corresponds to the lesion of the first time point with which it overlaps more than 50% + # For each lesion of the second time point we consider that it corresponds to the lesion of the first time point with which it overlaps more than 50% list_lesion_2nd = [] corresponding_lesion_1st = [] overlapping_ratio = [] @@ -177,22 +216,22 @@ def temporal_lesion_matching_on_reg_image(path_first_lesion_seg, path_second_les for lesion_2nd in np.unique(second_lesion_seg_reg_data2): if lesion_2nd != 0: list_lesion_2nd.append(lesion_2nd) - #we compute the centers + # We compute the centers second_lesion_iter_center = np.argwhere(second_lesion_seg_reg_data2 == lesion_2nd) center_2nd.append(np.mean(second_lesion_iter_center, axis=0)) for lesion_1st in np.unique(first_lesion_seg_data2): - #we compute the centers + # We compute the centers if first_pass: first_lesion_iter_center = np.argwhere(first_lesion_seg_data2 == lesion_1st) center_1st.append(np.mean(first_lesion_iter_center, axis=0)) first_pass = False - #we compute the overlap + # We compute the overlap if lesion_1st !=0: - #we get the coordinates of the lesion + # We get the coordinates of the lesion second_lesion_seg_coordinates2 = np.argwhere(second_lesion_seg_reg_data2 == lesion_2nd) - #we get the coordinates of the lesion in the first time point + # We get the coordinates of the lesion in the first time point first_lesion_seg_coordinates = np.argwhere(first_lesion_seg_data2 == lesion_1st) - #we compute the overlap of the two sets of coordinates + # We compute the overlap of the two sets of coordinates second_set = set( tuple(points) for points in second_lesion_seg_coordinates2) first_set = set(tuple(points) for points in first_lesion_seg_coordinates) intersection = len(second_set.intersection(first_set)) @@ -207,7 +246,7 @@ def temporal_lesion_matching_on_reg_image(path_first_lesion_seg, path_second_les lesion_dict = {"list_lesion_2nd": list_lesion_2nd, "corresponding_lesion_1st": corresponding_lesion_1st, "overlapping_ratio": overlapping_ratio, "center_2nd": center_2nd, "center_1st": center_1st} - #finally we overwrite the segmentation files with the lesions in colors matching from t1 to t2 + # Finally we overwrite the segmentation files with the lesions in colors matching from t1 to t2 final_first_lesion_seg = np.zeros(first_lesion_seg_data2.shape) for lesion in np.unique(first_lesion_seg_data2): if lesion!=0: @@ -217,43 +256,43 @@ def temporal_lesion_matching_on_reg_image(path_first_lesion_seg, path_second_les final_first_lesion_seg[coord[0], coord[1], coord[2]] = lesion_dict['list_lesion_2nd'][index_lesion[0]] final_second_lesion_seg_reg = np.copy(second_lesion_seg_reg_data2) - #we save the results in nifti files + # We save the results in nifti files first_lesion_seg2 = nib.Nifti1Image(first_lesion_seg_data2, first_lesion_seg.affine, first_lesion_seg.header) second_lesion_seg2 = nib.Nifti1Image(final_second_lesion_seg_reg, second_lesion_seg_reg.affine, second_lesion_seg_reg.header) - nib.save(first_lesion_seg2, path_output_folder + '/first_lesion_seg_clustered.nii.gz') - nib.save(second_lesion_seg2, path_output_folder + '/second_lesion_seg_clustered.nii.gz') + nib.save(first_lesion_seg2, os.path.join(path_output_folder, 
'first_lesion_seg_clustered.nii.gz')) + nib.save(second_lesion_seg2, os.path.join(path_output_folder, 'second_lesion_seg_clustered.nii.gz')) return lesion_dict, final_first_lesion_seg, final_second_lesion_seg_reg -def lesion_matching(path_t2_seg, path_t2_inv_wrapped, path_output_folder): +def lesion_matching(path_t2_seg, path_t2_inv_warpped, path_output_folder): """ - This function performs lesion matching between the 2nd time point lesion segmentation and the 2nd time point lesion segmentation inversely wrapped. + This function performs lesion matching between the 2nd time point lesion segmentation and the 2nd time point lesion segmentation inversely warpped. Args: path_t2_seg: path to the lesion segmentation of the second time point - path_t2_inv_wrapped: path to the lesion segmentation of the second time point inversely wrapped + path_t2_inv_warpped: path to the lesion segmentation of the second time point inversely warpped path_output_folder: path to the output folder Returns: - path_t2_seg: path to the lesion segmentation of the second time point with the lesions in same color as the lesions of the second time point inversely wrapped + path_t2_seg: path to the lesion segmentation of the second time point with the lesions in same color as the lesions of the second time point inversely warpped """ - #we first load data + # We first load data t2_seg = nib.load(path_t2_seg) t2_seg_data = t2_seg.get_fdata() - t2_inv_wrapped = nib.load(path_t2_inv_wrapped) - t2_inv_wrapped_data = t2_inv_wrapped.get_fdata() + t2_inv_warpped = nib.load(path_t2_inv_warpped) + t2_inv_warpped_data = t2_inv_warpped.get_fdata() - #we then perform clustering on t2_seg + # We then perform clustering on t2_seg t2_seg_coordinates = np.argwhere(t2_seg_data == 1) clustering_t2 = DBSCAN(eps=10, min_samples=5).fit(t2_seg_coordinates) t2_seg_labels = clustering_t2.labels_ - #from this we build a dataset with the lesions in different colors + # From this we build a dataset with the lesions in different colors t2_seg_data_labeled = np.zeros(t2_seg_data.shape) for i,voxel in enumerate(t2_seg_coordinates): t2_seg_data_labeled[voxel[0], voxel[1], voxel[2]] = t2_seg_labels[i]+1 - #we now compute the centers of the lesions + # We now compute the centers of the lesions center_t2 = [] color_t2 = [] for lesion_t2 in np.unique(t2_seg_data_labeled): @@ -261,33 +300,33 @@ def lesion_matching(path_t2_seg, path_t2_inv_wrapped, path_output_folder): t2_seg_coordinates2 = np.argwhere(t2_seg_data_labeled == lesion_t2) center_t2.append(np.mean(t2_seg_coordinates2, axis=0)) color_t2.append(lesion_t2) - center_t2_inv_wrapped = [] - color_t2_inv_wrapped = [] - for lesion_t2 in np.unique(t2_inv_wrapped_data): + center_t2_inv_warpped = [] + color_t2_inv_warpped = [] + for lesion_t2 in np.unique(t2_inv_warpped_data): if lesion_t2 != 0: - t2_inv_wrapped_coordinates = np.argwhere(t2_inv_wrapped_data == lesion_t2) - center_t2_inv_wrapped.append(np.mean(t2_inv_wrapped_coordinates, axis=0)) - color_t2_inv_wrapped.append(lesion_t2) + t2_inv_warpped_coordinates = np.argwhere(t2_inv_warpped_data == lesion_t2) + center_t2_inv_warpped.append(np.mean(t2_inv_warpped_coordinates, axis=0)) + color_t2_inv_warpped.append(lesion_t2) - if len(center_t2) != len(center_t2_inv_wrapped): + if len(center_t2) != len(center_t2_inv_warpped): print('Error: the number of lesions is different between the two segmentations.') return None - #we then perform matching by closest distance of the two center lists + # We then perform matching by closest distance of the two center 
lists matching_table = np.zeros((len(center_t2), 2)) for i, center in enumerate(center_t2): min_distance = np.inf - for j, center_inv_wrapped in enumerate(center_t2_inv_wrapped): - distance = np.sqrt((center[0] - center_inv_wrapped[0])**2 + (center[1] - center_inv_wrapped[1])**2 + (center[2] - center_inv_wrapped[2])**2) + for j, center_inv_warpped in enumerate(center_t2_inv_warpped): + distance = np.sqrt((center[0] - center_inv_warpped[0])**2 + (center[1] - center_inv_warpped[1])**2 + (center[2] - center_inv_warpped[2])**2) if distance < min_distance: min_distance = distance matching_table[i][0] = i matching_table[i][1] = j - #we then overwrite the segmentation file with the lesions in colors matching from t1 to t2 + # We then overwrite the segmentation file with the lesions in colors matching from t1 to t2 final_t2_seg = np.zeros(t2_seg_data_labeled.shape) for i,lesion_value in enumerate(color_t2): - final_t2_seg[t2_seg_data_labeled == lesion_value] = color_t2_inv_wrapped[int(matching_table[i][1])] + final_t2_seg[t2_seg_data_labeled == lesion_value] = color_t2_inv_warpped[int(matching_table[i][1])] final_t2_seg = nib.Nifti1Image(final_t2_seg, t2_seg.affine, t2_seg.header) out_path = path_output_folder + path_t2_seg.split('/')[-1].split('.')[0] + '_matched.nii.gz' nib.save(final_t2_seg, out_path) @@ -308,17 +347,17 @@ def comparison_lesions_t1_t2(path_first_lesion_seg, path_second_lesion_seg): t2_dict: dictionary containing the list of lesions, the volume of the lesions and the center of the lesions for the second time point """ - #we first load data + # We first load data first_lesion_seg = nib.load(path_first_lesion_seg) first_lesion_seg_data = first_lesion_seg.get_fdata() second_lesion_seg = nib.load(path_second_lesion_seg) second_lesion_seg_data = second_lesion_seg.get_fdata() - #we get voxel volume + # We get voxel volume voxel_volume_t1 = np.prod(first_lesion_seg.header.get_zooms()) voxel_volume_t2 = np.prod(second_lesion_seg.header.get_zooms()) - #we iterate over the lesions of the first time point + # We iterate over the lesions of the first time point list_lesions_t1 = np.unique(first_lesion_seg_data) list_lesions_t1 = list_lesions_t1[1:] #removed 0 volume_lesions_t1 = [] @@ -330,7 +369,7 @@ def comparison_lesions_t1_t2(path_first_lesion_seg, path_second_lesion_seg): rounded_list_t1 = [round(elem) for elem in list_lesions_t1] t1_dict = {"list_lesions": rounded_list_t1, "volume_lesions": volume_lesions_t1, "center": center_t1} - #we iterate over the lesions of the second time point + # We iterate over the lesions of the second time point list_lesions_t2 = np.unique(second_lesion_seg_data) list_lesions_t2 = list_lesions_t2[1:] #removed 0 volume_lesions_t2 = [] @@ -355,49 +394,49 @@ def main(): Returns: None """ - - #get the parser + + # Get the parser parser = get_parser() args = parser.parse_args() - #we first perform the registration of the images and the segmentations - # path_warp, path_warpinv, path_second_image_reg, path_second_lesion_seg_reg = perform_registration(args.input_first_image, args.input_second_image, args.segmentation_first_image, - # args.segmentation_second_image, args.output_folder) - path_second_lesion_seg_reg = args.output_folder + '/second_lesion_seg_registered.nii.gz' + # We first perform the registration of the images and the segmentations + path_warp, path_warpinv, path_second_image_reg, path_second_lesion_seg_reg = perform_registration(args.input_first_image, args.input_second_image, args.segmentation_first_image, + args.segmentation_second_image, 
args.sc_segmentation_first_image, + args.sc_segmentation_second_image, args.output_folder) - #we then perform temporal lesion matching using the 1st time point lesion segmentation and the 2nd time point lesion segmentation registered to the 1st time point + # We then perform temporal lesion matching using the 1st time point lesion segmentation and the 2nd time point lesion segmentation registered to the 1st time point lesion_dict, final_first_lesion_seg, final_second_lesion_seg_reg = temporal_lesion_matching_on_reg_image(args.segmentation_first_image, path_second_lesion_seg_reg, args.output_folder) - #we then perform inverse warpping on the image - # os.system('sct_apply_transfo -i ' + args.output_folder +'/second_lesion_seg_clustered.nii.gz' + ' -d ' + args.segmentation_second_image + ' -w ' + args.output_folder + '/warp_1st_to_2nd.nii.gz' - # + ' -o ' + args.output_folder + '/second_lesion_seg_inv_wrapped.nii.gz' + ' -x nn') + # We then perform inverse warpping on the image + os.system('sct_apply_transfo -i ' + os.path.join(args.output_folder, 'second_lesion_seg_clustered.nii.gz') + ' -d ' + args.segmentation_second_image + ' -w ' + os.path.join(args.output_folder, 'warp_1st_to_2nd.nii.gz') + + ' -o ' + os.path.join(args.output_folder, 'second_lesion_seg_inv_warpped.nii.gz') + ' -x nn') - #now we perform lesion matching between the 2nd time point lesion segmentation and the 2nd time point lesion segmentation inversely wrapped - t2_final_seg = lesion_matching(args.segmentation_second_image, args.output_folder + '/second_lesion_seg_inv_wrapped.nii.gz', args.output_folder) + # Now we perform lesion matching between the 2nd time point lesion segmentation and the 2nd time point lesion segmentation inversely warpped + t2_final_seg = lesion_matching(args.segmentation_second_image, os.path.join(args.output_folder, 'second_lesion_seg_inv_warpped.nii.gz'), args.output_folder) - #now we perform comparison of lesions from t1 and t2 + # Now we perform comparison of lesions from t1 and t2 t1_dict, t2_dict = comparison_lesions_t1_t2(args.segmentation_first_image,t2_final_seg) - #we then print the output file + # We then print the output file list_new_lesions = set(t2_dict['list_lesions']).difference(set(t1_dict['list_lesions'])) common_lesions = list(set(t2_dict['list_lesions']).intersection(set(t1_dict['list_lesions']))) with open(args.output_folder + '/temporal_analysis_results.txt', 'w') as f: f.write('TEMPORAL ANALYSIS OF MULTIPLE SCLEROSIS LESIONS:\n') f.write('------------------------------------------------\n') - f.write('Subject Name: at t0 ' + args.input_first_image.split('/')[-1] + ' and at t1 ' + args.input_second_image.split('/')[-1] + ' \n') + f.write('Subject Name: at ses-0 ' + args.input_first_image.split('/')[-1] + ' and at ses-1 ' + args.input_second_image.split('/')[-1] + ' \n') f.write('------------------------------------------------\n') - f.write('Lesions at t0: ' + str(len(t1_dict['list_lesions'])) + '\n') - f.write('Lesions at t1: ' + str(len(t2_dict['list_lesions'])) + '\n') + f.write('Lesions at ses-0: ' + str(len(t1_dict['list_lesions'])) + '\n') + f.write('Lesions at ses-1: ' + str(len(t2_dict['list_lesions'])) + '\n') f.write('------------------------------------------------\n') - f.write('Lesions at t0: \n') + f.write('Lesions at ses-0: \n') for lesion in t1_dict['list_lesions']: f.write('Lesion ' + str(int(lesion)) + ' \n') f.write('Volume: ' + str(t1_dict['volume_lesions'][t1_dict['list_lesions'].index(lesion)]) + ' mm3\n') f.write('Center: ' + 
str(t1_dict['center'][t1_dict['list_lesions'].index(lesion)]) + '\n')
             f.write('\n')
         f.write('------------------------------------------------\n')
-        f.write('Lesions at t1: \n')
+        f.write('Lesions at ses-1: \n')
         for lesion in t2_dict['list_lesions']:
             f.write('Lesion ' + str(int(lesion)) + ' \n')
             f.write('Volume: ' + str(t2_dict['volume_lesions'][t2_dict['list_lesions'].index(lesion)]) + ' mm3\n')
@@ -407,8 +446,8 @@ def main():
         f.write('EVOLUTION:')
         for lesion in common_lesions:
             f.write('Lesion ' + str(int(lesion)) + ' \n')
-            f.write('Volume at t0: ' + str(t1_dict['volume_lesions'][t1_dict['list_lesions'].index(lesion)]) + ' mm3\n')
-            f.write('Volume at t1: ' + str(t2_dict['volume_lesions'][t2_dict['list_lesions'].index(lesion)]) + ' mm3\n')
+            f.write('Volume at ses-0: ' + str(t1_dict['volume_lesions'][t1_dict['list_lesions'].index(lesion)]) + ' mm3\n')
+            f.write('Volume at ses-1: ' + str(t2_dict['volume_lesions'][t2_dict['list_lesions'].index(lesion)]) + ' mm3\n')
             f.write('Volume increase in %: ' + str((t2_dict['volume_lesions'][t2_dict['list_lesions'].index(lesion)] - t1_dict['volume_lesions'][t1_dict['list_lesions'].index(lesion)])/t1_dict['volume_lesions'][t1_dict['list_lesions'].index(lesion)]*100) + ' %\n')
             f.write('------------------------------------------------\n')

From b19524570abafaee07ad0587d010a07943bf6a4d Mon Sep 17 00:00:00 2001
From: Pierre-Louis Benveniste
Date: Mon, 11 Sep 2023 17:49:51 -0400
Subject: [PATCH 17/26] data analysis of canproco

---
 data_analysis/data_analysis.py | 231 +++++++++++++++++++++++++++++++++
 1 file changed, 231 insertions(+)
 create mode 100644 data_analysis/data_analysis.py

diff --git a/data_analysis/data_analysis.py b/data_analysis/data_analysis.py
new file mode 100644
index 0000000..b4ce7c6
--- /dev/null
+++ b/data_analysis/data_analysis.py
@@ -0,0 +1,231 @@
+"""
+This python file performs data analysis on the canproco dataset.
+
+Args:
+    -d, --dataset-path: path to the dataset
+    -o, --output-path: path to the output directory
+
+Returns:
+    - a text file and a json file containing the results of the analysis
+
+Example:
+    python data_analysis.py -d /path/to/dataset -o /path/to/output -c STIR,PSIR
+
+To do:
+    *
+
+Pierre-Louis Benveniste
+"""
+
+
+import argparse
+import os
+import json
+
+
+def get_parser():
+    """
+    This function parses the arguments given to the script.
+
+    Args:
+        None
+
+    Returns:
+        parser: parser containing the arguments
+    """
+
+    parser = argparse.ArgumentParser(description='Perform data analysis on the canproco dataset')
+    parser.add_argument('-d', '--dataset-path', type=str, required=True, help='path to the dataset')
+    parser.add_argument('-o', '--output-path', type=str, required=True, help='path to the output directory')
+
+    return parser
+
+
+def main():
+    """
+    This function performs the data analysis.
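+    For each subject it lists the available time points (ses-M0, ses-M12) and, for each time point, the contrasts parsed from the BIDS file names. A minimal sketch of that parsing, for a name like 'sub-cal072_ses-M0_STIR.nii.gz' (illustrative file, same split as in the loop below):
+
+        contrast = file.split('_')[2].split('.')[0]   # -> 'STIR'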
+ + Args: + None + + Returns: + None + """ + # Get the parser + parser = get_parser() + args = parser.parse_args() + + # Get the arguments + dataset_path = args.dataset_path + output_path = args.output_path + + #time points (for now we only work on M0) + time_points = ['ses-M0', 'ses-M12'] + + # Get the list of subjects + subjects = os.listdir(dataset_path) + subjects = [subject for subject in subjects if 'sub-' in subject] + print("Total number of subjects: {}".format(len(subjects))) + + #initialize lists + subjects_all_time_points = [] + subjects_no_M0 = [] + subjects_no_M12 = [] + subjects_PSIR = [] + subjects_STIR = [] + subjects_PSIR_STIR = [] + subjects_no_PSIR_no_STIR = [] + subjects_no_PSIR_no_STIR_once = [] + + subjects_info = {} + + #Iterate over the subjects + for subject in subjects: + #iterate over the time_points + print("Subject: {}".format(subject)) + sub_time_points = [] + for time_point in time_points: + #if time_point exists for the subject + if os.path.exists(os.path.join(dataset_path, subject, time_point)): + sub_time_points.append(time_point) + print("Time points available: {}".format(sub_time_points)) + #initialize the contrast_subject dictionary + contrast_subject = {} + for time_point in sub_time_points: + contrast_subject[time_point] = [] + #iterate over the time points + for time_point in sub_time_points: + print("Time point: {}".format(time_point)) + #get the MRI files for the subject + subject_path = os.path.join(dataset_path, subject, time_point, 'anat') + subject_files = os.listdir(subject_path) + subject_files = [file for file in subject_files if '.nii.gz' in file] + #we get the contrast for each file + for file in subject_files: + contrast_subject[time_point].append(file.split('_')[2].split('.')[0]) + #we print the contrasts available for the subject + print("Contrasts available: {}".format(sorted(contrast_subject[time_point]))) + print(contrast_subject) + print("-----------------------------------") + subject_info = {'subject': subject, 'time_points': sub_time_points, 'contrasts': contrast_subject} + subjects_info[subject] = subject_info + + #we get the list of the subjects with all the time points + if len(sub_time_points) == len(time_points): + subjects_all_time_points.append(subject) + #we get the list of the subjects with no M0 + if 'ses-M0' not in sub_time_points: + subjects_no_M0.append(subject) + #we get the list of the subjects with no M12 + if 'ses-M12' not in sub_time_points: + subjects_no_M12.append(subject) + + #we get the list of the subjects with PSIR at every time point that they have + psir_present = True + for time_point in sub_time_points: + if 'PSIR' not in contrast_subject[time_point]: + psir_present = False + if psir_present: + subjects_PSIR.append(subject) + #we get the list of the subjects with STIR at every time point that they have + stir_present = True + for time_point in sub_time_points: + if 'STIR' not in contrast_subject[time_point]: + stir_present = False + if stir_present: + subjects_STIR.append(subject) + #we get the list of the subjects with PSIR and STIR at every time point that they have + psir_stir_present = True + for time_point in sub_time_points: + if 'PSIR' not in contrast_subject[time_point] or 'STIR' not in contrast_subject[time_point]: + psir_stir_present = False + if psir_stir_present: + subjects_PSIR_STIR.append(subject) + #we get the list of the subjects with no PSIR and no STIR at every time point that they have + psir_stir_not_present = True + for time_point in sub_time_points: + if 'PSIR' in 
contrast_subject[time_point] or 'STIR' in contrast_subject[time_point]: + psir_stir_not_present = False + if psir_stir_not_present: + subjects_no_PSIR_no_STIR.append(subject) + #we get the list of the subjects with no PSIR and no STIR at least once + psir_stir_not_present_once = False + for time_point in sub_time_points: + if 'PSIR' not in contrast_subject[time_point] and 'STIR' not in contrast_subject[time_point]: + psir_stir_not_present_once = True + if psir_stir_not_present_once: + subjects_no_PSIR_no_STIR_once.append(subject) + + #we print the results + print("Total number of subjects: {}".format(len(subjects))) + print("Number of subjects with all time points: {}".format(len(subjects_all_time_points))) + print("Number of subjects with no M0: {}".format(len(subjects_no_M0))) + print("Number of subjects with no M12: {}".format(len(subjects_no_M12))) + print("Number of subjects with PSIR at every time point they have: {}".format(len(subjects_PSIR))) + print("Number of subjects with STIR at every time point they have: {}".format(len(subjects_STIR))) + print("Number of subjects with PSIR and STIR at every time point they have: {}".format(len(subjects_PSIR_STIR))) + print("Number of subjects with no PSIR and no STIR at every time point they have: {}".format(len(subjects_no_PSIR_no_STIR))) + print("Number of subjects with no PSIR and no STIR at least once: {}".format(len(subjects_no_PSIR_no_STIR_once))) + print("-----------------------------------") + + #we save the subjects_info dictionary in a json file + with open(os.path.join(output_path, 'subjects_info.json'), 'w') as fp: + json.dump(subjects_info, fp, indent=4) + + #we write a txt file with the results + with open(os.path.join(output_path, 'results.txt'), 'w') as f: + f.write("Total number of subjects: {}\n".format(len(subjects))) + f.write("Number of subjects with all time points: {}\n".format(len(subjects_all_time_points))) + f.write("Number of subjects with no M0: {}\n".format(len(subjects_no_M0))) + f.write("Number of subjects with no M12: {}\n".format(len(subjects_no_M12))) + f.write("Number of subjects with PSIR at every time point they have: {}\n".format(len(subjects_PSIR))) + f.write("Number of subjects with STIR at every time point they have: {}\n".format(len(subjects_STIR))) + f.write("Number of subjects with PSIR and STIR at every time point they have: {}\n".format(len(subjects_PSIR_STIR))) + f.write("Number of subjects with no PSIR and no STIR at every time point they have: {}\n".format(len(subjects_no_PSIR_no_STIR))) + f.write("Number of subjects with no PSIR and no STIR at least once: {}\n".format(len(subjects_no_PSIR_no_STIR_once))) + f.write("-----------------------------------\n") + f.write("Subjects with all time points:\n") + for subject in subjects_all_time_points: + f.write("{}\n".format(subject)) + f.write("-----------------------------------\n") + f.write("Subjects with no M0:\n") + for subject in subjects_no_M0: + f.write("{}\n".format(subject)) + f.write("-----------------------------------\n") + f.write("Subjects with no M12:\n") + for subject in subjects_no_M12: + f.write("{}\n".format(subject)) + f.write("-----------------------------------\n") + f.write("Subjects with PSIR at every time point they have:\n") + for subject in subjects_PSIR: + f.write("{}\n".format(subject)) + f.write("-----------------------------------\n") + f.write("Subjects with STIR at every time point they have:\n") + for subject in subjects_STIR: + f.write("{}\n".format(subject)) + f.write("-----------------------------------\n") + 
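+        # each section below lists the subject IDs behind the corresponding count reported above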
f.write("Subjects with PSIR and STIR at every time point they have:\n") + for subject in subjects_PSIR_STIR: + f.write("{}\n".format(subject)) + f.write("-----------------------------------\n") + f.write("Subjects with no PSIR and no STIR at every time point they have:\n") + for subject in subjects_no_PSIR_no_STIR: + f.write("{}\n".format(subject)) + f.write("-----------------------------------\n") + f.write("Subjects with no PSIR and no STIR at least once:\n") + for subject in subjects_no_PSIR_no_STIR_once: + f.write("{}\n".format(subject)) + f.write("-----------------------------------\n") + + return None + + +if __name__ == '__main__': + + main() + + + + + + From c9ab8131b7406aa7c6cff4cac8f591f373b8bfb9 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Tue, 12 Sep 2023 13:08:25 -0400 Subject: [PATCH 18/26] added healthy control analysis --- data_analysis/data_analysis.py | 93 +++++++++++++++++++++++++++++----- 1 file changed, 80 insertions(+), 13 deletions(-) diff --git a/data_analysis/data_analysis.py b/data_analysis/data_analysis.py index b4ce7c6..1e97ed6 100644 --- a/data_analysis/data_analysis.py +++ b/data_analysis/data_analysis.py @@ -1,15 +1,19 @@ """ This python files performs data analysis on the canproco dataset. +It details the number of subjects and the files available for each subject (for different time points and contrasts). +It generates a text file containing the results of the analysis and a json file containing the information for each subject. +It also looks into the participants.tsv file to get the list of healthy controls and see which contrats they have. Args: -d, --dataset-path: path to the dataset -o, --output-path: path to the output directory + -p, --participants-tsv-path: path to the participants.tsv file Returns: - a csv file containing the results of the analysis Example: - python data_analysis.py -d /path/to/dataset -o /path/to/output -c STIR,PSIR + python data_analysis.py -d /path/to/dataset -o /path/to/output -p /path/to/participants.tsv To do: * @@ -37,10 +41,36 @@ def get_parser(): parser = argparse.ArgumentParser(description='Perform data analysis on the canproco dataset') parser.add_argument('-d', '--dataset-path', type=str, required=True, help='path to the dataset') parser.add_argument('-o', '--output-path', type=str, required=True, help='path to the output directory') + parser.add_argument('-p', '--participants-tsv-path', type=str, required=True, help='path to the participants.tsv file') return parser +def get_healthy_control(participants_tsv_path): + """ + This function gets the list of healthy controls. + + Args: + participants_tsv_path: path to the participants.tsv file + + Returns: + subjects: list of healthy controls + """ + #initialize the list of subjects + subjects = [] + + #open the participants.tsv file + with open(participants_tsv_path, 'r') as f: + lines = f.readlines() + #iterate over the lines + for line in lines[1:]: + line = line.split('\t') + #if the subject is a healthy control + if line[3] == 'HC': + subjects.append(line[0]) + return subjects + + def main(): """ This function performs the data analysis. 
@@ -58,6 +88,10 @@ def main():
     # Get the arguments
     dataset_path = args.dataset_path
     output_path = args.output_path
+    participants_tsv_path = args.participants_tsv_path
+
+    # Get the list of healthy controls
+    healthy_controls = get_healthy_control(participants_tsv_path)
 
     #time points (for now we only work on M0)
     time_points = ['ses-M0', 'ses-M12']
@@ -65,7 +99,7 @@
     # Get the list of subjects
     subjects = os.listdir(dataset_path)
     subjects = [subject for subject in subjects if 'sub-' in subject]
-    print("Total number of subjects: {}".format(len(subjects)))
+    #print("Total number of subjects: {}".format(len(subjects)))
 
     #initialize lists
     subjects_all_time_points = []
@@ -76,26 +110,28 @@
     subjects_PSIR_STIR = []
     subjects_no_PSIR_no_STIR = []
     subjects_no_PSIR_no_STIR_once = []
+    subjects_hc_psir = []
+    subjects_hc_stir = []
 
     subjects_info = {}
 
     #Iterate over the subjects
     for subject in subjects:
         #iterate over the time_points
-        print("Subject: {}".format(subject))
+        #print("Subject: {}".format(subject))
         sub_time_points = []
         for time_point in time_points:
             #if time_point exists for the subject
             if os.path.exists(os.path.join(dataset_path, subject, time_point)):
                 sub_time_points.append(time_point)
-        print("Time points available: {}".format(sub_time_points))
+        #print("Time points available: {}".format(sub_time_points))
         #initialize the contrast_subject dictionary
         contrast_subject = {}
         for time_point in sub_time_points:
             contrast_subject[time_point] = []
         #iterate over the time points
         for time_point in sub_time_points:
-            print("Time point: {}".format(time_point))
+            #print("Time point: {}".format(time_point))
             #get the MRI files for the subject
             subject_path = os.path.join(dataset_path, subject, time_point, 'anat')
             subject_files = os.listdir(subject_path)
@@ -104,10 +140,13 @@ def main():
             for file in subject_files:
                 contrast_subject[time_point].append(file.split('_')[2].split('.')[0])
             #we print the contrasts available for the subject
-            print("Contrasts available: {}".format(sorted(contrast_subject[time_point])))
-        print(contrast_subject)
-        print("-----------------------------------")
-        subject_info = {'subject': subject, 'time_points': sub_time_points, 'contrasts': contrast_subject}
+            #print("Contrasts available: {}".format(sorted(contrast_subject[time_point])))
+        #print(contrast_subject)
+        #print("-----------------------------------")
+        sub_hc = False
+        if subject in healthy_controls:
+            sub_hc = True
+        subject_info = {'subject': subject, 'time_points': sub_time_points, 'contrasts': contrast_subject, 'healthy_control': sub_hc}
         subjects_info[subject] = subject_info
 
         #we get the list of the subjects with all the time points
@@ -155,9 +194,24 @@ def main():
             psir_stir_not_present_once = True
         if psir_stir_not_present_once:
            subjects_no_PSIR_no_STIR_once.append(subject)
+        #we get the list of the healthy controls with PSIR at every time point that they have
+        hc_psir_present = True
+        for time_point in sub_time_points:
+            if 'PSIR' not in contrast_subject[time_point] or subject not in healthy_controls:
+                hc_psir_present = False
+        if hc_psir_present:
+            subjects_hc_psir.append(subject)
+        #we get the list of the healthy controls with STIR at every time point that they have
+        hc_stir_present = True
+        for time_point in sub_time_points:
+            if 'STIR' not in contrast_subject[time_point] or subject not in healthy_controls:
+                hc_stir_present = False
+        if hc_stir_present:
+            subjects_hc_stir.append(subject)
 
     #we print the results
     print("Total number of subjects: {}".format(len(subjects)))
+    print("Number of healthy controls: {}".format(len(healthy_controls)))
     print("Number of subjects with all time points: {}".format(len(subjects_all_time_points)))
     print("Number of subjects with no M0: {}".format(len(subjects_no_M0)))
     print("Number of subjects with no M12: {}".format(len(subjects_no_M12)))
@@ -166,6 +220,8 @@
     print("Number of subjects with PSIR and STIR at every time point they have: {}".format(len(subjects_PSIR_STIR)))
     print("Number of subjects with no PSIR and no STIR at every time point they have: {}".format(len(subjects_no_PSIR_no_STIR)))
     print("Number of subjects with no PSIR and no STIR at least once: {}".format(len(subjects_no_PSIR_no_STIR_once)))
+    print("Number of healthy controls with PSIR at every time point they have: {}".format(len(subjects_hc_psir)))
+    print("Number of healthy controls with STIR at every time point they have: {}".format(len(subjects_hc_stir)))
     print("-----------------------------------")
 
     #we save the subjects_info dictionary in a json file
@@ -175,6 +231,7 @@
     #we write a txt file with the results
     with open(os.path.join(output_path, 'results.txt'), 'w') as f:
         f.write("Total number of subjects: {}\n".format(len(subjects)))
+        f.write("Number of healthy controls: {}\n".format(len(healthy_controls)))
         f.write("Number of subjects with all time points: {}\n".format(len(subjects_all_time_points)))
         f.write("Number of subjects with no M0: {}\n".format(len(subjects_no_M0)))
         f.write("Number of subjects with no M12: {}\n".format(len(subjects_no_M12)))
@@ -183,11 +240,17 @@
         f.write("Number of subjects with PSIR and STIR at every time point they have: {}\n".format(len(subjects_PSIR_STIR)))
         f.write("Number of subjects with no PSIR and no STIR at every time point they have: {}\n".format(len(subjects_no_PSIR_no_STIR)))
         f.write("Number of subjects with no PSIR and no STIR at least once: {}\n".format(len(subjects_no_PSIR_no_STIR_once)))
+        f.write("Number of healthy controls with PSIR at every time point they have: {}\n".format(len(subjects_hc_psir)))
+        f.write("Number of healthy controls with STIR at every time point they have: {}\n".format(len(subjects_hc_stir)))
         f.write("-----------------------------------\n")
         f.write("Subjects with all time points:\n")
         for subject in subjects_all_time_points:
             f.write("{}\n".format(subject))
         f.write("-----------------------------------\n")
+        f.write("Healthy controls:\n")
+        for subject in healthy_controls:
+            f.write("{}\n".format(subject))
+        f.write("-----------------------------------\n")
         f.write("Subjects with no M0:\n")
         for subject in subjects_no_M0:
             f.write("{}\n".format(subject))
@@ -216,6 +279,14 @@
         for subject in subjects_no_PSIR_no_STIR_once:
             f.write("{}\n".format(subject))
         f.write("-----------------------------------\n")
+        f.write("Healthy controls with PSIR at every time point they have:\n")
+        for subject in subjects_hc_psir:
+            f.write("{}\n".format(subject))
+        f.write("-----------------------------------\n")
+        f.write("Healthy controls with STIR at every time point they have:\n")
+        for subject in subjects_hc_stir:
+            f.write("{}\n".format(subject))
+        f.write("-----------------------------------\n")
 
     return None
 
@@ -225,7 +296,3 @@ def main():
 
     main()
 
-
-
-
-
From 11a1f5b045cd0c9ef10a24141d90ec05c66c1c3b Mon Sep 17 00:00:00 2001
From: Pierre-Louis Benveniste
Date: Tue, 12 Sep 2023 17:11:00 -0400
Subject: [PATCH 19/26] modified to select wanted contrast

---
 nnunet/convert_bids_to_nnunet.py | 102 +++++++++++++++++++++----------
 1 file changed, 69 insertions(+), 33 deletions(-)

diff --git 
a/nnunet/convert_bids_to_nnunet.py b/nnunet/convert_bids_to_nnunet.py index 03db7e9..816de66 100644 --- a/nnunet/convert_bids_to_nnunet.py +++ b/nnunet/convert_bids_to_nnunet.py @@ -13,6 +13,7 @@ --taskname: Specify the task name - usually the anatomy to be segmented, e.g. Hippocampus --tasknumber : Specify the task number, has to be greater than 100 but less than 999 --label-folder : Path to the label folders in derivatives (default='labels') + --contrasts : Contrasts used for the images (default='PSIR') (separated by a comma) Returns: None @@ -34,35 +35,59 @@ import nibabel as nib import numpy as np -# parse command line arguments -parser = argparse.ArgumentParser(description='Convert BIDS-structured database to nnUNet format.') -parser.add_argument('--path-data', required=True, - help='Path to BIDS structured dataset. Accepts both cross-sectional and longitudinal datasets') -parser.add_argument('--path-out', help='Path to output directory.', required=True) -parser.add_argument('--taskname', default='MSSpineLesion', type=str, - help='Specify the task name - usually the anatomy to be segmented, e.g. Hippocampus',) -parser.add_argument('--tasknumber', default=501,type=int, - help='Specify the task number, has to be greater than 500 but less than 999. e.g 502') -parser.add_argument('--label-folder', help='Path to the label folders in derivatives', default='labels', type=str) - -args = parser.parse_args() - -path_in_images = Path(args.path_data) -label_folder = args.label_folder -path_in_labels = Path(os.path.join(args.path_data, 'derivatives', label_folder)) -path_out = Path(os.path.join(os.path.abspath(args.path_out), f'Dataset{args.tasknumber}_{args.taskname}')) - -# define paths for train and test folders -path_out_imagesTr = Path(os.path.join(path_out, 'imagesTr')) -path_out_imagesTs = Path(os.path.join(path_out, 'imagesTs')) -path_out_labelsTr = Path(os.path.join(path_out, 'labelsTr')) -path_out_labelsTs = Path(os.path.join(path_out, 'labelsTs')) - -# we load both train and validation set into the train images as nnunet uses cross-fold-validation -train_images, train_labels = [], [] -test_images, test_labels = [], [] -if __name__ == '__main__': +def get_parser(): + """ + This function parses the command line arguments and returns an argparse object. + + Input: + None + + Returns: + parser : argparse object + """ + parser = argparse.ArgumentParser(description='Convert BIDS-structured database to nnUNet format.') + parser.add_argument('--path-data', required=True, + help='Path to BIDS structured dataset. Accepts both cross-sectional and longitudinal datasets') + parser.add_argument('--path-out', help='Path to output directory.', required=True) + parser.add_argument('--taskname', default='MSSpineLesion', type=str, + help='Specify the task name - usually the anatomy to be segmented, e.g. Hippocampus',) + parser.add_argument('--tasknumber', default=501,type=int, + help='Specify the task number, has to be greater than 500 but less than 999. e.g 502') + parser.add_argument('--label-folder', help='Path to the label folders in derivatives', default='labels', type=str) + parser.add_argument('--contrasts', help='Contrast used for the images', default='PSIR', type=str) + return parser + + +def main(): + """ + This function is the main function of the script. It converts the data from BIDS to nnU-Net format. 
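+    Images with a matching manual lesion label are copied to imagesTr/labelsTr; all remaining images are copied to imagesTs.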
+ + Input: + None + + Returns: + None + """ + #------------- PARSE COMMAND LINE ARGUMENTS -------------------------- + parser = get_parser() + args = parser.parse_args() + + path_in_images = Path(args.path_data) + label_folder = args.label_folder + path_in_labels = Path(os.path.join(args.path_data, 'derivatives', label_folder)) + path_out = Path(os.path.join(os.path.abspath(args.path_out), f'Dataset{args.tasknumber}_{args.taskname}')) + contrasts = args.contrasts.split(',') + + # define paths for train and test folders + path_out_imagesTr = Path(os.path.join(path_out, 'imagesTr')) + path_out_imagesTs = Path(os.path.join(path_out, 'imagesTs')) + path_out_labelsTr = Path(os.path.join(path_out, 'labelsTr')) + path_out_labelsTs = Path(os.path.join(path_out, 'labelsTs')) + + # we load both train and validation set into the train images as nnunet uses cross-fold-validation + train_images, train_labels = [], [] + test_images, test_labels = [], [] # make the directories pathlib.Path(path_out).mkdir(parents=True, exist_ok=True) @@ -76,8 +101,11 @@ #------------- EXTRACTION OF THE LABELED IMAGES NAMES-------------------------- labelled_imgs = [] - # We first extract all the label files' names - label_files = sorted(list(path_in_labels.rglob('*_STIR_lesion-manual.nii.gz')) + list(path_in_labels.rglob('*PSIR_lesion-manual.nii.gz') )) + # We first extract all the label files' names for every wanted contrast + label_files = [] + for contrast in contrasts: + label_files += list(path_in_labels.rglob(f'*_{contrast}_lesion-manual.nii.gz')) + label_files = sorted(label_files) labelled_imgs += [str(k) for k in label_files] #--------------- DISPACTH OF LABELLED IMAGES AND UNLABELED IMAGES ------------------- @@ -89,7 +117,10 @@ valid_test_imgs = [] #The image folders - image_files = sorted(list(path_in_images.rglob('*_STIR.nii.gz')) + list(path_in_images.rglob('*_PSIR.nii.gz'))) + image_files = [] + for contrast in contrasts: + image_files += list(path_in_images.rglob(f'*_{contrast}.nii.gz')) + imahe_files = sorted(image_files) for image_file in image_files: @@ -155,7 +186,6 @@ json_dict['reference'] = "TBD" json_dict['licence'] = "TBD" json_dict['release'] = "0.0" - # Because only using one modality ## was changed from 'modality' to 'channel_names' @@ -187,4 +217,10 @@ # nn-unet requires it to be "dataset.json" dataset_dict_name = f"dataset.json" with open(os.path.join(path_out, dataset_dict_name), "w") as outfile: - outfile.write(json_object) \ No newline at end of file + outfile.write(json_object) + + return None + + +if __name__ == '__main__': + main() \ No newline at end of file From ee3e865ce3b06278564ed757e01ebf5aecb84b73 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Tue, 12 Sep 2023 17:14:12 -0400 Subject: [PATCH 20/26] added image of poor quality --- etc/exclude.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/etc/exclude.yml b/etc/exclude.yml index 74ad5a6..fbd3b57 100644 --- a/etc/exclude.yml +++ b/etc/exclude.yml @@ -3,3 +3,5 @@ - sub-cal209_ses-M0 # Missing T2w file # MT - sub-cal161 # Missing MT data +# PSIR +- sub-mon118 # poor image quality From af03d7e24ef79a815e6cc89b75368ae4948ce609 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Thu, 14 Sep 2023 14:18:19 -0400 Subject: [PATCH 21/26] removed useless line --- nnunet/convert_bids_to_nnunet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nnunet/convert_bids_to_nnunet.py b/nnunet/convert_bids_to_nnunet.py index 816de66..b4a222c 100644 --- a/nnunet/convert_bids_to_nnunet.py +++ 
b/nnunet/convert_bids_to_nnunet.py @@ -120,7 +120,7 @@ def main(): image_files = [] for contrast in contrasts: image_files += list(path_in_images.rglob(f'*_{contrast}.nii.gz')) - imahe_files = sorted(image_files) + image_files = sorted(image_files) for image_file in image_files: From 4b7d247a5349f22f17a85d63dea6a2706ab2259f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Thu, 14 Sep 2023 14:19:08 -0400 Subject: [PATCH 22/26] extract labeled slices and convert to nnunet format --- nnunet/extract_slices.py | 243 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 nnunet/extract_slices.py diff --git a/nnunet/extract_slices.py b/nnunet/extract_slices.py new file mode 100644 index 0000000..495149c --- /dev/null +++ b/nnunet/extract_slices.py @@ -0,0 +1,243 @@ +""" +This script is used to extract slices from the 3D images and labels and save them as 2D images and labels. +The 2D images are stored in the nnU-Net format, i.e. in the folders imagesTr and imagesTs. + +Input: + -data_path: path to the data folder + -label_folder: name of the folder containing the labels + -out_path: path to the output folder + -taskname: name of the task + -tasknumber: number of the task + -contrasts: contrasts used for the images + +Output: + None + +To do: + - + +Pierre-Louis Benveniste +""" + +import argparse +import pathlib +from pathlib import Path +import json +import os +import nibabel as nib +import shutil +from collections import OrderedDict +import numpy as np + + +def get_parser(): + """ + This function parses the command line arguments and returns an argparse object. + + Input: + None + + Returns: + parser : argparse object + """ + parser = argparse.ArgumentParser(description='Extract labeled slices from 3D images and labels.') + parser.add_argument('--path-data', required=True, + help='Path to BIDS structured dataset. Accepts both cross-sectional and longitudinal datasets') + parser.add_argument('--label-folder', help='Path to the label folders in derivatives', default='labels', type=str) + parser.add_argument('--path-out', help='Path to output directory.', required=True) + parser.add_argument('--taskname', default='MSSpineLesion', type=str, + help='Specify the task name - usually the anatomy to be segmented, e.g. Hippocampus',) + parser.add_argument('--tasknumber', default=501,type=int, + help='Specify the task number, has to be greater than 500 but less than 999. e.g 502') + parser.add_argument('--contrasts', help='Contrast used for the images', default='PSIR', type=str) + return parser + + +def main(): + """ + This function is the main function of the script. It extracts the annotated slices and converts the data from BIDS to nnU-Net format. 
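+    Only the slices that contain labelled voxels are kept for training; every slice of an image without a manual label is sent to the test set.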
+ + Input: + None + + Returns: + None + """ + # parse command line arguments + parser = get_parser() + args = parser.parse_args() + + path_in_images = Path(args.path_data) + label_folder = args.label_folder + path_in_labels = Path(os.path.join(args.path_data, 'derivatives', label_folder)) + path_out = Path(os.path.join(os.path.abspath(args.path_out), f'Dataset{args.tasknumber}_{args.taskname}')) + contrasts = args.contrasts.split(',') + + # define paths for train and test folders + path_out_imagesTr = Path(os.path.join(path_out, 'imagesTr')) + path_out_imagesTs = Path(os.path.join(path_out, 'imagesTs')) + path_out_labelsTr = Path(os.path.join(path_out, 'labelsTr')) + path_out_labelsTs = Path(os.path.join(path_out, 'labelsTs')) + + # we load both train and validation set into the train images as nnunet uses cross-fold-validation + train_images, train_labels = [], [] + test_images, test_labels = [], [] + + # make the directories + pathlib.Path(path_out).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_imagesTr).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_imagesTs).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_labelsTr).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_labelsTs).mkdir(parents=True, exist_ok=True) + + conversion_dict = {} + + #------------- EXTRACTION OF THE LABELED IMAGES NAMES-------------------------- + labelled_imgs = [] + + # We first extract all the label files' names for every wanted contrast + label_files = [] + for contrast in contrasts: + label_files += list(path_in_labels.rglob(f'*_{contrast}_lesion-manual.nii.gz')) + label_files = sorted(label_files) + labelled_imgs += [str(k) for k in label_files] + + + #Initialise the number of scans in train and in test folder + scan_cnt_train, scan_cnt_test = 0, 0 + + valid_train_imgs = [] + valid_test_imgs = [] + + #The image folders + image_files = [] + for contrast in contrasts: + image_files += list(path_in_images.rglob(f'*_{contrast}.nii.gz')) + image_files = sorted(image_files) + + for image_file in image_files: + + file_id = str(image_file).rsplit('_',1)[0].split('/')[-1] + "_" + + if (file_id in str(labelled_imgs)): + label_file = [k for k in labelled_imgs if file_id in k][0] + + #open the image and the label + image = nib.load(image_file) + image_data = image.get_fdata() + label = nib.load(label_file) + label_data = label.get_fdata() + nb_slices = image_data.shape[2] + + for slice in range(nb_slices): + #we check if the label slice is empty or not + label_slice = np.asarray(label.dataobj)[:,:,slice] + if np.sum(label_slice)!=0 : + #we add the slice to the train folder + scan_cnt_train+= 1 + + image_file_nnunet = os.path.join(path_out_imagesTr,f'{args.taskname}_{scan_cnt_train:03d}_0000.nii.gz') + label_file_nnunet = os.path.join(path_out_labelsTr,f'{args.taskname}_{scan_cnt_train:03d}.nii.gz') + + train_images.append(str(image_file_nnunet)) + train_labels.append(str(label_file_nnunet)) + + #create the image_slice_file and the label_slice_file + image_slice = np.asarray(image.dataobj)[:,:,slice] + image_slice = np.expand_dims(image_slice, axis=2) + image_slice_file = nib.Nifti1Image(image_slice, affine=image.affine) + nib.save(image_slice_file, str(image_file_nnunet)) + label_slice = np.asarray(label.dataobj)[:,:,slice] + label_slice = np.expand_dims(label_slice, axis=2) + label_slice_file = nib.Nifti1Image(label_slice, affine=label.affine) + nib.save(label_slice_file, str(label_file_nnunet)) + + conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet + 
conversion_dict[str(os.path.abspath(label_file))] = label_file_nnunet + else: + + #open the image and the label + image = nib.load(image_file) + image_data = image.get_fdata() + nb_slices = image_data.shape[2] + + for slice in range(nb_slices): + #we add the slice to the test folder + scan_cnt_test += 1 + # create the new convention names + image_file_nnunet = os.path.join(path_out_imagesTs,f'{args.taskname}_{scan_cnt_test:03d}_0000.nii.gz') + label_file_nnunet = os.path.join(path_out_labelsTs,f'{args.taskname}_{scan_cnt_test:03d}.nii.gz') + + test_images.append(str(image_file_nnunet)) + test_labels.append(str(label_file_nnunet)) + + #create the image_slice_file + image_slice = np.asarray(image.dataobj)[:,:,slice] + image_slice = np.expand_dims(image_slice, axis=2) + image_slice_file = nib.Nifti1Image(image_slice, affine=image.affine) + nib.save(image_slice_file, str(image_file_nnunet)) + + conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet + + #Display of number of training and number of testing images + print("Number of images for training: " + str(scan_cnt_train)) + print("Number of images for testing: " + str(scan_cnt_test)) + + #----------------- CREATION OF THE DICTIONNARY----------------------------------- + # create dataset_description.json + json_object = json.dumps(conversion_dict, indent=4) + # write to dataset description + conversion_dict_name = f"conversion_dict.json" + with open(os.path.join(path_out, conversion_dict_name), "w") as outfile: + outfile.write(json_object) + + + # c.f. dataset json generation. This contains the metadata for the dataset that nnUNet uses during preprocessing and training + # general info : https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/dataset_conversion/utils.py + # example: https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/dataset_conversion/Task055_SegTHOR.py + + json_dict = OrderedDict() + json_dict['name'] = args.taskname + json_dict['description'] = args.taskname + json_dict['tensorImageSize'] = "2D" + json_dict['reference'] = "TBD" + json_dict['licence'] = "TBD" + json_dict['release'] = "0.0" + + # Because only using one modality + ## was changed from 'modality' to 'channel_names' + json_dict['channel_names'] = { + "0": "PSIR", + } + + # 0 is always the background. Any class labels should start from 1. 
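+    # each entry maps a class name to the value stored in the corresponding labelsTr files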
+ json_dict['labels'] = { + "background" : "0", + "MS Lesion" : "1" , + } + + json_dict['numTraining'] = scan_cnt_train + json_dict['numTest'] = scan_cnt_test + #Newly required field in the json file with v2 + json_dict["file_ending"] = ".nii.gz" + + json_dict['training'] = [{'image': str(train_labels[i]).replace("labelsTr", "imagesTr") , "label": train_labels[i] } + for i in range(len(train_images))] + # Note: See https://github.com/MIC-DKFZ/nnUNet/issues/407 for how this should be described + + #Removed because useless in this case + json_dict['test'] = [str(test_labels[i]).replace("labelsTs", "imagesTs") for i in range(len(test_images))] + + # create dataset_description.json + json_object = json.dumps(json_dict, indent=4) + # write to dataset description + # nn-unet requires it to be "dataset.json" + dataset_dict_name = f"dataset.json" + with open(os.path.join(path_out, dataset_dict_name), "w") as outfile: + outfile.write(json_object) + + return None + + +if __name__ == '__main__': + main() \ No newline at end of file From 6406dac3dcfab3ef20fd0d39658c66c6f8f324a3 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Thu, 14 Sep 2023 17:18:25 -0400 Subject: [PATCH 23/26] renamed file to explain conversion to nnunet format --- ...ces.py => extract_slices_and_convert_bids_to_nnunet.py} | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) rename nnunet/{extract_slices.py => extract_slices_and_convert_bids_to_nnunet.py} (97%) diff --git a/nnunet/extract_slices.py b/nnunet/extract_slices_and_convert_bids_to_nnunet.py similarity index 97% rename from nnunet/extract_slices.py rename to nnunet/extract_slices_and_convert_bids_to_nnunet.py index 495149c..940bdfb 100644 --- a/nnunet/extract_slices.py +++ b/nnunet/extract_slices_and_convert_bids_to_nnunet.py @@ -3,9 +3,9 @@ The 2D images are stored in the nnU-Net format, i.e. in the folders imagesTr and imagesTs. Input: - -data_path: path to the data folder + -path-data: path to the data folder -label_folder: name of the folder containing the labels - -out_path: path to the output folder + -path-out: path to the output folder -taskname: name of the task -tasknumber: number of the task -contrasts: contrasts used for the images @@ -16,6 +16,9 @@ To do: - +Example: + python extract_slices_and_convert_bids_to_nnunet.py --path-data /path/to/data_extracted --path-out /path/to/nnUNet_raw --taskname TASK-NAME --tasknumber DATASET-ID --label-folder labels --contrasts PSIR + Pierre-Louis Benveniste """ From accea6e315f4c2984be0763b3108165942db2950 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Thu, 14 Sep 2023 18:02:23 -0400 Subject: [PATCH 24/26] extract slice and sc_seg for region-based training --- ...gion-based_sc_seg_convert_nnunet_format.py | 276 ++++++++++++++++++ 1 file changed, 276 insertions(+) create mode 100644 nnunet/extract_slice_region-based_sc_seg_convert_nnunet_format.py diff --git a/nnunet/extract_slice_region-based_sc_seg_convert_nnunet_format.py b/nnunet/extract_slice_region-based_sc_seg_convert_nnunet_format.py new file mode 100644 index 0000000..9211395 --- /dev/null +++ b/nnunet/extract_slice_region-based_sc_seg_convert_nnunet_format.py @@ -0,0 +1,276 @@ +""" +This script is used to extract slices from the 3D images and labels and save them as 2D images and labels. +For each of these slices, the script segments the spinal cord and saves the images and multi-labels in the nnU-Net format. +The images are stored in the nnU-Net format, i.e. in the folders imagesTr and imagesTs. 
+
+Input:
+    -path-data: path to the data folder
+    -label_folder: name of the folder containing the labels
+    -path-out: path to the output folder
+    -taskname: name of the task
+    -tasknumber: number of the task
+    -contrasts: contrasts used for the images
+    -qc-folder: path to the quality control folder
+
+Output:
+    None
+
+To do:
+    -
+
+Example:
+    python nnunet/extract_slice_region-based_sc_seg_convert_nnunet_format.py --path-data /path/to/data_extracted --path-out /path/to/nnUNet_raw --taskname TASK-NAME --tasknumber DATASET-ID --label-folder labels --contrasts PSIR --qc-folder /path/to/qc/folder
+
+Pierre-Louis Benveniste
+"""
+
+import argparse
+import pathlib
+from pathlib import Path
+import json
+import os
+import nibabel as nib
+import shutil
+from collections import OrderedDict
+import numpy as np
+
+
+def get_parser():
+    """
+    This function parses the command line arguments and returns an argparse object.
+
+    Input:
+        None
+
+    Returns:
+        parser : argparse object
+    """
+    parser = argparse.ArgumentParser(description='Extract labeled slices from 3D images and labels.')
+    parser.add_argument('--path-data', required=True,
+                        help='Path to BIDS structured dataset. Accepts both cross-sectional and longitudinal datasets')
+    parser.add_argument('--label-folder', help='Path to the label folders in derivatives', default='labels', type=str)
+    parser.add_argument('--path-out', help='Path to output directory.', required=True)
+    parser.add_argument('--taskname', default='MSSpineLesion', type=str,
+                        help='Specify the task name - usually the anatomy to be segmented, e.g. Hippocampus',)
+    parser.add_argument('--tasknumber', default=501,type=int,
+                        help='Specify the task number, has to be greater than 500 but less than 999. e.g 502')
+    parser.add_argument('--contrasts', help='Contrast used for the images', default='PSIR', type=str)
+    parser.add_argument('--qc-folder', help='Path to the quality control folder', required=True)
+    return parser
+
+
+def main():
+    """
+    This function is the main function of the script. It extracts the annotated slices and converts the data from BIDS to nnU-Net format. 
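+    For every labelled image the spinal cord is segmented with sct_propseg, and the saved training labels combine the cord segmentation (value 1) with the manual lesion mask (value 2) for region-based training.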
+
+    Input:
+        None
+
+    Returns:
+        None
+    """
+    #parameters
+    label_threshold = 0.0001
+
+    # parse command line arguments
+    parser = get_parser()
+    args = parser.parse_args()
+
+    path_in_images = Path(args.path_data)
+    label_folder = args.label_folder
+    path_in_labels = Path(os.path.join(args.path_data, 'derivatives', label_folder))
+    path_out = Path(os.path.join(os.path.abspath(args.path_out), f'Dataset{args.tasknumber}_{args.taskname}'))
+    contrasts = args.contrasts.split(',')
+    qc_folder = args.qc_folder
+
+    # define paths for train and test folders
+    path_out_imagesTr = Path(os.path.join(path_out, 'imagesTr'))
+    path_out_imagesTs = Path(os.path.join(path_out, 'imagesTs'))
+    path_out_labelsTr = Path(os.path.join(path_out, 'labelsTr'))
+    path_out_labelsTs = Path(os.path.join(path_out, 'labelsTs'))
+
+    # we load both train and validation set into the train images as nnunet uses cross-fold-validation
+    train_images, train_labels = [], []
+    test_images, test_labels = [], []
+
+    # make the directories
+    pathlib.Path(path_out).mkdir(parents=True, exist_ok=True)
+    pathlib.Path(path_out_imagesTr).mkdir(parents=True, exist_ok=True)
+    pathlib.Path(path_out_imagesTs).mkdir(parents=True, exist_ok=True)
+    pathlib.Path(path_out_labelsTr).mkdir(parents=True, exist_ok=True)
+    pathlib.Path(path_out_labelsTs).mkdir(parents=True, exist_ok=True)
+
+    conversion_dict = {}
+
+    #------------- EXTRACTION OF THE LABELED IMAGES NAMES--------------------------
+    labelled_imgs = []
+
+    # We first extract all the label files' names for every wanted contrast
+    label_files = []
+    for contrast in contrasts:
+        label_files += list(path_in_labels.rglob(f'*_{contrast}_lesion-manual.nii.gz'))
+    label_files = sorted(label_files)
+    labelled_imgs += [str(k) for k in label_files]
+
+
+    #Initialise the number of scans in train and in test folder
+    scan_cnt_train, scan_cnt_test = 0, 0
+
+    valid_train_imgs = []
+    valid_test_imgs = []
+
+    #The image folders
+    image_files = []
+    for contrast in contrasts:
+        image_files += list(path_in_images.rglob(f'*_{contrast}.nii.gz'))
+    image_files = sorted(image_files)
+
+    for image_file in image_files:
+
+        file_id = str(image_file).rsplit('_',1)[0].split('/')[-1] + "_"
+
+        if (file_id in str(labelled_imgs)):
+            label_file = [k for k in labelled_imgs if file_id in k][0]
+
+            #open the image and the label
+            image = nib.load(image_file)
+            image_data = image.get_fdata()
+            label = nib.load(label_file)
+            label_data = label.get_fdata()
+            nb_slices = image_data.shape[2]
+
+
+            ###CHECK WHETHER THE LABEL FILE IS EMPTY OR NOT
+            #IF NOT EMPTY: RUN THE SEGMENTATION
+            #and then do it for each slice
+
+            for slice in range(nb_slices):
+                #we check if the label slice is empty or not
+                label_slice = np.asarray(label.dataobj)[:,:,slice]
+                if np.sum(label_slice)!=0 :
+                    #we add the slice to the train folder
+                    scan_cnt_train+= 1
+
+                    image_file_nnunet = os.path.join(path_out_imagesTr,f'{args.taskname}_{scan_cnt_train:03d}_0000.nii.gz')
+                    label_file_nnunet = os.path.join(path_out_labelsTr,f'{args.taskname}_{scan_cnt_train:03d}.nii.gz')
+
+                    train_images.append(str(image_file_nnunet))
+                    train_labels.append(str(label_file_nnunet))
+
+                    #create the image_slice_file and the label_slice_file
+                    image_slice = np.asarray(image.dataobj)[:,:,slice]
+                    image_slice = np.expand_dims(image_slice, axis=2)
+                    image_slice_file = nib.Nifti1Image(image_slice, affine=image.affine)
+                    nib.save(image_slice_file, str(image_file_nnunet))
+
+                    #we segment the spinal cord
+                    sc_seg_file_path = 
os.path.join(qc_folder,f'{args.taskname}_{scan_cnt_train:03d}_sc_seg.nii.gz') + os.system(f'sct_propseg -i {image_file} -o {sc_seg_file_path} -c t1 ') + #generate the quality control + os.system(f'sct_qc -i {image_file} -s {sc_seg_file_path} -d {sc_seg_file_path} -p sct_deepseg_lesion -plane sagittal -qc {qc_folder}') + + label_slice = np.asarray(label.dataobj)[:,:,slice] + label_slice = np.expand_dims(label_slice, axis=2) + label_slice = np.where(label_slice > label_threshold, 1, 0) + + #we create the multi-label + sc_seg = nib.load(sc_seg_file_path) + sc_seg = np.asarray(sc_seg.dataobj)[:,:,slice] + sc_seg = np.where(sc_seg > label_threshold, 1, 0) + + multi_label_slice = np.zeros(image_slice.shape) + multi_label_slice[sc_seg==1] = 1 + multi_label_slice[label_slice==1] = 2 + + label_slice_file = nib.Nifti1Image(multi_label_slice, affine=label.affine) + nib.save(label_slice_file, str(label_file_nnunet)) + + conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet + conversion_dict[str(os.path.abspath(label_file))] = label_file_nnunet + else: + + #open the image and the label + image = nib.load(image_file) + image_data = image.get_fdata() + nb_slices = image_data.shape[2] + + for slice in range(nb_slices): + #we add the slice to the test folder + scan_cnt_test += 1 + # create the new convention names + image_file_nnunet = os.path.join(path_out_imagesTs,f'{args.taskname}_{scan_cnt_test:03d}_0000.nii.gz') + label_file_nnunet = os.path.join(path_out_labelsTs,f'{args.taskname}_{scan_cnt_test:03d}.nii.gz') + + test_images.append(str(image_file_nnunet)) + test_labels.append(str(label_file_nnunet)) + + #create the image_slice_file + image_slice = np.asarray(image.dataobj)[:,:,slice] + image_slice = np.expand_dims(image_slice, axis=2) + image_slice_file = nib.Nifti1Image(image_slice, affine=image.affine) + nib.save(image_slice_file, str(image_file_nnunet)) + + conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet + + #Display of number of training and number of testing images + print("Number of images for training: " + str(scan_cnt_train)) + print("Number of images for testing: " + str(scan_cnt_test)) + + #----------------- CREATION OF THE DICTIONNARY----------------------------------- + # create dataset_description.json + json_object = json.dumps(conversion_dict, indent=4) + # write to dataset description + conversion_dict_name = f"conversion_dict.json" + with open(os.path.join(path_out, conversion_dict_name), "w") as outfile: + outfile.write(json_object) + + + # c.f. dataset json generation. This contains the metadata for the dataset that nnUNet uses during preprocessing and training + # general info : https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/dataset_conversion/utils.py + # example: https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/dataset_conversion/Task055_SegTHOR.py + + json_dict = OrderedDict() + json_dict['name'] = args.taskname + json_dict['description'] = args.taskname + json_dict['tensorImageSize'] = "2D" + json_dict['reference'] = "TBD" + json_dict['licence'] = "TBD" + json_dict['release'] = "0.0" + + # Because only using one modality + ## was changed from 'modality' to 'channel_names' + json_dict['channel_names'] = { + "0": "PSIR", + } + + # 0 is always the background. Any class labels should start from 1. 
+    json_dict['labels'] = {
+        "background" : "0",
+        "MS Lesion" : "1" ,
+    }
+
+    json_dict['numTraining'] = scan_cnt_train
+    json_dict['numTest'] = scan_cnt_test
+    #Newly required field in the json file with v2
+    json_dict["file_ending"] = ".nii.gz"
+
+    json_dict['training'] = [{'image': str(train_labels[i]).replace("labelsTr", "imagesTr") , "label": train_labels[i] }
+                                 for i in range(len(train_images))]
+    # Note: See https://github.com/MIC-DKFZ/nnUNet/issues/407 for how this should be described
+
+    #Removed because useless in this case
+    json_dict['test'] = [str(test_labels[i]).replace("labelsTs", "imagesTs") for i in range(len(test_images))]
+
+    # create dataset_description.json
+    json_object = json.dumps(json_dict, indent=4)
+    # write to dataset description
+    # nn-unet requires it to be "dataset.json"
+    dataset_dict_name = f"dataset.json"
+    with open(os.path.join(path_out, dataset_dict_name), "w") as outfile:
+        outfile.write(json_object)
+
+    return None
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file

From f18e8462b73e8250c3b5854f7aecc81421785a39 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Benveniste
Date: Fri, 15 Sep 2023 14:23:57 -0400
Subject: [PATCH 25/26] finished file for sc seg, slice extraction and conversion to nnunet format

---
 ...gion-based_sc_seg_convert_nnunet_format.py | 98 +++++++++----------
 1 file changed, 49 insertions(+), 49 deletions(-)

diff --git a/nnunet/extract_slice_region-based_sc_seg_convert_nnunet_format.py b/nnunet/extract_slice_region-based_sc_seg_convert_nnunet_format.py
index 9211395..39fb9b8 100644
--- a/nnunet/extract_slice_region-based_sc_seg_convert_nnunet_format.py
+++ b/nnunet/extract_slice_region-based_sc_seg_convert_nnunet_format.py
@@ -139,54 +139,51 @@ def main():
             label_data = label.get_fdata()
             nb_slices = image_data.shape[2]
 
-
-            ###CHECK WHETHER THE LABEL FILE IS EMPTY OR NOT
-            #IF NOT EMPTY: RUN THE SEGMENTATION
-            #and then do it for each slice
-
-            for slice in range(nb_slices):
-                #we check if the label slice is empty or not
-                label_slice = np.asarray(label.dataobj)[:,:,slice]
-                if np.sum(label_slice)!=0 :
-                    #we add the slice to the train folder
-                    scan_cnt_train+= 1
-
-                    image_file_nnunet = os.path.join(path_out_imagesTr,f'{args.taskname}_{scan_cnt_train:03d}_0000.nii.gz')
-                    label_file_nnunet = os.path.join(path_out_labelsTr,f'{args.taskname}_{scan_cnt_train:03d}.nii.gz')
-
-                    train_images.append(str(image_file_nnunet))
-                    train_labels.append(str(label_file_nnunet))
-
-                    #create the image_slice_file and the label_slice_file
-                    image_slice = np.asarray(image.dataobj)[:,:,slice]
-                    image_slice = np.expand_dims(image_slice, axis=2)
-                    image_slice_file = nib.Nifti1Image(image_slice, affine=image.affine)
-                    nib.save(image_slice_file, str(image_file_nnunet))
-
-                    #we segment the spinal cord
-                    sc_seg_file_path = os.path.join(qc_folder,f'{args.taskname}_{scan_cnt_train:03d}_sc_seg.nii.gz')
-                    os.system(f'sct_propseg -i {image_file} -o {sc_seg_file_path} -c t1 ')
-                    #generate the quality control
-                    os.system(f'sct_qc -i {image_file} -s {sc_seg_file_path} -d {sc_seg_file_path} -p sct_deepseg_lesion -plane sagittal -qc {qc_folder}')
-
+            if (np.sum(label_data)!=0 ):
+
+                #we segment the spinal cord
+                sc_seg_file_path = os.path.join(qc_folder,f'{args.taskname}_{scan_cnt_train:03d}_sc_seg.nii.gz')
+                os.system(f'sct_propseg -i {image_file} -o {sc_seg_file_path} -c t1 -v 0')
+                #generate the quality control
+                os.system(f'sct_qc -i {image_file} -s {sc_seg_file_path} -d {sc_seg_file_path} -p sct_deepseg_lesion -plane sagittal -qc {qc_folder}')
+
+                for slice 
in range(nb_slices): + #we check if the label slice is empty or not label_slice = np.asarray(label.dataobj)[:,:,slice] - label_slice = np.expand_dims(label_slice, axis=2) - label_slice = np.where(label_slice > label_threshold, 1, 0) - - #we create the multi-label - sc_seg = nib.load(sc_seg_file_path) - sc_seg = np.asarray(sc_seg.dataobj)[:,:,slice] - sc_seg = np.where(sc_seg > label_threshold, 1, 0) - - multi_label_slice = np.zeros(image_slice.shape) - multi_label_slice[sc_seg==1] = 1 - multi_label_slice[label_slice==1] = 2 - - label_slice_file = nib.Nifti1Image(multi_label_slice, affine=label.affine) - nib.save(label_slice_file, str(label_file_nnunet)) - - conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet - conversion_dict[str(os.path.abspath(label_file))] = label_file_nnunet + if np.sum(label_slice)!=0 : + #we add the slice to the train folder + scan_cnt_train+= 1 + print("Number of images for training: ", scan_cnt_train) + + image_file_nnunet = os.path.join(path_out_imagesTr,f'{args.taskname}_{scan_cnt_train:03d}_0000.nii.gz') + label_file_nnunet = os.path.join(path_out_labelsTr,f'{args.taskname}_{scan_cnt_train:03d}.nii.gz') + + train_images.append(str(image_file_nnunet)) + train_labels.append(str(label_file_nnunet)) + + #create the image_slice_file and the label_slice_file + image_slice = np.asarray(image.dataobj)[:,:,slice] + image_slice = np.expand_dims(image_slice, axis=2) + image_slice_file = nib.Nifti1Image(image_slice, affine=image.affine) + nib.save(image_slice_file, str(image_file_nnunet)) + label_slice = np.asarray(label.dataobj)[:,:,slice] + label_slice = np.expand_dims(label_slice, axis=2) + label_slice = np.where(label_slice > label_threshold, 1, 0) + + #we create the multi-label + sc_seg = nib.load(sc_seg_file_path) + sc_seg = np.asarray(sc_seg.dataobj)[:,:,slice] + sc_seg = np.where(sc_seg > label_threshold, 1, 0) + + multi_label_slice = np.zeros(image_slice.shape) + multi_label_slice[sc_seg==1] = 1 + multi_label_slice[label_slice==1] = 2 + + multi_label_slice_file = nib.Nifti1Image(multi_label_slice, affine=label.affine) + nib.save(multi_label_slice_file, str(label_file_nnunet)) + + conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet + conversion_dict[str(os.path.abspath(label_file))] = label_file_nnunet else: #open the image and the label @@ -245,9 +242,12 @@ def main(): # 0 is always the background. Any class labels should start from 1. 
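+    # region-based labels: each entry lists the voxel values that belong to that
+    # region, so "Spinal cord" covers both cord (1) and lesion (2) voxels while
+    # "Lesion" covers lesion voxels only; 'regions_class_order' gives the order in
+    # which the region predictions are written back into the exported label map
+    # (later entries overwrite earlier ones)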
json_dict['labels'] = { - "background" : "0", - "MS Lesion" : "1" , + "background" : 0, + "Spinal cord" : [1, 2] , + "Lesion" : [2] } + + json_dict['regions_class_order'] = [1,2] json_dict['numTraining'] = scan_cnt_train json_dict['numTest'] = scan_cnt_test From cf8ca41447bf989c8fa3f6e124045c939c0ea809 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Benveniste Date: Fri, 15 Sep 2023 14:26:07 -0400 Subject: [PATCH 26/26] code for sc seg on 3d and conversion to nnunet format --- nnunet/seg_sc_and_convert_bids_to_nnunet.py | 252 ++++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100644 nnunet/seg_sc_and_convert_bids_to_nnunet.py diff --git a/nnunet/seg_sc_and_convert_bids_to_nnunet.py b/nnunet/seg_sc_and_convert_bids_to_nnunet.py new file mode 100644 index 0000000..479aeee --- /dev/null +++ b/nnunet/seg_sc_and_convert_bids_to_nnunet.py @@ -0,0 +1,252 @@ +"""Convert data from BIDS to nnU-Net format +This scripts was adapted from a script by the following authors : Julian McGinnis, Naga Karthik +This python script converts data from the BIDS format to the nnU-Net format in order to be able to perform pre-processing, training and inference. +This script is specifically designed to the canproco dataset. It converts the dataset from a BIDS format to a nnU-Net format. +This script is specific in that it takes all labeled data for training and validation. Because, very little labelled data is available, no labelled data is used for testing. + +Example of run: + $ python nnunet/seg_sc_and_convert_bids_to_nnunet.py --path-data /path/to/data_extracted --path-out /path/to/nnUNet_raw --taskname TASK-NAME --tasknumber DATASET-ID --contrasts PSIR --qc-folder /path/to/qc_folder + +Arguments: + --path-data : Path to BIDS structured dataset. Accepts both cross-sectional and longitudinal datasets + --path-out : Path to output directory. + --taskname: Specify the task name - usually the anatomy to be segmented, e.g. Hippocampus + --tasknumber : Specify the task number, has to be greater than 100 but less than 999 + --label-folder : Path to the label folders in derivatives (default='labels') + --contrasts : Contrasts used for the images (default='PSIR') (separated by a comma) + --qc-folder : Path to the qc_folder + +Returns: + None + +Todo: + * +Pierre-Louis Benveniste +""" + +import argparse +import pathlib +from pathlib import Path +import json +import os +import shutil +from collections import OrderedDict +from tqdm import tqdm + +import nibabel as nib +import numpy as np + + +def get_parser(): + """ + This function parses the command line arguments and returns an argparse object. + + Input: + None + + Returns: + parser : argparse object + """ + parser = argparse.ArgumentParser(description='Convert BIDS-structured database to nnUNet format.') + parser.add_argument('--path-data', required=True, + help='Path to BIDS structured dataset. Accepts both cross-sectional and longitudinal datasets') + parser.add_argument('--path-out', help='Path to output directory.', required=True) + parser.add_argument('--taskname', default='MSSpineLesion', type=str, + help='Specify the task name - usually the anatomy to be segmented, e.g. Hippocampus',) + parser.add_argument('--tasknumber', default=501,type=int, + help='Specify the task number, has to be greater than 500 but less than 999. 
e.g 502') + parser.add_argument('--label-folder', help='Path to the label folders in derivatives', default='labels', type=str) + parser.add_argument('--contrasts', help='Contrast used for the images', default='PSIR', type=str) + parser.add_argument('--qc-folder', help='Path to the quality control folder', required=True) + return parser + + +def main(): + """ + This function is the main function of the script. It converts the data from BIDS to nnU-Net format. + + Input: + None + + Returns: + None + """ + #------------- PARSE COMMAND LINE ARGUMENTS -------------------------- + parser = get_parser() + args = parser.parse_args() + + path_in_images = Path(args.path_data) + label_folder = args.label_folder + path_in_labels = Path(os.path.join(args.path_data, 'derivatives', label_folder)) + path_out = Path(os.path.join(os.path.abspath(args.path_out), f'Dataset{args.tasknumber}_{args.taskname}')) + contrasts = args.contrasts.split(',') + qc_folder = args.qc_folder + + # define paths for train and test folders + path_out_imagesTr = Path(os.path.join(path_out, 'imagesTr')) + path_out_imagesTs = Path(os.path.join(path_out, 'imagesTs')) + path_out_labelsTr = Path(os.path.join(path_out, 'labelsTr')) + path_out_labelsTs = Path(os.path.join(path_out, 'labelsTs')) + + # we load both train and validation set into the train images as nnunet uses cross-fold-validation + train_images, train_labels = [], [] + test_images, test_labels = [], [] + + #fixed parameters + label_threshold = 0.001 + + # make the directories + pathlib.Path(path_out).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_imagesTr).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_imagesTs).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_labelsTr).mkdir(parents=True, exist_ok=True) + pathlib.Path(path_out_labelsTs).mkdir(parents=True, exist_ok=True) + + conversion_dict = {} + + #------------- EXTRACTION OF THE LABELED IMAGES NAMES-------------------------- + labelled_imgs = [] + + # We first extract all the label files' names for every wanted contrast + label_files = [] + for contrast in contrasts: + label_files += list(path_in_labels.rglob(f'*_{contrast}_lesion-manual.nii.gz')) + label_files = sorted(label_files) + labelled_imgs += [str(k) for k in label_files] + + #--------------- DISPACTH OF LABELLED IMAGES AND UNLABELED IMAGES ------------------- + + #Initialise the number of scans in train and in test folder + scan_cnt_train, scan_cnt_test = 0, 0 + + valid_train_imgs = [] + valid_test_imgs = [] + + #The image folders + image_files = [] + for contrast in contrasts: + image_files += list(path_in_images.rglob(f'*_{contrast}.nii.gz')) + image_files = sorted(image_files) + + for image_file in image_files: + + + file_id = str(image_file).rsplit('_',1)[0].split('/')[-1] + "_" + + if (file_id in str(labelled_imgs)): + label_file = [k for k in labelled_imgs if file_id in k][0] + + #we segment the spinal cord + sc_seg_file_path = os.path.join(qc_folder,f'{args.taskname}_{scan_cnt_train:03d}_sc_seg.nii.gz') + os.system(f'sct_propseg -i {image_file} -o {sc_seg_file_path} -c t1 -v 0') + #generate the quality control + os.system(f'sct_qc -i {image_file} -s {sc_seg_file_path} -d {sc_seg_file_path} -p sct_deepseg_lesion -plane sagittal -qc {qc_folder}') + + scan_cnt_train+= 1 + + image_file_nnunet = os.path.join(path_out_imagesTr,f'{args.taskname}_{scan_cnt_train:03d}_0000.nii.gz') + label_file_nnunet = os.path.join(path_out_labelsTr,f'{args.taskname}_{scan_cnt_train:03d}.nii.gz') + + 
train_images.append(str(image_file_nnunet))
+            train_labels.append(str(label_file_nnunet))
+
+            #we create the multi-label (1 = spinal cord, 2 = lesion)
+            sc_seg = nib.load(sc_seg_file_path)
+            sc_seg = np.asarray(sc_seg.dataobj)
+            sc_seg = np.where(sc_seg > label_threshold, 1, 0)
+
+            label_nib = nib.load(label_file)
+            label_file_data = np.asarray(label_nib.dataobj)
+            label_file_data = np.where(label_file_data > label_threshold, 1, 0)
+
+            multi_label = np.zeros(label_file_data.shape)
+            multi_label[sc_seg==1] = 1
+            multi_label[label_file_data==1] = 2
+
+            # copy the image and save the multi-label (not the raw lesion mask) to the new structure
+            shutil.copyfile(image_file, image_file_nnunet)
+            nib.save(nib.Nifti1Image(multi_label, affine=label_nib.affine), label_file_nnunet)
+
+            conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet
+            conversion_dict[str(os.path.abspath(label_file))] = label_file_nnunet
+        else:
+
+            # Repeat the above procedure for testing
+            scan_cnt_test += 1
+            # create the new convention names
+            image_file_nnunet = os.path.join(path_out_imagesTs,f'{args.taskname}_{scan_cnt_test:03d}_0000.nii.gz')
+            label_file_nnunet = os.path.join(path_out_labelsTs,f'{args.taskname}_{scan_cnt_test:03d}.nii.gz')
+
+            test_images.append(str(image_file_nnunet))
+            test_labels.append(str(label_file_nnunet))
+
+            # copy the files to new structure
+            shutil.copyfile(image_file, image_file_nnunet)
+
+            conversion_dict[str(os.path.abspath(image_file))] = image_file_nnunet
+
+    #Display of number of training and number of testing images
+    print("Number of images for training: " + str(scan_cnt_train))
+    print("Number of images for testing: " + str(scan_cnt_test))
+
+    #----------------- CREATION OF THE DICTIONARY-----------------------------------
+    # create dataset_description.json
+    json_object = json.dumps(conversion_dict, indent=4)
+    # write to dataset description
+    conversion_dict_name = f"conversion_dict.json"
+    with open(os.path.join(path_out, conversion_dict_name), "w") as outfile:
+        outfile.write(json_object)
+
+
+    # c.f. dataset json generation. This contains the metadata for the dataset that nnUNet uses during preprocessing and training
+    # general info : https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/dataset_conversion/utils.py
+    # example: https://github.com/MIC-DKFZ/nnUNet/blob/master/nnunet/dataset_conversion/Task055_SegTHOR.py
+
+    json_dict = OrderedDict()
+    json_dict['name'] = args.taskname
+    json_dict['description'] = args.taskname
+    json_dict['tensorImageSize'] = "3D"
+    json_dict['reference'] = "TBD"
+    json_dict['licence'] = "TBD"
+    json_dict['release'] = "0.0"
+
+    # Because only using one modality
+    ## was changed from 'modality' to 'channel_names'
+    json_dict['channel_names'] = {
+        "0": "T1w",
+    }
+
+    # 0 is always the background. Any class labels should start from 1. 
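+    # the 3D multi-labels saved above follow the same convention: 1 = spinal cord, 2 = lesion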
+ json_dict['labels'] = { + "background" : 0, + "Spinal cord" : [1, 2] , + "Lesion" : [2] + } + + json_dict['regions_class_order'] = [1,2] + + json_dict['numTraining'] = scan_cnt_train + json_dict['numTest'] = scan_cnt_test + #Newly required field in the json file with v2 + json_dict["file_ending"] = ".nii.gz" + + json_dict['training'] = [{'image': str(train_labels[i]).replace("labelsTr", "imagesTr") , "label": train_labels[i] } + for i in range(len(train_images))] + # Note: See https://github.com/MIC-DKFZ/nnUNet/issues/407 for how this should be described + + #Removed because useless in this case + json_dict['test'] = [str(test_labels[i]).replace("labelsTs", "imagesTs") for i in range(len(test_images))] + + # create dataset_description.json + json_object = json.dumps(json_dict, indent=4) + # write to dataset description + # nn-unet requires it to be "dataset.json" + dataset_dict_name = f"dataset.json" + with open(os.path.join(path_out, dataset_dict_name), "w") as outfile: + outfile.write(json_object) + + return None + + +if __name__ == '__main__': + main() \ No newline at end of file
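A quick sanity check of a converted dataset before launching nnU-Net preprocessing. This is an illustrative sketch rather than part of the scripts above: the dataset path is a hypothetical example, and the accepted voxel values (0, 1, 2) are the ones used by the conversion scripts in this series.

    import json
    import os

    import nibabel as nib
    import numpy as np

    dataset_dir = '/path/to/nnUNet_raw/Dataset501_MSSpineLesion'  # hypothetical path

    # the case count declared in dataset.json must match the files on disk
    with open(os.path.join(dataset_dir, 'dataset.json')) as f:
        dataset = json.load(f)
    n_images = len(os.listdir(os.path.join(dataset_dir, 'imagesTr')))
    n_labels = len(os.listdir(os.path.join(dataset_dir, 'labelsTr')))
    assert n_images == n_labels == dataset['numTraining'], 'case count mismatch'

    # every training label should only contain the expected values
    # (0 = background, 1 = spinal cord, 2 = lesion for the region-based datasets)
    for label_name in sorted(os.listdir(os.path.join(dataset_dir, 'labelsTr'))):
        label = nib.load(os.path.join(dataset_dir, 'labelsTr', label_name))
        values = np.unique(np.asarray(label.dataobj))
        assert set(values).issubset({0, 1, 2}), f'{label_name}: unexpected values {values}'

    print('dataset.json and labelsTr are consistent')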