diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index fb6dfe6..bc32605 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -20,7 +20,7 @@ jobs: python-version: "3.8" - name: Install lint dependencies - run: pip install wheel setuptools black==22.3.0 isort==5.10.1 flake8==4.0.1 + run: pip install wheel setuptools isort==5.10.1 flake8==4.0.1 black==22.3.0 "black[jupyter]" - name: Lint the code run: sh shell/lint.sh diff --git a/FindOptimumNumberOfClasses.py b/FindOptimumNumberOfClasses.py index 84f377e..85b0f7b 100644 --- a/FindOptimumNumberOfClasses.py +++ b/FindOptimumNumberOfClasses.py @@ -1,17 +1,17 @@ import os +import random + import numpy as np -import tensorflow as tf +from sklearn.metrics import silhouette_score from tensorflow.keras.applications import MobileNetV2 -from tensorflow.keras.preprocessing import image from tensorflow.keras.applications.mobilenet_v2 import preprocess_input -from scipy.spatial.distance import cdist -from sklearn.metrics import silhouette_score -import random +from tensorflow.keras.preprocessing import image + class ImageProcessor: def __init__(self, image_directory): self.image_directory = image_directory - self.model = MobileNetV2(weights='imagenet', include_top=False, pooling='avg') + self.model = MobileNetV2(weights="imagenet", include_top=False, pooling="avg") def load_and_preprocess_image(self, img_path): img = image.load_img(img_path, target_size=(224, 224)) @@ -29,6 +29,7 @@ def extract_features(self): filenames.append(filename) return np.array(features), filenames + class GeneticAlgorithm: def __init__(self, population_size, generations, mutation_rate, max_clusters): self.population_size = population_size @@ -37,7 +38,10 @@ def __init__(self, population_size, generations, mutation_rate, max_clusters): self.max_clusters = max_clusters def initialize_population(self, num_images): - return [np.random.randint(1, min(i + 2, self.max_clusters + 1), size=num_images) for i in range(self.population_size)] + return [ + np.random.randint(1, min(i + 2, self.max_clusters + 1), size=num_images) + for i in range(self.population_size) + ] def fitness(self, individual, features): try: @@ -62,7 +66,7 @@ def evolve(self, features): for generation in range(self.generations): fitness_scores = [self.fitness(ind, features) for ind in population] sorted_indices = np.argsort(fitness_scores) - best_individuals = [population[idx] for idx in sorted_indices[-(self.population_size // 2):]] + best_individuals = [population[idx] for idx in sorted_indices[-(self.population_size // 2) :]] next_generation = best_individuals[:] while len(next_generation) < self.population_size: @@ -77,6 +81,7 @@ def evolve(self, features): return max(population, key=lambda ind: self.fitness(ind, features)) + class ImageClassifier: def __init__(self, image_directory, output_file): self.processor = ImageProcessor(image_directory) @@ -89,11 +94,11 @@ def run(self): self.output_classification(optimal_classes, filenames) def output_classification(self, classes, filenames): - with open(self.output_file, 'w') as file: + with open(self.output_file, "w") as file: for filename, cluster in zip(filenames, classes): file.write(f"{filename}, {cluster}\n") if __name__ == "__main__": - classifier = ImageClassifier('path_to_images', 'output.txt') + classifier = ImageClassifier("path_to_images", "output.txt") classifier.run() diff --git a/README.md b/README.md index 60fcdd5..85af065 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ pip install -r requirements.txt First install linting dependencies: ``` -pip install black==22.3.0 isort==5.10.1 flake8==4.0.1 +pip install isort==5.10.1 flake8==4.0.1 black==22.3.0 "black[jupyter]" ``` Then run linting test by: diff --git a/notebooks/ManyShotTransferLearning.ipynb b/notebooks/ManyShotTransferLearning.ipynb index 6091b90..55514bc 100644 --- a/notebooks/ManyShotTransferLearning.ipynb +++ b/notebooks/ManyShotTransferLearning.ipynb @@ -20,7 +20,7 @@ "from tensorflow.keras.callbacks import Callback, EarlyStopping, ModelCheckpoint\n", "from MLD import multi_lens_distortion\n", "\n", - "os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' \n", + "os.environ[\"TF_FORCE_GPU_ALLOW_GROWTH\"] = \"true\"\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\" # Select GPU" ] }, @@ -33,10 +33,11 @@ "IMG_SIZE = (224, 224)\n", "IMG_SHAPE = IMG_SIZE + (3,)\n", "\n", + "\n", "def network_1():\n", " # Load pre-trained DenseNet201 and ResNet101V2 models\n", - " dense_net_full = tf.keras.applications.DenseNet201(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')\n", - " res_net_full = tf.keras.applications.ResNet101V2(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')\n", + " dense_net_full = tf.keras.applications.DenseNet201(input_shape=IMG_SHAPE, include_top=False, weights=\"imagenet\")\n", + " res_net_full = tf.keras.applications.ResNet101V2(input_shape=IMG_SHAPE, include_top=False, weights=\"imagenet\")\n", "\n", " # Create a new model with only the first 54 layers of DenseNet201\n", " dense_net = tf.keras.Model(inputs=dense_net_full.input, outputs=dense_net_full.layers[178].output)\n", @@ -60,18 +61,19 @@ "\n", " # Dense layers for classification\n", " z = layers.Dropout(0.4)(concatenated)\n", - " z = layers.Dense(512, activation='relu')(z)\n", + " z = layers.Dense(512, activation=\"relu\")(z)\n", " # z = layers.Dropout(0.2)(z)\n", - " z = layers.Dense(2, activation='softmax')(z)\n", + " z = layers.Dense(2, activation=\"softmax\")(z)\n", "\n", " # Final model\n", " model = Model(inputs=input, outputs=z)\n", - " model.compile(optimizer=optimizers.Adam(1e-4), loss=\"CategoricalCrossentropy\", metrics=['accuracy'])\n", + " model.compile(optimizer=optimizers.Adam(1e-4), loss=\"CategoricalCrossentropy\", metrics=[\"accuracy\"])\n", "\n", " model.summary()\n", " return model\n", "\n", - "model = network_1()\n" + "\n", + "model = network_1()" ] }, { @@ -81,8 +83,9 @@ "outputs": [], "source": [ "from os import walk\n", - "filenames = next(walk('./NLCB/Data3/'), (None, None, []))[2] # [] if no file\n", - "filenames_val = next(walk('./NLCB/Data3/Validation/'), (None, None, []))[2] # [] if no file" + "\n", + "filenames = next(walk(\"./NLCB/Data3/\"), (None, None, []))[2] # [] if no file\n", + "filenames_val = next(walk(\"./NLCB/Data3/Validation/\"), (None, None, []))[2] # [] if no file" ] }, { @@ -92,21 +95,20 @@ "outputs": [], "source": [ "def custom_data_generator(directory):\n", - " for filepath in glob.glob(os.path.join(directory, '*.png')): # assuming jpeg images\n", + " for filepath in glob.glob(os.path.join(directory, \"*.png\")): # assuming jpeg images\n", " image = tf.io.read_file(filepath)\n", " image = tf.image.decode_jpeg(image, channels=3)\n", " label = []\n", - " label[0] = 1 if filepath[4] == 'n' else 0 # Check the 5th character from the end for 'n'\n", - " label[1] = 0 if filepath[4] != 'n' else 1\n", + " label[0] = 1 if filepath[4] == \"n\" else 0 # Check the 5th character from the end for 'n'\n", + " label[1] = 0 if filepath[4] != \"n\" else 1\n", " yield image, label\n", "\n", "\n", - "\n", "def custom_preprocessing_function(img):\n", "\n", - " if tf.random.uniform((), minval= 0, maxval=1) > 0.5:\n", + " if tf.random.uniform((), minval=0, maxval=1) > 0.5:\n", " nbr_rot = tf.random.uniform(shape=[], minval=1, maxval=4, dtype=tf.int32)\n", - " img =tf.image.rot90(img, k=nbr_rot)\n", + " img = tf.image.rot90(img, k=nbr_rot)\n", "\n", " img = tf.image.random_hue(img, 0.08)\n", " img = tf.image.random_contrast(img, 0.7, 1.3)\n", @@ -117,20 +119,18 @@ " # print(img.shape)\n", " # img = tf.image.random_crop(img, (int(img.shape[0]/2),int(img.shape[1]/2), 3))\n", " img = tf.image.random_crop(img, (224, 224, 3))\n", - " img = img/255.\n", - " img = tf.image.resize(img,(224,224))\n", - " img = tf.numpy_function(\n", - " multi_lens_distortion, \n", - " [img, 4, (80, 110), (-0.4, 0.4)], \n", - " tf.uint8\n", - " )\n", + " img = img / 255.0\n", + " img = tf.image.resize(img, (224, 224))\n", + " img = tf.numpy_function(multi_lens_distortion, [img, 4, (80, 110), (-0.4, 0.4)], tf.uint8)\n", "\n", " return img\n", "\n", + "\n", "def validation_preprocessing_function(img):\n", " # img = tf.image.random_crop(img, (224, 224, 3))\n", - " img = img/255.\n", - " img = tf.image.resize(img,(224,224))\n", + " img = img / 255.0\n", + " img = tf.image.resize(img, (224, 224))\n", + "\n", "\n", "# Paths\n", "train_data_dir = \"./NLCB/Data3/Training/\"\n", @@ -142,7 +142,7 @@ " shear_range=0.2,\n", " zoom_range=0.2,\n", " horizontal_flip=True,\n", - " preprocessing_function=custom_preprocessing_function # Add more augmentations here\n", + " preprocessing_function=custom_preprocessing_function, # Add more augmentations here\n", ")\n", "\n", "# Create a data generator for validation data\n", @@ -150,21 +150,12 @@ "\n", "# Use custom data generator for training and validation datasets\n", "train_generator = train_datagen.flow_from_directory(\n", - " train_data_dir,\n", - " target_size=(224, 224),\n", - " batch_size=16,\n", - " class_mode='categorical',\n", - " shuffle=True\n", + " train_data_dir, target_size=(224, 224), batch_size=16, class_mode=\"categorical\", shuffle=True\n", ")\n", "\n", "validation_generator = validation_datagen.flow_from_directory(\n", - " validation_data_dir,\n", - " target_size=(224, 224),\n", - " batch_size=16,\n", - " class_mode='categorical',\n", - " shuffle=False\n", - ")\n", - "\n" + " validation_data_dir, target_size=(224, 224), batch_size=16, class_mode=\"categorical\", shuffle=False\n", + ")" ] }, { @@ -173,7 +164,7 @@ "metadata": {}, "outputs": [], "source": [ - "model.compile(optimizer=optimizers.Adamax(1e-4), loss=\"CategoricalCrossentropy\", metrics=['accuracy'])\n", + "model.compile(optimizer=optimizers.Adamax(1e-4), loss=\"CategoricalCrossentropy\", metrics=[\"accuracy\"])\n", "\n", "# # considering you want to monitor accuracy:\n", "# acc_thresh = 0.95\n", @@ -204,19 +195,19 @@ "\n", "# Setting up callbacks for early stopping on minimum validation loss and saving the best model\n", "early_stopping_callback = EarlyStopping(\n", - " monitor='val_loss',\n", + " monitor=\"val_loss\",\n", " patience=patience,\n", " verbose=1,\n", - " mode='min',\n", - " restore_best_weights=True # Restores model weights from the epoch with the best value of the monitored quantity.\n", + " mode=\"min\",\n", + " restore_best_weights=True, # Restores model weights from the epoch with the best value of the monitored quantity.\n", ")\n", "\n", "model_checkpoint_callback = ModelCheckpoint(\n", - " './PWCModel/best_model.h5', # Path where the model will be saved\n", - " monitor='val_loss',\n", + " \"./PWCModel/best_model.h5\", # Path where the model will be saved\n", + " monitor=\"val_loss\",\n", " save_best_only=True, # Only the best model according to the validation loss is saved\n", - " mode='min',\n", - " verbose=1\n", + " mode=\"min\",\n", + " verbose=1,\n", ")\n", "\n", "history = model.fit(\n", @@ -225,11 +216,11 @@ " validation_data=validation_generator,\n", " validation_steps=len(validation_generator),\n", " epochs=200,\n", - " callbacks=[early_stopping_callback, model_checkpoint_callback]\n", + " callbacks=[early_stopping_callback, model_checkpoint_callback],\n", ")\n", "\n", "# Save the overall model after training (optional, as the best model is already saved)\n", - "model.save('./PWCModel/best_PWC_model.h5')\n" + "model.save(\"./PWCModel/best_PWC_model.h5\")" ] }, { @@ -252,20 +243,20 @@ "outputs": [], "source": [ "# summarize history for accuracy\n", - "plt.plot(history.history['accuracy'])\n", - "plt.plot(history.history['val_accuracy'])\n", - "plt.title('model accuracy')\n", - "plt.ylabel('accuracy')\n", - "plt.xlabel('epoch')\n", - "plt.legend(['Training', 'Validation'], loc='upper left')\n", + "plt.plot(history.history[\"accuracy\"])\n", + "plt.plot(history.history[\"val_accuracy\"])\n", + "plt.title(\"model accuracy\")\n", + "plt.ylabel(\"accuracy\")\n", + "plt.xlabel(\"epoch\")\n", + "plt.legend([\"Training\", \"Validation\"], loc=\"upper left\")\n", "plt.show()\n", "# summarize history for loss\n", - "plt.plot(history.history['loss'])\n", - "plt.plot(history.history['val_loss'])\n", - "plt.title('model loss')\n", - "plt.ylabel('loss')\n", - "plt.xlabel('epoch')\n", - "plt.legend(['Training', 'Validation'], loc='upper left')\n", + "plt.plot(history.history[\"loss\"])\n", + "plt.plot(history.history[\"val_loss\"])\n", + "plt.title(\"model loss\")\n", + "plt.ylabel(\"loss\")\n", + "plt.xlabel(\"epoch\")\n", + "plt.legend([\"Training\", \"Validation\"], loc=\"upper left\")\n", "plt.show()" ] } diff --git a/notebooks/PatchPositions.ipynb b/notebooks/PatchPositions.ipynb index d2d0a5a..fd1ce80 100644 --- a/notebooks/PatchPositions.ipynb +++ b/notebooks/PatchPositions.ipynb @@ -11,7 +11,7 @@ "import glob\n", "\n", "The_list = []\n", - "sls = glob.glob('Path/To/Thumbnails/*.png')\n", + "sls = glob.glob(\"Path/To/Thumbnails/*.png\")\n", "for i in sls:\n", " The_list.append(str(i[11:-4]))\n", "\n", @@ -33,10 +33,10 @@ "import matplotlib.pyplot as plt\n", "\n", "# Select GPU\n", - "# os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\" \n", + "# os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "\n", "## if two levels of patches are to be used, if only one is used lvl3_B_p_size can be ignored\n", - "lvl3_B_p_size = 256*3\n", + "lvl3_B_p_size = 256 * 3\n", "lvl3_s_p_size = 256\n", "Th_B_p_size = 51\n", "\n", @@ -50,22 +50,29 @@ "SlNames_valid = The_list[76:97]\n", "\n", "\n", - "def get_sizes_into_set(SlideNames, SlideDir,ThumbDir,EmptySet):\n", + "def get_sizes_into_set(SlideNames, SlideDir, ThumbDir, EmptySet):\n", " for filename in SlideNames:\n", " if not isinstance(filename, str):\n", " filename = filename.numpy().decode(\"utf-8\")\n", - " \n", - " importer = fast.WholeSlideImageImporter.create(SlideDir + filename + '.vsi')\n", + "\n", + " importer = fast.WholeSlideImageImporter.create(SlideDir + filename + \".vsi\")\n", " wsi = importer.runAndGetOutputData()\n", " plane_height5 = wsi.getLevelHeight(5)\n", " plane_width5 = wsi.getLevelWidth(5)\n", " plane_height3 = wsi.getLevelHeight(3)\n", " plane_width3 = wsi.getLevelWidth(3)\n", - " thumbnail = cv2.imread(ThumbDir+ filename + '.png')\n", + " thumbnail = cv2.imread(ThumbDir + filename + \".png\")\n", " thumbnail_height = thumbnail.shape[0]\n", " thumbnail_width = thumbnail.shape[1]\n", " # print(filename+': ', [plane_height5, plane_width5, plane_height3, plane_width3, thumbnail_height, thumbnail_width])\n", - " EmptySet[filename] = [plane_height5, plane_width5, plane_height3, plane_width3, thumbnail_height, thumbnail_width]\n", + " EmptySet[filename] = [\n", + " plane_height5,\n", + " plane_width5,\n", + " plane_height3,\n", + " plane_width3,\n", + " thumbnail_height,\n", + " thumbnail_width,\n", + " ]\n", "\n", "\n", "def get_initial_starting_pos(Slides):\n", @@ -73,10 +80,9 @@ " # finding the last posion on the thumbnail to stop forming patches\n", " last_pos_X = []\n", " last_pos_Y = []\n", - " for Nmbr, filename in enumerate(Slides):\n", - " last_pos_X.append(lvl3_B_p_size*(math.floor(Slides[filename][3]/lvl3_B_p_size)) - 256)\n", - " last_pos_Y.append(lvl3_B_p_size*(math.floor(Slides[filename][2]/lvl3_B_p_size)) - 256)\n", - "\n", + " for Nmbr, filename in enumerate(Slides):\n", + " last_pos_X.append(lvl3_B_p_size * (math.floor(Slides[filename][3] / lvl3_B_p_size)) - 256)\n", + " last_pos_Y.append(lvl3_B_p_size * (math.floor(Slides[filename][2] / lvl3_B_p_size)) - 256)\n", "\n", " # finding the starting positions of X for lvl3_B_patches\n", " Start_pos_X_small = []\n", @@ -86,19 +92,18 @@ " Start_pos_Y_big = []\n", "\n", " for indx, filename in enumerate(Slides):\n", - " Start_pos_X_small.append(list(range((256)*2,last_pos_X[indx],int(lvl3_B_p_size/3))))\n", - " Start_pos_Y_small.append(list(range((256)*2,last_pos_Y[indx],int(lvl3_B_p_size/3))))\n", + " Start_pos_X_small.append(list(range((256) * 2, last_pos_X[indx], int(lvl3_B_p_size / 3))))\n", + " Start_pos_Y_small.append(list(range((256) * 2, last_pos_Y[indx], int(lvl3_B_p_size / 3))))\n", "\n", - " for i,_ in enumerate(Start_pos_X_small):\n", + " for i, _ in enumerate(Start_pos_X_small):\n", " Start_pos_X_big.append([])\n", " for j in Start_pos_X_small[i]:\n", - " Start_pos_X_big[i].append(round(j-256*1.5))\n", + " Start_pos_X_big[i].append(round(j - 256 * 1.5))\n", "\n", - " for i,_ in enumerate(Start_pos_Y_small):\n", + " for i, _ in enumerate(Start_pos_Y_small):\n", " Start_pos_Y_big.append([])\n", " for j in Start_pos_Y_small[i]:\n", - " Start_pos_Y_big[i].append(round(j-256*1.5))\n", - "\n", + " Start_pos_Y_big[i].append(round(j - 256 * 1.5))\n", "\n", " # getting the fifth level starting positions:\n", "\n", @@ -108,12 +113,14 @@ " for indx, filename in enumerate(Slides):\n", " Start_pos_X_fifthLevel.append([])\n", " for ind, _ in enumerate(Start_pos_X_big[indx]):\n", - " Start_pos_X_fifthLevel[indx].append(round((Start_pos_X_big[indx][ind]/Slides[filename][3])*Slides[filename][1]))\n", + " Start_pos_X_fifthLevel[indx].append(\n", + " round((Start_pos_X_big[indx][ind] / Slides[filename][3]) * Slides[filename][1])\n", + " )\n", " Start_pos_Y_fifthLevel.append([])\n", " for ind2, _ in enumerate(Start_pos_Y_big[indx]):\n", - " Start_pos_Y_fifthLevel[indx].append(round((Start_pos_Y_big[indx][ind2]/Slides[filename][2])*Slides[filename][0]))\n", - "\n", - "\n", + " Start_pos_Y_fifthLevel[indx].append(\n", + " round((Start_pos_Y_big[indx][ind2] / Slides[filename][2]) * Slides[filename][0])\n", + " )\n", "\n", " # # get the corresponding Th_B_patch starting positions from Start_pos_X and Start_pos_Y, so you can check the tissue and GT\n", "\n", @@ -126,35 +133,74 @@ " for indx, filename in enumerate(Slides):\n", " Th_Start_pos_X_small.append([])\n", " for ind, _ in enumerate(Start_pos_X_small[indx]):\n", - " Th_Start_pos_X_small[indx].append(round((Start_pos_X_small[indx][ind]/Slides[filename][3])*Slides[filename][5])) # we add thr 20 to get the central small patch not the big one then the patch size should be 10px\n", + " Th_Start_pos_X_small[indx].append(\n", + " round((Start_pos_X_small[indx][ind] / Slides[filename][3]) * Slides[filename][5])\n", + " ) # we add thr 20 to get the central small patch not the big one then the patch size should be 10px\n", " Th_Start_pos_Y_small.append([])\n", " for ind2, _ in enumerate(Start_pos_Y_small[indx]):\n", - " Th_Start_pos_Y_small[indx].append(round((Start_pos_Y_small[indx][ind2]/Slides[filename][2])*Slides[filename][4])) # we add thr 20 to get the central small patch not the big one then the patch size should be 10px\n", + " Th_Start_pos_Y_small[indx].append(\n", + " round((Start_pos_Y_small[indx][ind2] / Slides[filename][2]) * Slides[filename][4])\n", + " ) # we add thr 20 to get the central small patch not the big one then the patch size should be 10px\n", "\n", " Th_Start_pos_X_big.append([])\n", " for ind, _ in enumerate(Start_pos_X_fifthLevel[indx]):\n", - " Th_Start_pos_X_big[indx].append(round((Start_pos_X_fifthLevel[indx][ind]/Slides[filename][1])*Slides[filename][5])) # we add thr 20 to get the central small patch not the big one then the patch size should be 10px\n", + " Th_Start_pos_X_big[indx].append(\n", + " round((Start_pos_X_fifthLevel[indx][ind] / Slides[filename][1]) * Slides[filename][5])\n", + " ) # we add thr 20 to get the central small patch not the big one then the patch size should be 10px\n", " Th_Start_pos_Y_big.append([])\n", " for ind2, _ in enumerate(Start_pos_Y_fifthLevel[indx]):\n", - " Th_Start_pos_Y_big[indx].append(round((Start_pos_Y_fifthLevel[indx][ind2]/Slides[filename][0])*Slides[filename][4])) # we add thr 20 to get the central small patch not the big one then the patch size should be 10px\n", - "\n", - " return Start_pos_X_small, Start_pos_Y_small, Start_pos_X_big,Start_pos_Y_big, Start_pos_X_fifthLevel, Start_pos_Y_fifthLevel, Th_Start_pos_X_small, Th_Start_pos_Y_small, Th_Start_pos_X_big, Th_Start_pos_Y_big\n", + " Th_Start_pos_Y_big[indx].append(\n", + " round((Start_pos_Y_fifthLevel[indx][ind2] / Slides[filename][0]) * Slides[filename][4])\n", + " ) # we add thr 20 to get the central small patch not the big one then the patch size should be 10px\n", + "\n", + " return (\n", + " Start_pos_X_small,\n", + " Start_pos_Y_small,\n", + " Start_pos_X_big,\n", + " Start_pos_Y_big,\n", + " Start_pos_X_fifthLevel,\n", + " Start_pos_Y_fifthLevel,\n", + " Th_Start_pos_X_small,\n", + " Th_Start_pos_Y_small,\n", + " Th_Start_pos_X_big,\n", + " Th_Start_pos_Y_big,\n", + " )\n", "\n", "\n", "Slides_train = {}\n", "\n", - "get_sizes_into_set(SlNames_train, \"Path/To/WSIs/\", '/Path/To/Thumbnails/', Slides_train)\n", + "get_sizes_into_set(SlNames_train, \"Path/To/WSIs/\", \"/Path/To/Thumbnails/\", Slides_train)\n", "\n", "\n", "Slides_valid = {}\n", "\n", - "get_sizes_into_set(SlNames_valid, \"Path/To/WSIs/\", '/Path/To/Thumbnails/', Slides_valid)\n", - "\n", - "\n", - "\n", - "Start_pos_X_small_train, Start_pos_Y_small_train, Start_pos_X_big_train, Start_pos_Y_big_train, Start_pos_X_fifthLevel_train, Start_pos_Y_fifthLevel_train, Th_Start_pos_X_small_train, Th_Start_pos_Y_small_train, Th_Start_pos_X_big_train, Th_Start_pos_Y_big_train = get_initial_starting_pos(Slides_train)\n", - "Start_pos_X_small_valid, Start_pos_Y_small_valid, Start_pos_X_big_valid, Start_pos_Y_big_valid, Start_pos_X_fifthLevel_valid, Start_pos_Y_fifthLevel_valid, Th_Start_pos_X_small_valid, Th_Start_pos_Y_small_valid, Th_Start_pos_X_big_valid, Th_Start_pos_Y_big_valid = get_initial_starting_pos(Slides_valid)\n", - "\n" + "get_sizes_into_set(SlNames_valid, \"Path/To/WSIs/\", \"/Path/To/Thumbnails/\", Slides_valid)\n", + "\n", + "\n", + "(\n", + " Start_pos_X_small_train,\n", + " Start_pos_Y_small_train,\n", + " Start_pos_X_big_train,\n", + " Start_pos_Y_big_train,\n", + " Start_pos_X_fifthLevel_train,\n", + " Start_pos_Y_fifthLevel_train,\n", + " Th_Start_pos_X_small_train,\n", + " Th_Start_pos_Y_small_train,\n", + " Th_Start_pos_X_big_train,\n", + " Th_Start_pos_Y_big_train,\n", + ") = get_initial_starting_pos(Slides_train)\n", + "(\n", + " Start_pos_X_small_valid,\n", + " Start_pos_Y_small_valid,\n", + " Start_pos_X_big_valid,\n", + " Start_pos_Y_big_valid,\n", + " Start_pos_X_fifthLevel_valid,\n", + " Start_pos_Y_fifthLevel_valid,\n", + " Th_Start_pos_X_small_valid,\n", + " Th_Start_pos_Y_small_valid,\n", + " Th_Start_pos_X_big_valid,\n", + " Th_Start_pos_Y_big_valid,\n", + ") = get_initial_starting_pos(Slides_valid)" ] }, { @@ -165,7 +211,7 @@ "source": [ "# Visualizing the patch results\n", "\n", - "image = cv2.imread('/Path/To/Thumbnails/'+SlNames_train[0]+'.png')\n", + "image = cv2.imread(\"/Path/To/Thumbnails/\" + SlNames_train[0] + \".png\")\n", "\n", "for start_y in Th_Start_pos_Y_small_train[0]:\n", " for start_x in Th_Start_pos_X_small_train[0]:\n", @@ -189,7 +235,7 @@ "metadata": {}, "outputs": [], "source": [ - "#create tissue masks\n", + "# create tissue masks\n", "\n", "# Path to the folder containing the image thumbnails\n", "folder_path = \"/Path/To/Thumbnails/\"\n", @@ -231,15 +277,15 @@ "metadata": {}, "outputs": [], "source": [ - "image = cv2.imread('/Path/To/Thumbnails/'+SlNames_train[0]+'.png')\n", - "tissue = cv2.imread('/Path/To/TissueMask/Results/'+SlNames_train[0]+'.png')\n", + "image = cv2.imread(\"/Path/To/Thumbnails/\" + SlNames_train[0] + \".png\")\n", + "tissue = cv2.imread(\"/Path/To/TissueMask/Results/\" + SlNames_train[0] + \".png\")\n", "\n", "for indx1, start_y in enumerate(Th_Start_pos_Y_small_train[0]):\n", " for indx2, start_x in enumerate(Th_Start_pos_X_small_train[0]):\n", " end_x = start_x + 10\n", " end_y = start_y + 10\n", - " \n", - " tissue_patch = tissue[start_y:end_y,start_x:end_x,:]\n", + "\n", + " tissue_patch = tissue[start_y:end_y, start_x:end_x, :]\n", " if len(tissue_patch[tissue_patch > 40]) > 0.3 * len(tissue_patch[tissue_patch < 300]):\n", " cv2.rectangle(image, (start_x, start_y), (end_x, end_y), (0, 0, 0), 4) # Draws a green rectangle\n", " # patches = load_patches([Start_pos_X[0][indx2]+512], [Start_pos_Y[0][indx1]+512], \"D:/WSIs/WSIs/\" + 'HUS_L_005_IC' + \".vsi\", 3, 256)\n", @@ -247,7 +293,6 @@ " # cv2.imwrite('./test/'+'HUS_L_005_IC'+'X'+str(indx2)+'Y'+str(indx1)+'.png', img)\n", "\n", "\n", - "\n", "plt.imshow(image)\n", "plt.show()" ] @@ -262,12 +307,12 @@ "\n", "\n", "def get_clusters_label(arr):\n", - " \n", + "\n", " unique_values, counts = np.unique(arr, return_counts=True)\n", - " \n", + "\n", " # Find the index of the most dominant value\n", " dominant_index = np.argmax(counts)\n", - " \n", + "\n", " # Check if the most dominant value is zero\n", " if unique_values[dominant_index] == 0:\n", " # Find the next most dominant value by excluding zero from consideration\n", @@ -278,13 +323,23 @@ " else:\n", " # If the most dominant value is not zero, return it\n", " dominant_value = unique_values[dominant_index]\n", - " \n", + "\n", " return dominant_value\n", "\n", "\n", - "def get_final_starting_pos(startingPositions, gts, Slides, Th_Start_pos_Y_small, Th_Start_pos_X_small, Start_pos_X_fifthLevel, Start_pos_Y_fifthLevel, Start_pos_X_small, Start_pos_Y_small):\n", + "def get_final_starting_pos(\n", + " startingPositions,\n", + " gts,\n", + " Slides,\n", + " Th_Start_pos_Y_small,\n", + " Th_Start_pos_X_small,\n", + " Start_pos_X_fifthLevel,\n", + " Start_pos_Y_fifthLevel,\n", + " Start_pos_X_small,\n", + " Start_pos_Y_small,\n", + "):\n", "\n", - " for indx, filename in enumerate(Slides):\n", + " for indx, filename in enumerate(Slides):\n", "\n", " # startingPositions.append([])\n", " # gts.append([])\n", @@ -292,26 +347,33 @@ "\n", " name = str(list(Slides.keys())[indx])\n", "\n", - " tissue = cv2.imread('/Path/To/TissueMask/Results/' + name + '.png')\n", - " gt = cv2.imread('/Path/To/GTs/' + name + '.png')\n", - " clusters = np.load('/Path/To/ClusterResults/' + name + '.npy')\n", - "\n", + " tissue = cv2.imread(\"/Path/To/TissueMask/Results/\" + name + \".png\")\n", + " gt = cv2.imread(\"/Path/To/GTs/\" + name + \".png\")\n", + " clusters = np.load(\"/Path/To/ClusterResults/\" + name + \".npy\")\n", "\n", " for indx1, start_y in enumerate(Th_Start_pos_Y_small[indx]):\n", " for indx2, start_x in enumerate(Th_Start_pos_X_small[indx]):\n", " end_x = start_x + 10\n", " end_y = start_y + 10\n", - " \n", - " tissue_patch = tissue[start_y:end_y,start_x:end_x,:]\n", - " gt_patch = gt[start_y:end_y,start_x:end_x,:]\n", + "\n", + " tissue_patch = tissue[start_y:end_y, start_x:end_x, :]\n", + " gt_patch = gt[start_y:end_y, start_x:end_x, :]\n", "\n", " if len(tissue_patch[tissue_patch > 40]) > 0.6 * len(tissue_patch[tissue_patch < 300]):\n", " # patches = load_patches([Start_pos_X[indx][indx2]+512], [Start_pos_Y[indx][indx1]+512], \"D:/WSIs/WSIs/\" + name + \".vsi\", 3, 256)\n", " # img = np.array(patches[0])\n", " # starting positions = [name, lvl5, lvl3]\n", " label = get_clusters_label(clusters)\n", - " startingPositions.append([ \"Path/To/WSIs/\" + name + \".vsi\", [Start_pos_X_fifthLevel[indx][indx2]], [Start_pos_Y_fifthLevel[indx][indx1]],\n", - " [Start_pos_X_small[indx][indx2]], [Start_pos_Y_small[indx][indx1]], label])\n", + " startingPositions.append(\n", + " [\n", + " \"Path/To/WSIs/\" + name + \".vsi\",\n", + " [Start_pos_X_fifthLevel[indx][indx2]],\n", + " [Start_pos_Y_fifthLevel[indx][indx1]],\n", + " [Start_pos_X_small[indx][indx2]],\n", + " [Start_pos_Y_small[indx][indx1]],\n", + " label,\n", + " ]\n", + " )\n", "\n", " if len(gt_patch[gt_patch > 25]) > 0.25 * len(gt_patch[gt_patch < 300]):\n", " # cv2.imwrite('./test/tumor/'+'HUS_L_005_IC'+'X'+str(indx2)+'Y'+str(indx1)+'.png', img)\n", @@ -326,8 +388,7 @@ " startingPositions.pop()\n", "\n", " # if len(gt_patch[gt_patch < 10]) > 0.8 * len(gt_patch[gt_patch < 300]):\n", - " # cv2.imwrite('./test/healthy/'+'HUS_L_005_IC'+'X'+str(indx2)+'Y'+str(indx1)+'.png', img)\n", - "\n", + " # cv2.imwrite('./test/healthy/'+'HUS_L_005_IC'+'X'+str(indx2)+'Y'+str(indx1)+'.png', img)\n", "\n", "\n", "# starting positions = [[name, lvl5X, lvl5Y, lvl3X, lvl3Y]]\n", @@ -339,8 +400,28 @@ "gts_valid = []\n", "\n", "\n", - "get_final_starting_pos(startingPositions_train, gts_train, Slides_train, Th_Start_pos_Y_small_train, Th_Start_pos_X_small_train, Start_pos_X_fifthLevel_train, Start_pos_Y_fifthLevel_train, Start_pos_X_small_train, Start_pos_Y_small_train)\n", - "get_final_starting_pos(startingPositions_valid, gts_valid, Slides_valid, Th_Start_pos_Y_small_valid, Th_Start_pos_X_small_valid, Start_pos_X_fifthLevel_valid, Start_pos_Y_fifthLevel_valid, Start_pos_X_small_valid, Start_pos_Y_small_valid)\n" + "get_final_starting_pos(\n", + " startingPositions_train,\n", + " gts_train,\n", + " Slides_train,\n", + " Th_Start_pos_Y_small_train,\n", + " Th_Start_pos_X_small_train,\n", + " Start_pos_X_fifthLevel_train,\n", + " Start_pos_Y_fifthLevel_train,\n", + " Start_pos_X_small_train,\n", + " Start_pos_Y_small_train,\n", + ")\n", + "get_final_starting_pos(\n", + " startingPositions_valid,\n", + " gts_valid,\n", + " Slides_valid,\n", + " Th_Start_pos_Y_small_valid,\n", + " Th_Start_pos_X_small_valid,\n", + " Start_pos_X_fifthLevel_valid,\n", + " Start_pos_Y_fifthLevel_valid,\n", + " Start_pos_X_small_valid,\n", + " Start_pos_Y_small_valid,\n", + ")" ] }, { @@ -354,12 +435,12 @@ "import pickle\n", "\n", "# Saving the lists to a file\n", - "with open('PositionsAndGts_train.pkl', 'wb') as file:\n", + "with open(\"PositionsAndGts_train.pkl\", \"wb\") as file:\n", " pickle.dump(startingPositions_train, file)\n", " pickle.dump(gts_train, file)\n", "\n", "\n", - "with open('PositionsAndGts_valid.pkl', 'wb') as file:\n", + "with open(\"PositionsAndGts_valid.pkl\", \"wb\") as file:\n", " pickle.dump(startingPositions_valid, file)\n", " pickle.dump(gts_valid, file)" ] diff --git a/notebooks/TrainingOnWSIPatches.ipynb b/notebooks/TrainingOnWSIPatches.ipynb index f2cb40a..7bffac8 100644 --- a/notebooks/TrainingOnWSIPatches.ipynb +++ b/notebooks/TrainingOnWSIPatches.ipynb @@ -11,13 +11,13 @@ "import pickle\n", "\n", "# Loading the lists from the file for training\n", - "with open('./PositionsAndGts_train.pkl', 'rb') as file:\n", + "with open(\"./PositionsAndGts_train.pkl\", \"rb\") as file:\n", " startingPositions_train = pickle.load(file)\n", " gts_train = pickle.load(file)\n", "\n", "\n", "# Loading the lists from the file for validation\n", - "with open('./PositionsAndGts_valid.pkl', 'rb') as file:\n", + "with open(\"./PositionsAndGts_valid.pkl\", \"rb\") as file:\n", " startingPositions_valid = pickle.load(file)\n", " gts_valid = pickle.load(file)" ] @@ -33,10 +33,10 @@ "import numpy as np\n", "\n", "StartingPositions = []\n", - "Gts =[]\n", + "Gts = []\n", "\n", "StartingPositions_valids = []\n", - "Gts_valids =[]\n", + "Gts_valids = []\n", "\n", "\n", "def shuffle_and_convert(starting_positions, gts):\n", @@ -55,13 +55,13 @@ "\n", " return starting_positions_shuffled, gts_shuffled\n", "\n", + "\n", "# Assuming startingPositions_train, gts_train, startingPositions_valid, gts_valid are defined\n", "StartingPositions_train, Gts_train = shuffle_and_convert(startingPositions_train, gts_train)\n", "StartingPositions_valid, Gts_valid = shuffle_and_convert(startingPositions_valid, gts_valid)\n", "\n", "# print(StartingPositions_train[0]) # Uncomment if checking is needed\n", - "# print(StartingPositions_valid[0]) # Uncomment if checking is needed\n", - "\n" + "# print(StartingPositions_valid[0]) # Uncomment if checking is needed" ] }, { @@ -84,12 +84,14 @@ "import matplotlib.pyplot as plt\n", "\n", "num = 5257\n", - "patch = load_patch(StartingPositions_train[num][3], StartingPositions_train[num][4], StartingPositions_train[num][0],3,256)\n", + "patch = load_patch(\n", + " StartingPositions_train[num][3], StartingPositions_train[num][4], StartingPositions_train[num][0], 3, 256\n", + ")\n", "\n", "f, (ax1) = plt.subplots(1, 1, sharey=True)\n", "ax1.imshow(patch)\n", "print(Gts_train[num])\n", - "print(StartingPositions_train[num][0])\n" + "print(StartingPositions_train[num][0])" ] }, { @@ -112,14 +114,14 @@ "\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\" # Use GPU with index 0\n", "\n", - "previous_model = load_model('./pw_tumour_mobilenetv2_model.h5', compile=False)\n", + "previous_model = load_model(\"./pw_tumour_mobilenetv2_model.h5\", compile=False)\n", "\n", "# Define the input shapes\n", "input_shape = (256, 256, 3)\n", "\n", "# Freeze the layers of level 3 branch\n", "for layer in previous_model.layers[0:-12]:\n", - " layer.trainable = False\n", + " layer.trainable = False\n", "\n", "previous_model.layers.pop()\n", "\n", @@ -135,7 +137,7 @@ "# Create the level 3 branch\n", "level3_branch = previous_model(input_level3)\n", "\n", - "output = Dense(2, activation='softmax')(level3_branch)\n", + "output = Dense(2, activation=\"softmax\")(level3_branch)\n", "\n", "from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint\n", "\n", @@ -144,32 +146,39 @@ "model.summary()\n", "\n", "# Compile the model\n", - "model.compile(optimizer=optimizers.Adam(1e-4), \n", - " loss=CategoricalFocalCrossentropy(), \n", - " metrics=['acc', tfa.metrics.F1Score(num_classes=2, average=\"macro\")])\n", + "model.compile(\n", + " optimizer=optimizers.Adam(1e-4),\n", + " loss=CategoricalFocalCrossentropy(),\n", + " metrics=[\"acc\", tfa.metrics.F1Score(num_classes=2, average=\"macro\")],\n", + ")\n", "\n", "# Callback for early stopping (to stop training when validation loss does not improve)\n", - "early_stopping = EarlyStopping(monitor='val_loss', patience=3, min_delta=0.1)\n", + "early_stopping = EarlyStopping(monitor=\"val_loss\", patience=3, min_delta=0.1)\n", "\n", "# Callback to save the best model based on validation loss\n", "model_checkpoint = ModelCheckpoint(\n", - " 'best_model.h5', # Path where to save the model\n", - " monitor='val_loss', # Metric name to monitor\n", + " \"best_model.h5\", # Path where to save the model\n", + " monitor=\"val_loss\", # Metric name to monitor\n", " save_best_only=True, # Only save the best model\n", - " mode='min', # The best model is the one which minimizes the loss\n", - " verbose=1 # Log a message for each improvement\n", + " mode=\"min\", # The best model is the one which minimizes the loss\n", + " verbose=1, # Log a message for each improvement\n", ")\n", "\n", "# Set the directory path where the data is stored\n", - "data_directory = 'path_to_data_directory'\n", + "data_directory = \"path_to_data_directory\"\n", "\n", "# Create the custom data generator\n", "train_generator = CustomDataGenerator(StartingPositions_train, Gts_train, 4)\n", "valid_generator = CustomDataGenerator(StartingPositions_valid, Gts_valid, 4)\n", "\n", "# Train the model with the callbacks\n", - "model.fit(train_generator, validation_data=valid_generator, batch_size=None, verbose=1, callbacks=[early_stopping, model_checkpoint])\n", - "\n" + "model.fit(\n", + " train_generator,\n", + " validation_data=valid_generator,\n", + " batch_size=None,\n", + " verbose=1,\n", + " callbacks=[early_stopping, model_checkpoint],\n", + ")" ] }, { @@ -184,8 +193,10 @@ "import onnx\n", "\n", "# Load the best model saved during training\n", - "best_model_path = 'best_model.h5'\n", - "model = tf.keras.models.load_model(best_model_path, custom_objects={'F1Score': tfa.metrics.F1Score}) # Ensure to include custom_objects if you used any in your model\n", + "best_model_path = \"best_model.h5\"\n", + "model = tf.keras.models.load_model(\n", + " best_model_path, custom_objects={\"F1Score\": tfa.metrics.F1Score}\n", + ") # Ensure to include custom_objects if you used any in your model\n", "\n", "# Convert the loaded model to ONNX format\n", "# Note: You might need to adjust the opset version based on the requirements of your model and compatibility with ONNX\n", @@ -195,7 +206,7 @@ "onnx_file_path = \"./Transfered1.onnx\"\n", "onnx.save(onnx_model, onnx_file_path)\n", "\n", - "print(f\"ONNX model saved to {onnx_file_path}\")\n" + "print(f\"ONNX model saved to {onnx_file_path}\")" ] } ],