-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
716 lines (593 loc) · 23.4 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
# IMPORTS
"""This script reads output data from MCMICRO and generates a report in the form of an html file."""
import webbrowser as browser
import os
import re
from pathlib import Path
from glob import glob
import timeit
import imageio
import argparse
import yaml
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from skimage import (
measure,
segmentation,
) # TODO do you need both skimage and cv2??
from cv2 import GaussianBlur, resize
from jinja2 import Environment, FileSystemLoader
# SETUP
# starting a timer to measure how the performance of the script
start = timeit.default_timer()
print("[report] Generating report")
# jinja config
env = Environment(loader=FileSystemLoader("./"))
# parse command line arguments for input and output
parser = argparse.ArgumentParser()
parser.add_argument(
"-i:",
"--input",
type=str,
help="Location of the input data folder to read",
)
parser.add_argument(
"-o:",
"--output",
type=str,
help="Location where to output the report html file",
)
parser.add_argument(
"-m:",
"--marker",
type=str,
help="The name of a marker to be plotted on the report. See MCMICROs 'markers.csv' for a list of valid marker_names",
)
args = parser.parse_args()
# set default parameter for plotly plots
default_plot_layout = {
"height": 500,
"width": 500,
"autosize": False,
"paper_bgcolor": "rgba(0,0,0,0)",
"plot_bgcolor": "rgba(0,0,0,0)",
}
# FUNCTIONS
def plot_core_overlay(image: np.array, mask: np.array) -> go.Figure:
"""Plot an overlay of TMA cores.
Args:
image (Array): the image that should be overlayed, as a numpy array
mask (Array): the image that should be used as a mask, as a numpy array
Returns:
Figure: a plotly figure containing the core overlay
"""
# clean the mask
mask_cleared = segmentation.clear_border(mask)
# prepare the plotly figure
fig = go.Figure()
fig.add_trace(go.Heatmap(z=image, showscale=False, colorscale="Viridis"))
# apply default layout config
fig.update_layout(default_plot_layout)
# axis need to be reversed for the image not to be upside down
fig.update_yaxes(autorange="reversed")
# measure the properties of each core
props = measure.regionprops(mask_cleared.astype(int), image)
properties = ["area", "perimeter", "intensity_mean"]
# determine the amount of cores
min = int(mask[np.nonzero(mask_cleared)].min())
max = int(mask[np.nonzero(mask_cleared)].max())
# For each label, add a filled scatter trace for its contour,
# and display the properties of the label in the hover of this trace.
for i in range(min, max + 1):
try:
cells_per_core = quantification[i - 1]["CellID"].max()
except IndexError:
if i <= max:
cells_per_core = "NOT FOUND"
print(
f"[report] Can not find quantification data for core #{i}. Please check if {Path(quantification_path).resolve()} contains a valid .csv file for each core."
)
else:
pass
try:
# Find contours
y, x = measure.find_contours(mask_cleared == i, 0.5)[0].T
# retrieve corresponding properties
hoverinfo = f"<b>Number of cells: {cells_per_core}</b><br>"
for prop_name in properties:
hoverinfo += f"<b>{prop_name} (downscaled): {getattr(props[index], prop_name):.2f}</b><br>"
# plot the core
fig.add_trace(
go.Scatter(
x=x,
y=y,
name=f"Core #{i}",
mode="lines",
fill="toself",
showlegend=True,
hovertemplate=hoverinfo,
hoveron="points+fills",
)
)
except IndexError:
print(
f"Can not find contour for core {i}. Please check if /dearray/masks in {data_path_abs} contains a valid mask for each core." # TODO more helpful error message with absolute path missing here
)
# return plot
return fig
def plot_cell_contours_plotly(
im: np.array,
mask: np.array,
data: pd.DataFrame,
marker: str,
cutoff: int,
show_bg: bool = True,
color_above_cutoff: str = "orangered",
color_below_cutoff: str = "green",
) -> go.Figure:
"""Plot a segmentation overlay of cells
Args:
im (np.array): an image to plot a cell overlay on
mask (np.array): a valid segmenation mask
data (pd.DataFrame): quantification data provided by MCMICRO
marker (str): a marker, must be quantified in the quantification data
cutoff (int): marker intensity cutoff for plotting cells
show_bg (bool, optional): Wheter to show the image as a background for the plot. Defaults to True.
color_above_cutoff (str, optional): The color of cell outlines with the marker intensity above the cutoff. Defaults to "lightblue".
color_below_cutoff (str, optional): The color of cell outlines with the marker intensity below the cutoff. Defaults to "red".
Returns:
Figure: a plotly figure containing the cells outlines
"""
# Reset index so the CellID is the index
data = data.set_index("CellID")
# Cell
try:
flag = data[marker].apply(lambda x: marker if x >= cutoff else "")
color = data[marker].apply(
lambda x: color_above_cutoff if x >= cutoff else color_below_cutoff
)
except KeyError:
print(
f"[report] ERROR: Marker '{marker}' not found in {Path(quantification_path).resolve()}. Please check 'markers.csv' in {data_path_abs} for a list of valid markers."
)
quit()
# Get range of cells
min = int(mask[np.nonzero(mask)].min())
max = int(mask[np.nonzero(mask)].max())
# Plot masked marker
if show_bg == False:
# remove the rest of the image
fig = px.imshow(im * mask.astype(bool), color_continuous_scale="Viridis")
else:
# show whole image
fig = px.imshow(im, color_continuous_scale="Viridis")
error_counter = 0
fig.update_layout(default_plot_layout)
# Plot contours each cell at a time
for i in range(min, max + 1):
try:
# Find contours
y, x = measure.find_contours(mask == i, 0.8)[0].T
#
fig.add_trace(
go.Scattergl(
x=x,
y=y,
mode="lines",
showlegend=False,
opacity=0.5,
text=flag[i],
line=dict(color=color[i]),
)
)
except IndexError:
# count invalid mask indices
error_counter += 1
# return plot
print(
f"[report] Can not find contours for {error_counter} cells. Please check if /segmentation contains a valid cell segmentation mask." # TODO read path variable in error message
)
return fig
def overlay_images_at_centroid(bg: np.array, fg: np.array, cen_y: float, cen_x: float):
"""Overlay two image arrays at a center point
Args:
bg (np.array): Background image as array
fg (np.array): Foreground overlay as array
cen_y (float): y postion of the central point
cen_x (float): x position of the central point
Returns:
np.array: Overlayed image as array
"""
h1, w1 = fg.shape[:2]
y_offset = max(
0, cen_y - (h1 // 2)
) # calculate the distance from the center point to the outer edge of the image overlay, if it's < 0 set it to 0 using max to compare it to 0
x_offset = max(0, cen_x - (w1 // 2))
overlay_bounds_y = bg[y_offset : y_offset + h1, x_offset : x_offset + w1].shape[
0
] # subset the background array with the offset distance, take it's shape
overlay_bounds_x = bg[y_offset : y_offset + h1, x_offset : x_offset + w1].shape[
1
] # we will use this ensure that the offset distance does not "leave" the bounds of the array, causing errors
bg[
y_offset : y_offset + h1, x_offset : x_offset + w1
] = fg[ # subset the overlay with the bounds to make sure we do not try overlaying outside of the background array
0:overlay_bounds_y, 0:overlay_bounds_x
]
return bg
# READING/SETTING PARAMETERS AND PATHS
# parse input directory
if not args.input == None:
data_path = args.input
data_path_abs = str(Path(data_path).resolve())
else:
data_path = "./data/"
data_path_abs = str(Path(data_path).resolve())
print(
f"No path to data source was provided. Assuming {data_path_abs} as data source."
)
print(f"[report] Reading MCMICRO output from {data_path_abs}")
if not Path(data_path).is_dir():
print(
f"[report] ERROR: Could not find input folder. Please check if {data_path_abs} exists and contains the full valid output of MCMICRO"
)
quit()
# set parameter for max image size, used for dynamic downscaling later on
max_image_size = (
104857600 # 100MB # the actual html ends up being much smaller, how? Plotly magic??
)
# set inital scale factor for dynamic downscaling
scale_factor = 1
# y/x of the high res sample
side_length = 500
# marker
marker_to_plot = args.marker
# find MCMICRO parameter yaml file
params_paths = [
path for path in Path(data_path).rglob("*.yml")
] # globbing just in case there are mulltiple parameter files
# open the parameter yaml
if params_paths == []: # TODO refactor exeption handling, too much redundancy
print(
f"[report] ERROR: MCMICRO parameter file not found or could not be read. Please check the /qc folder in {data_path_abs} for a valid params.yml and rerun the script."
)
quit()
for path in params_paths:
with open(path, "r") as f:
mcmicro_params = yaml.safe_load(f)
# read valid file extensions from parameter file
file_extensions = list(mcmicro_params["singleFormats"].keys()) + list(
mcmicro_params["multiFormats"].keys()
)
# read the name of the image sample
sample_name = mcmicro_params["sampleName"]
# read the MCMICRO modules that were run from the parameter yaml - currently unused, as parameters are not yet displayed
regex = re.compile(r"^.*?:")
modules = {key: value for key, value in mcmicro_params.items() if "module" in key}
segmentation_module_names = [
re.sub(regex, "", module[0]) for module in modules["modulesPM"]
]
# assemble path to segmentation files
segmentation_path = data_path + "/segmentation/"
# assemble path to registration files
registration_path = data_path + "/registration/"
# assemble path to quantification files
quantification_path = data_path + "/quantification/"
# assemble path to dearrayed cores + corresponding files
cores_path = data_path + "/dearray/"
core_masks_path = data_path + "/dearray/masks/"
core_centroid_path = data_path + "/qc/coreo/centroidsY-X.txt"
# Check for dearray folder to determine if TMA or not
if Path(cores_path).is_dir() is True:
print("[report] dearray folder detected. Assuming TMA data.")
tma_mode = True
else:
print("[report] No dearray folder detected. Assuming whole slide data.")
tma_mode = False
# READING FILES
# load quantification csv(s) # TODO account for multiple files
quantification_dict = {}
try:
for module in segmentation_module_names:
# find all image paths
quantification_dict[module] = [
Path(path) for path in glob(f"{quantification_path}{module}*")
]
# read the images
quantification_dict[module] = [
pd.read_csv(path) for path in quantification_dict[module]
]
except (OSError, FileNotFoundError):
print(
f"[report] ERROR: Registration image not found or could not be read. Please check {str(Path(quantification_path))} for valid csv files and rerun the script."
)
quit()
# TEMPORARY SOLUTION UNTIL MULTIPLE IMAGE SELECTION/SEGMENTATION ALGORITHM COMPARISION IS IMPLEMENTED IN THE FRONTEND
# TODO
quantification = list(quantification_dict.values())[0]
# loading images, assuming only one registration image can exist
try:
whole_image = [Path(path) for path in glob(f"{registration_path}*")]
image_file_size = os.path.getsize(
whole_image[0]
) # get file size, used for downsampling later
whole_image = imageio.imread(whole_image[0])
except IndexError: # TODO refactor exeption handling, too much redundancy
print(
f"[report] ERROR: Registration image not found or could not be read. Please check {str(Path(registration_path))} for a valid image and rerun the script."
)
quit()
# load cell segementation masks
# use a dictionary to associate the images with different segmentation algorithms
segmentation_dict_cells = {}
try:
for module in segmentation_module_names:
# find all image paths
segmentation_dict_cells[module] = [
Path(path) for path in glob(f"{segmentation_path}{module}*/cell*")
]
# read the images
segmentation_dict_cells[module] = [
imageio.imread(imagepath) for imagepath in segmentation_dict_cells[module]
]
except (OSError, FileNotFoundError):
print(
f"[report] ERROR: Segmentation images not found or could not be read. Please check {str(Path(segmentation_path))} for valid csv files and rerun the script."
)
quit()
# load nuclear segementation masks
# use a dictionary to associate the images with different segmentation algorithms
segmentation_dict_nuclei = {}
try:
for module in segmentation_module_names:
# find all image paths
segmentation_dict_nuclei[module] = [
Path(path) for path in glob(f"{segmentation_path}{module}*/cell*")
]
# read the images
segmentation_dict_nuclei[module] = [
imageio.imread(imagepath) for imagepath in segmentation_dict_nuclei[module]
]
except (OSError, FileNotFoundError):
print(
f"[report] ERROR: Registration image not found or could not be read. Please check {str(Path(segmentation_path))} for valid csv files and rerun the script."
)
quit()
# read coordinates of the TMA core centroids and format them into an array
if tma_mode:
with open(core_centroid_path) as f:
line = f.readlines()
core_centroids = [string.split() for string in line]
core_centroids = [
[int(float(coordinate)) for coordinate in coordinate_pair]
for coordinate_pair in core_centroids
]
core_centroids = np.array(core_centroids)
# PROCESSING QUANTIFICATION DATA
# determine number of cores
if tma_mode:
num_cores = len(core_centroids)
else:
num_cores = "-"
# ASSMEBLING FULL IMAGE CORE MASK IF TMA
# read the single core files
if tma_mode:
# find the filepaths
cores = [
Path(path) for path in glob(f"{cores_path}[!_mask]*")
] # TODO Can i use the exclude pattern in the path variable
# read the images
cores = [imageio.imread(imagepath) for imagepath in cores]
# read the single core masks
single_core_masks = [Path(path) for path in glob(f"{core_masks_path}*")]
single_core_masks = [imageio.imread(imagepath) for imagepath in single_core_masks]
# scale the (smaller) masks to the size of the actual cores
for index, mask in enumerate(single_core_masks):
single_core_masks[index] = resize(mask, cores[index].shape)
# prepare an empty image to fill with core masks in the next steps
whole_core_mask = np.zeros(whole_image.shape)
# make each core mask distinct by assigning a specific pixel intensity to it, similar to the cell segmentation masks
for index, mask in enumerate(single_core_masks):
mask[mask > 0] = index + 1
# stich the mulitple single core masks together to one large core mask
for index, core_centroid in enumerate(core_centroids):
whole_core_mask = overlay_images_at_centroid(
whole_core_mask,
single_core_masks[index],
core_centroids[index][0],
core_centroids[index][1],
)
else:
whole_core_mask = None
# ASSEMBLING FULL IMAGE SEGMENTATION MASK IF TMA
num_cells = 0
num_nuclei = 0
if tma_mode:
# prepare an empty image to fill with cell/nuclei core in the next steps
whole_core_mask_cells = np.zeros(whole_image.shape)
whole_core_mask_nuclei = np.zeros(whole_image.shape)
# prepare empty dicts to associate the whole image masks with different segmentation algorithms
segmentation_dict_cells_whole = {}
segmentation_dict_nuclei_whole = {}
# loop through the modules
for module in segmentation_dict_cells.keys():
if not segmentation_dict_cells[module] == []:
# loop through the core centroids and stich the mulitple single core masks together to one large core mask for the cells
for index, core_centroid in enumerate(core_centroids):
segmentation_dict_cells_whole[module] = overlay_images_at_centroid(
whole_core_mask_cells,
segmentation_dict_cells[module][index],
core_centroids[index][0],
core_centroids[index][1],
)
# determine number of cells from segmentation masks
num_cells = num_cells + np.amax(segmentation_dict_cells[module][index])
for index, core_centroid in enumerate(core_centroids):
segmentation_dict_nuclei_whole[module] = overlay_images_at_centroid(
whole_core_mask_nuclei,
segmentation_dict_nuclei[module][index],
core_centroids[index][0],
core_centroids[index][1],
)
# determine number of cores from segmentation masks
num_nuclei = num_nuclei + np.amax(
segmentation_dict_nuclei[module][index]
)
# TEMPORARY SOLUTION UNTIL MULTIPLE IMAGE SELECTION/SEGMENTATION ALGORITHM COMPARISION IS IMPLEMENTED IN THE FRONTEND
# TODO
segmentation_cells = list(segmentation_dict_cells_whole.values())[0]
segmentation_nuclei = list(segmentation_dict_nuclei_whole.values())[0]
else:
segmentation_cells = list(segmentation_dict_cells.values())[0][0]
segmentation_nuclei = list(segmentation_dict_nuclei.values())[0][0]
num_cells = np.amax(segmentation_cells)
num_nuclei = np.amax(segmentation_nuclei)
# SCALING IMAGE
# getting image dimensions
image_width = whole_image.shape[0]
image_height = whole_image.shape[1]
image_size = whole_image.size
# file size per pixel
image_factor = image_file_size / image_size
# initialize variables for downscaling
scaled_file_size = image_file_size
scale_factor = 1
# calculate scaling factor to match maximum file size
while scaled_file_size > max_image_size:
scale_factor = scale_factor * 0.98
scaled_width = image_width * scale_factor
scaled_height = image_height * scale_factor
scaled_file_size = scaled_width * scaled_height * image_factor
# scale images and masks
print(f"[report] Scaling image to {int(scaled_width)}x{int(scaled_height)} pixels")
scaled_image_size = (int(scaled_width), int(scaled_height))
whole_image_resized = resize(whole_image, scaled_image_size)
if tma_mode:
whole_core_mask_resized = resize(whole_core_mask, scaled_image_size)
# HIGH RES SAMPLE
blurry_image = GaussianBlur(
whole_image, (5, 5), 0
) # blurring makes it less susceptible to outlier bright pixels
# find the maximum intensity values in the image, get its coordinates
maxLoc = np.unravel_index(np.argmax(blurry_image, axis=None), blurry_image.shape)
# calculate the corners of a square around maxLoc
p1 = (int(maxLoc[1] - (side_length / 2)), int(maxLoc[0] - (side_length / 2)))
p2 = (int(maxLoc[1] + (side_length / 2)), int(maxLoc[0] + (side_length / 2)))
# subset the images + masks with the corner points
high_res_crop = whole_image[p1[1] : p2[1], p1[0] : p2[0]]
high_res_segmentation_mask_cells = segmentation_cells[p1[1] : p2[1], p1[0] : p2[0]]
high_res_segmentation_mask_nuclei = segmentation_nuclei[p1[1] : p2[1], p1[0] : p2[0]]
# PLOTTING
# prepare overview figure
Overview_plot = go.Figure()
Overview_plot.add_trace(go.Heatmap(z=whole_image_resized, colorscale="Viridis"))
# reverse the y axis, so the image is not upside dowsn
Overview_plot.update_yaxes(autorange="reversed")
Overview_plot.update_layout(yaxis=dict(scaleanchor="x"))
# add dropdown
Overview_plot.update_layout(
default_plot_layout,
updatemenus=[
dict(
buttons=list(
[
# Button 1: "Overview"
dict(
label="Overview",
method="update",
# new data to display when the button is chosen
args=[{"z": [whole_image_resized]}],
),
# Button 2: "High res sample"
dict(
label="High res sample",
method="update",
# new data to display when the button is chosen
args=[
{
"z": [high_res_crop],
}
],
),
]
),
# Button layout
direction="down",
# large padding to the left so it looks better on the page
pad={
"l": 250,
},
showactive=True,
x=0,
xanchor="left",
y=1.2,
yanchor="top",
bgcolor="white",
)
],
)
# TMA
if tma_mode:
TMA_plot = plot_core_overlay(whole_image_resized, whole_core_mask_resized)
TMA_text = "Image"
else:
TMA_text = "TMA mode off or no TMA data could be found. Run MCMICRO--tma to process TMA data."
# Segmentation
segmentation_plot = plot_cell_contours_plotly(
high_res_crop,
high_res_segmentation_mask_cells,
quantification[0],
marker_to_plot,
0,
)
# ASSEMBLE/GENERATE HTML
# Overview
Overview_html = Overview_plot.to_html(
config={"doubleClick": "reset", "scrollZoom": True, "displayModeBar": False},
full_html=False,
)
# TMA
if tma_mode:
TMA_html = TMA_plot.to_html(
config={"doubleClick": "reset", "scrollZoom": True, "displayModeBar": False},
full_html=False,
)
else:
TMA_html = ""
# Segmentation
segmentation_html = segmentation_plot.to_html(
config={
"doubleClick": "reset",
"scrollZoom": True,
"displayModeBar": False,
},
full_html=False,
)
# parameters for the jinja template
html_parameters = {
"num_cores": num_cores,
"num_cells": num_cells,
"num_nuclei": num_nuclei,
"Overview": Overview_html,
"TMA": TMA_html,
"Segmentation": segmentation_html,
"TMA_text": TMA_text,
}
# read the jinja template
TEMPLATE_FILE = "./mcmicrogui/template.html" # TODO WARNING THIS PATH IS REQUIRED FOR THE APPLICATION TO RUN IN A CONTAINER. IT DOES NOT WORK WHEN DEBUGGING LOCALLY!
# TEMPLATE_FILE = "template.html" # TODO USE THIS PATH FOR LOCAL DEBUGGING!
template = env.get_template(TEMPLATE_FILE)
# pass the parameters and generate the html
report_html_code = template.render(html_parameters=html_parameters)
# write the report file
output_path = Path(f"{args.output}/report.html")
with open(output_path, "w") as fh:
fh.write(report_html_code)
# open it in the default browser
browser.open(output_path)
# stop the timer
stop = timeit.default_timer()
elapsed = "{:.2f}".format(stop - start)
print(f"[report] Report generated in {elapsed} seconds")