-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_patches.py
executable file
·100 lines (80 loc) · 3.16 KB
/
extract_patches.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
"""extract_patches.py
Patch extraction script.
"""
import re
import glob
import os
import tqdm
import pathlib
import numpy as np
from misc.patch_extractor import PatchExtractor
from misc.utils import rm_n_mkdir
from dataset import get_dataset
# -------------------------------------------------------------------------------------
if __name__ == "__main__":
    # Determines whether to extract the type map (only applicable to
    # datasets that carry per-nucleus class labels).
    type_classification = True

    win_size = [540, 540]  # patch window size (height, width)
    step_size = [164, 164]  # stride between consecutive patch origins
    # 'mirror' pads at image borders; 'valid' extracts only from fully
    # in-bounds regions.
    extract_type = "mirror"

    # Name of dataset - use Kumar, CPM17 or CoNSeP.
    # This is used to get the specific dataset img and ann loading scheme
    # from dataset.py.
    dataset_name = "tabsap"
    save_root = "/data/hovernet_training_data/%s/" % dataset_name

    # Per-split (extension, directory) pairs for images and annotations.
    dataset_info = {
        "train": {
            "img": (".tif", "/data/tabsap/train/images/"),
            "ann": (".mat", "/data/tabsap/train/labels/"),
        },
        "valid": {
            "img": (".tif", "/data/tabsap/test/images/"),
            "ann": (".mat", "/data/tabsap/test/labels/"),
        },
    }

    def patterning(path):
        """Escape '[' and ']' in *path* so glob.glob treats them literally."""
        # Raw strings fix the invalid escape sequences ("\[", "\]") that
        # the original non-raw pattern emits warnings for on Python >= 3.12.
        return re.sub(r"([\[\]])", r"[\1]", path)

    parser = get_dataset(dataset_name)
    xtractor = PatchExtractor(win_size, step_size)
    for split_name, split_desc in dataset_info.items():
        img_ext, img_dir = split_desc["img"]
        ann_ext, ann_dir = split_desc["ann"]

        # NOTE(review): save_root already embeds dataset_name, so the
        # resulting path repeats it (.../tabsap/tabsap/...). Kept as-is for
        # backward-compatible output locations — confirm if intentional.
        out_dir = "%s/%s/%s/%dx%d_%dx%d/" % (
            save_root,
            dataset_name,
            split_name,
            win_size[0],
            win_size[1],
            step_size[0],
            step_size[1],
        )
        # Enumerate annotation files; images are matched by shared basename.
        file_list = glob.glob(patterning("%s/*%s" % (ann_dir, ann_ext)))
        file_list.sort()  # ensure same ordering across platforms

        rm_n_mkdir(out_dir)  # wipe and recreate the split's output directory

        pbar_format = "Process File: |{bar}| {n_fmt}/{total_fmt}[{elapsed}<{remaining},{rate_fmt}]"
        pbarx = tqdm.tqdm(
            total=len(file_list), bar_format=pbar_format, ascii=True, position=0
        )

        for file_idx, file_path in enumerate(file_list):
            base_name = pathlib.Path(file_path).stem
            img = parser.load_img("%s/%s%s" % (img_dir, base_name, img_ext))
            ann = parser.load_ann(
                "%s/%s%s" % (ann_dir, base_name, ann_ext), type_classification
            )

            # Stack annotation channels behind the image so a single
            # extraction pass yields aligned image+label patches.
            img = np.concatenate([img, ann], axis=-1)
            sub_patches = xtractor.extract(img, extract_type)

            pbar_format = "Extracting : |{bar}| {n_fmt}/{total_fmt}[{elapsed}<{remaining},{rate_fmt}]"
            pbar = tqdm.tqdm(
                total=len(sub_patches),
                leave=False,
                bar_format=pbar_format,
                ascii=True,
                position=1,
            )
            # Each patch is saved individually as <basename>_<idx>.npy.
            for idx, patch in enumerate(sub_patches):
                np.save("{0}/{1}_{2:03d}.npy".format(out_dir, base_name, idx), patch)
                pbar.update()
            pbar.close()

            pbarx.update()
        pbarx.close()