-
Notifications
You must be signed in to change notification settings - Fork 9
/
parse_annotation.py
163 lines (143 loc) · 6.7 KB
/
parse_annotation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import numpy as np
import os, sys
from os import listdir
from os.path import isfile, join, isdir
import cv2
import argparse
import imageio
# Command-line interface: one positional argument selects the video to
# process. Videos are counted from 0 across all scene directories, in sorted
# order (see scene_index below).
parser = argparse.ArgumentParser()
parser.add_argument(
    "scene_ind",
    type=int,  # reject non-numeric input up front with a usage message
    help="zero-based index of the video to process, counted over all "
         "<scene>/<video> annotation directories in sorted order",
)
args = parser.parse_args()

# Locations, relative to the current working directory.
source_path = 'stanford_campus_dataset/annotations/'  # <scene>/<video>/annotations.txt
img_path = 'processed'                                # input frames, scene_XXX/NNNNNNN-img.jpg
target_path = 'new_processed'                         # output root, one scene_XXX dir per video

# Running count of the videos visited so far; compared against scene_ind.
scene_index = 0

# Scene directories found under source_path, in sorted order.
scenes = sorted(d for d in listdir(source_path) if isdir(join(source_path, d)))

# Annotation label -> integer category id.
categories = {
    'Biker': 0,
    'Pedestrian': 1,
    'Skater': 2,
    'Cart': 3,
    'Car': 4,
    'Bus': 5,
}

# Category id -> outline colour used when drawing the mask image.
# NOTE(review): channel order is presumably RGB since the mask is saved
# with imageio -- confirm.
categories_colors = {
    0: [255, 0, 0],
    1: [0, 255, 0],
    2: [0, 0, 255],
    3: [255, 255, 0],
    4: [0, 255, 255],
    5: [255, 0, 255],
}

# Size, in pixels, of the resized frames and of the mask images.
target_width = 320
target_height = 576
def writeFloat(name, data):
    """Write a 2-D, 3-D or 4-D array to ``name`` as a headed float32 dump.

    The file starts with an ASCII header, one value per line: the literal
    ``float``, the number of dimensions, then the axis sizes with the first
    two swapped (``shape[1]``, ``shape[0]``, ``shape[2]``, ...). The header
    is followed by the values as raw 32-bit floats. 3-D data is written in
    axis order ``(2, 0, 1)`` and 4-D data in ``(3, 2, 0, 1)``.

    Args:
        name: Path of the file to create; an existing file is overwritten.
        data: NumPy array with 2, 3 or 4 dimensions.

    Raises:
        Exception: If ``data`` has any other number of dimensions. The check
            runs before the file is opened, so no partial file is left
            behind.
    """
    dim = len(data.shape)
    # Axis permutation applied to the payload for each supported rank.
    axis_orders = {2: None, 3: (2, 0, 1), 4: (3, 2, 0, 1)}
    if dim not in axis_orders:
        raise Exception('bad float file dimension: %d' % dim)

    header = ['float', '%d' % dim, '%d' % data.shape[1], '%d' % data.shape[0]]
    header.extend('%d' % data.shape[i] for i in range(2, dim))

    payload = data.astype(np.float32)
    order = axis_orders[dim]
    if order is not None:
        payload = np.transpose(payload, order)

    # The context manager guarantees the handle is flushed and closed.
    with open(name, 'wb') as f:
        f.write(('\n'.join(header) + '\n').encode('ascii'))
        payload.tofile(f)
def get_mask(width, height, bboxs, categs):
    """Draw one-pixel-wide box outlines, coloured by category, on a black canvas.

    Args:
        width: Canvas width in pixels.
        height: Canvas height in pixels.
        bboxs: Sequence of boxes, each indexable as ``[x0, y0, x1, y1]``
            where ``x`` indexes columns and ``y`` indexes rows; both corners
            are part of the outline.
        categs: Category id of each box, parallel to ``bboxs``; every id must
            be a key of ``categories_colors``.

    Returns:
        Float array of shape ``(height, width, 3)``: zeros everywhere except
        on the box outlines, which hold the category colour.

    Raises:
        IndexError: If a drawn coordinate lies beyond the canvas (or a box or
            ``categs`` is too short); the offending box is printed first.
        KeyError: If a category id has no entry in ``categories_colors``.
    """
    result = np.zeros((height, width, 3))
    for ind, bbox in enumerate(bboxs):
        try:
            color = categories_colors[categs[ind]]
            left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
            # Ranges include right/bottom so the bottom-right corner pixel
            # is drawn too; half-open ranges would leave it blank.
            for col in range(left, right + 1):
                result[top, col, :] = color
                result[bottom, col, :] = color
            for row in range(top, bottom + 1):
                result[row, left, :] = color
                result[row, right, :] = color
            # NOTE(review): negative coordinates are not rejected; NumPy
            # treats them as indices from the end of the axis.
        except IndexError:
            print("Oops! ", bbox)
            raise
    return result
# Walk every <scene>/<video> annotation directory in sorted order, counting
# them with scene_index, and process only the video whose running index
# equals the scene_ind command-line argument.
target_index = int(args.scene_ind)
for scene in scenes:
    scene_dir = join(source_path, scene)
    videos_scene = sorted(d for d in listdir(scene_dir) if isdir(join(scene_dir, d)))
    for video in videos_scene:
        if scene_index != target_index:
            scene_index += 1
            continue

        print('processing %s' % join(scene, video))
        scene_name = 'scene_%03d' % scene_index
        out_dir = join(target_path, scene_name)
        os.makedirs(out_dir, exist_ok=True)

        # Parse the annotation file: one whitespace-separated record per
        # line. Blank lines are skipped so stray empty lines do not abort
        # the run.
        src_path = join(source_path, scene, video)
        with open(join(src_path, 'annotations.txt')) as f:
            records = [line.split() for line in f if line.strip()]
        length = len(records)

        # Columns 0-8 are integers; column 8 is validated by int() but is
        # not used below. Column 9 is the quoted category label.
        numeric = np.array(
            [[int(value) for value in record[:9]] for record in records],
            dtype=int,
        ).reshape(length, 9)
        (obj_id, tl_x, tl_y, br_x, br_y,
         frame_id, out_of_frame, occluded) = numeric[:, :8].T
        category = np.array(
            [categories[record[9].replace('"', '')] for record in records],
            dtype=int,
        )

        # An annotation file without records yields no frames instead of
        # failing on max() of an empty array.
        num_frames = frame_id.max() if length else 0

        # Every 5th frame is processed.
        # NOTE(review): the stop value is exclusive, so a frame whose id
        # equals frame_id.max() is never processed -- confirm intended.
        for k in range(0, num_frames, 5):
            mask_file = join(out_dir, '%07d-mask.jpg' % k)
            # The mask file marks a frame as already done (resumable runs).
            if os.path.exists(mask_file):
                continue

            # read and transpose the image (first two axes swapped)
            img = np.transpose(
                imageio.imread(join(img_path, scene_name, '%07d-img.jpg' % k)),
                [1, 0, 2],
            )
            height, width = img.shape[0], img.shape[1]
            scale_x = width / target_width
            scale_y = height / target_height

            # keep only this frame's annotations that are neither out of
            # view nor occluded
            visible = np.where(
                (frame_id == k) & (out_of_frame == 0) & (occluded == 0)
            )[0]

            # labels: obj_ids of all objects kept for this frame.
            # features: one row per obj_id holding the bbox and category;
            # rows of absent objects stay None.
            # NOTE(review): 1154 is presumably an upper bound on obj_id; a
            # larger id raises IndexError below -- confirm.
            labels = []
            features = np.full((1154, 5), None)
            bboxes = []
            categs = []
            for fi in visible:
                # Coordinates are swapped to match the transposed image and
                # the far corner is clamped to the target size.
                bbox = [int((tl_y[fi]) / scale_x),
                        int((tl_x[fi]) / scale_y),
                        min(int((br_y[fi]) / scale_x), target_width - 1),
                        min(int((br_x[fi]) / scale_y), target_height - 1)]
                labels.append(obj_id[fi])
                categs.append(category[fi])
                bboxes.append(bbox)
                features[obj_id[fi], :] = bbox + [category[fi]]

            # create a mask image with bbox colored according to categories
            mask = get_mask(target_width, target_height, bboxes, categs)
            # resize the image
            resized_img = cv2.resize(img, dsize=(target_width, target_height),
                                     interpolation=cv2.INTER_CUBIC)

            # save the outputs: resized_img, labels, features and mask
            imageio.imwrite(join(out_dir, '%07d-img-resized.jpg' % k), resized_img)
            writeFloat(join(out_dir, '%07d-labels.float3' % k),
                       np.array(labels).reshape((len(labels), 1)))
            writeFloat(join(out_dir, '%07d-features.float3' % k), features)
            # Written last: the mask doubles as the "frame done" marker
            # checked at the top of this loop, so an interrupted run never
            # leaves a frame that looks complete but lacks its other outputs.
            imageio.imwrite(mask_file, mask)

        scene_index += 1