-
Notifications
You must be signed in to change notification settings - Fork 16
/
demo.py
executable file
·217 lines (202 loc) · 9.09 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#!/usr/bin/env python3
import cv2
from math import atan2, degrees
import sys
sys.path.append("../..")
from MovenetDepthai import MovenetDepthai, KEYPOINT_DICT
from MovenetRenderer import MovenetRenderer
import argparse
import numpy as np
def estimate_focus_zone_size(body, scale, score_thresh):
"""
This function estimate the zine of the zone.
We calculate the length of segments from a predefined list. A segment length
is the distance between the 2 endpoints weighted by a coefficient. The weight have been chosen
so that the weighted length length of all segments are roughly equal.
We take the maximal length to estimate the size of the focus zone.
If no segment are visible, we consider the body is very close
to the camera, and therefore there is no need to focus. Return 0
To not have at least one shoulder and one hip visible means the body is also very close
and the estimated size needs to be adjusted (bigger)
"""
segments = [
("left_shoulder", "left_elbow", 2.3),
("left_elbow", "left_wrist", 2.3),
("left_shoulder", "left_hip", 1),
("left_shoulder", "right_shoulder", 1.5),
("right_shoulder", "right_elbow", 2.3),
("right_elbow", "right_wrist", 2.3),
("right_shoulder", "right_hip", 1),
]
lengths = []
for s in segments:
if body.scores[KEYPOINT_DICT[s[0]]] > score_thresh and body.scores[KEYPOINT_DICT[s[1]]] > score_thresh:
l = np.linalg.norm(body.keypoints[KEYPOINT_DICT[s[0]]] - body.keypoints[KEYPOINT_DICT[s[1]]])
lengths.append(l)
if lengths:
if ( body.scores[KEYPOINT_DICT["left_hip"]] < score_thresh and
body.scores[KEYPOINT_DICT["right_hip"]] < score_thresh or
body.scores[KEYPOINT_DICT["left_shoulder"]] < score_thresh and
body.scores[KEYPOINT_DICT["right_shoulder"]] < score_thresh) :
coef = 1.5
else:
coef = 1.0
return 2 * int(coef * scale * max(lengths) / 2) # The size is made even
else:
return 0
def get_focus_zone(body, frame, hand_label, scale, score_thresh, hands_up_only = False):
"""
Return a list of zones around one or 2 hands depending on the value of hand_label.
If hand_label == "left_right", the list contains at most 2 zones, the zone around the left hand,
and the zone around the right hand.
For all othe values of hand_label, the list contains at most one zone.
If hand_value == "left" (resp "right"), this is the zone around the left (resp right) hand.
If hand_value == "higher", this is the zone around the hand closest to the top of the image.
If hand_value == "group", this a larger zone that contains both hands, or only one hand if an hand is not visible.
A zone is a list [left, top, right, bottom] describing the position in pixels of the zone in the image.
"""
def zone_from_center_size(x, y, size):
"""
Calculate the top left corner (x1, y1) and botom right corner (x2, y2) of the zone from its center and size
"""
half_size = size // 2
size = half_size * 2
x1 = x - half_size
x2 = x + half_size -1
if x1 < 0:
x1 = 0
x2 = size - 1
if x2 >= w:
x2 = w - 1
x1 = w - size
y1 = y - half_size
y2 = y + half_size
if y1 < 0:
y1 = 0
y2 = size - 1
if y2 >= h:
y2 = h - 1
y1 = h - size
return [x1, y1, x2, y2]
def get_one_hand_zone(hand_label, scale, hands_up_only):
"""
Determine the zone around the "hand_label" (left of right) hand.
Return [left, top, right, bottom] of the zone
or None if the zone could not be determined
"""
wrist_kp = hand_label + "_wrist"
wrist_score = body.scores[KEYPOINT_DICT[wrist_kp]]
if wrist_score < score_thresh:
return None
x, y = body.keypoints[KEYPOINT_DICT[wrist_kp]]
if hands_up_only:
# We want to detect only hands where the wrist is above the elbow (when visible)
elbow_kp = hand_label + "_elbow"
if body.scores[KEYPOINT_DICT[elbow_kp]] > score_thresh and \
body.keypoints[KEYPOINT_DICT[elbow_kp]][1] < body.keypoints[KEYPOINT_DICT[wrist_kp]][1]:
return None
# Let's evaluate the size of the focus zone
size = estimate_focus_zone_size(body, scale, score_thresh)
if size == 0: return [0, 0, frame_size-1, frame_size-1] # The hand is too close. No need to focus
return zone_from_center_size(x, y, size)
h,w = frame.shape[:2]
frame_size = max(h, w)
zone_list = []
if hand_label == "group":
zonel = get_one_hand_zone("left", scale, hands_up_only)
if zonel:
zoner = get_one_hand_zone("right", scale, hands_up_only)
if zoner:
xl1, yl1, xl2, yl2 = zonel
xr1, yr1, xr2, yr2 = zoner
x1 = min(xl1, xr1)
y1 = min(yl1, yr1)
x2 = max(xl2, xr2)
y2 = max(yl2, yr2)
# Center (x,y)
x = int((x1+x2)/2)
y = int((y1+y2)/2)
size_x = x2-x1
size_y = y2-y1
size = 2 * (max(size_x, size_y) // 2)
zone_list.append([zone_from_center_size(x, y, size), "group"])
else:
zone_list.append([zonel, "left"])
else:
zoner = get_one_hand_zone("right", scale, hands_up_only)
if zoner:
zone_list.append([zoner, "right"])
elif hand_label == "higher":
if body.scores[KEYPOINT_DICT["left_wrist"]] > score_thresh:
if body.scores[KEYPOINT_DICT["right_wrist"]] > score_thresh:
if body.keypoints[KEYPOINT_DICT["left_wrist"]][1] > body.keypoints[KEYPOINT_DICT["right_wrist"]][1]:
hand_label = "right"
else:
hand_label = "left"
else:
hand_label = "left"
else:
if body.scores[KEYPOINT_DICT["right_wrist"]] > score_thresh:
hand_label = "right"
else:
return []
zone = get_one_hand_zone(hand_label, scale, hands_up_only)
if zone: zone_list.append([zone, hand_label])
elif hand_label == "left_right":
zoner = get_one_hand_zone("right", scale, hands_up_only)
if zoner: zone_list.append([zoner, "right"])
zonel = get_one_hand_zone("left", scale, hands_up_only)
if zonel: zone_list.append([zonel, "left"])
else: # "left" or "right"
zone_list.append([get_one_hand_zone(hand_label, scale, hands_up_only), hand_label])
return zone_list
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model", type=str, choices=['lightning', 'thunder'], default='thunder',
help="Model to use (default=%(default)s)")
parser.add_argument("-f", "--focus", type=str, choices=['left', 'right', 'group', 'higher', 'left_right'], default='higher',
help="Find square zone(s) around hand(s) (default=%(default)s)")
parser.add_argument("-s", "--scale", type=float, default=1.0,
help="Zone scaling factor (default=%(default)f)")
parser.add_argument('-u', '--hands_up_only', action="store_true",
help="Take into considerations only the hands where the wrist is above the elbow")
parser.add_argument('-c', '--crop', action="store_true",
help="Center cropping frames to a square shape (smaller size of original frame)")
parser.add_argument('-nsc', '--no_smart_crop', action="store_true",
help="Disable smart cropping from previous frame detection")
parser.add_argument('-i', '--input', type=str, default='rgb',
help="'rgb' or 'rgb_laconic' or path to video/image file to use as input (default: %(default)s)")
parser.add_argument("-o","--output",
help="Path to output video file")
args = parser.parse_args()
pose = MovenetDepthai(
input_src=args.input,
model=args.model,
crop=args.crop,
smart_crop=not args.no_smart_crop
)
score_thresh = pose.score_thresh
renderer = MovenetRenderer(pose, output=args.output)
renderer.show_fps = False
nb = 0
while True:
# Run blazepose on next frame
frame, body = pose.next_frame()
if frame is None: break
# Get the focus zone around the hand or hands we are interested in
result = get_focus_zone(body, frame, args.focus, args.scale, score_thresh, args.hands_up_only)
for zone, hand_label in result:
if zone:
if hand_label == "group":
color = (255,0,0)
elif hand_label == "right":
color = (0,0,255)
else: # left
color = (0,255,0)
cv2.rectangle(frame, tuple(zone[:2]), tuple(zone[2:]), color, 3)
# Draw 2d skeleton
frame = renderer.draw(frame, body)
key = renderer.waitKey(delay=1)
if key == 27 or key == ord('q'):
break
renderer.exit()
pose.exit()