# test_coverage.py (forked from VishnuDuttSharma/deep-multirobot-task)
import time

import numpy as np  # np is used throughout; the wildcard imports below may also provide it

from test_model_loader import *
from constants import *
from gnn_setup import *


def sequential_greedy_action_finder(grid, robot_pos, fov, get_mask=False):
    '''
    Greedily find actions for all robots.
    For each robot, pick the action that covers the most targets,
    remove those targets from the grid, and repeat for the next robot.

    Parameters
    ----------
    grid: 2D grid containing rewards
    robot_pos: Current position of each robot on the grid (NUM_ROBOT x 2 array)
    fov: Field of view in each direction; fov=2 gives a 5x5 grid centered at the robot location
    get_mask: If True, also return the coverage mask

    Returns
    -------
    robot_acts: List of actions, one per robot
    reward: Total reward achieved by the greedy actions
    '''
    grid_size = grid.shape[0]
    # Get the number of robots
    n_rob = robot_pos.shape[0]  # NUM_ROBOT
    # Copy the original grid for later use
    orig_grid = grid.copy()
    # List of actions; DIR_DICT (from constants) is assumed to map each action
    # index to a (row, col) offset, e.g. 0 ('Stay') -> (0, 0)
    act_list = ['Stay', 'Up', 'Down', 'Left', 'Right']
    # List to save the actions for each robot
    robot_acts = []
    # Mask to help with calculating the reward
    mask = np.zeros(grid.shape, dtype=int)
    # Copy the grid to avoid overwriting
    grid_copy = grid.copy()
    # Iterate over each robot
    for i_rob in range(n_rob):
        # Lists to save the reward, mask, and updated grid for each action
        temp_reward_list = []
        temp_mask_list = []
        temp_grid_list = []
        # Iterate over each action
        for i_act, act in enumerate(act_list):
            # Fresh mask for this action to avoid overwriting
            temp_mask = np.zeros(grid.shape, dtype=int)
            # Copy the grid to avoid overwriting
            temp_grid = grid_copy.copy()
            # Current position
            c_pos = robot_pos[i_rob]
            # Move the robot to its new location according to the action
            n_pos = c_pos + DIR_DICT[i_act]
            # Keep the robot within the grid by limiting x-y coordinates to the range [0, grid_size)
            n_pos = n_pos.clip(min=0, max=grid_size - 1)
            # Calculate the bounding box swept by the robot moving from the current
            # location (c_pos) to the new location (n_pos), padded by FOV on each side
            r_lim_lef = max(0, min(c_pos[0] - fov, n_pos[0] - fov))
            c_lim_top = max(0, min(c_pos[1] - fov, n_pos[1] - fov))
            r_lim_rgt = min(max(c_pos[0] + fov + 1, n_pos[0] + fov + 1), grid_size)
            c_lim_bot = min(max(c_pos[1] + fov + 1, n_pos[1] + fov + 1), grid_size)
            # Set the locations within the mask (i.e. within the robot's vision as it moves) to 1
            temp_mask[r_lim_lef:r_lim_rgt, c_lim_top:c_lim_bot] = 1
            # The reward is the total number of targets in the robot's FOV
            reward = np.sum(temp_grid * temp_mask)
            # Add the reward to the list
            temp_reward_list.append(reward)
            # Add the temporary mask to the list
            temp_mask_list.append(temp_mask)
            # Remove the covered targets and add the updated grid to the list
            temp_grid[r_lim_lef:r_lim_rgt, c_lim_top:c_lim_bot] = 0
            temp_grid_list.append(temp_grid)
        # Find the best action = argmax(reward list)
        best_act = np.argmax(temp_reward_list)
        # Save this action as the robot's action as per the greedy algorithm
        robot_acts.append(best_act)
        # Update the mask (union over robots) so get_mask=True returns the covered area
        mask = np.maximum(mask, temp_mask_list[best_act])
        # Update the grid
        grid_copy = temp_grid_list[best_act].copy()
    # Find the reward of the chosen actions
    reward = calculate_reward(orig_grid, robot_pos, robot_acts, fov=fov)
    if get_mask:
        return robot_acts, reward, mask
    return robot_acts, reward
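
# A minimal usage sketch for the planner above (illustrative only; the toy grid,
# robot count, and fov below are assumed values, not taken from the test data):
#
#   toy_grid = np.zeros((20, 20), dtype=int)
#   toy_grid[np.random.randint(0, 20, 15), np.random.randint(0, 20, 15)] = 1  # scatter targets
#   toy_pos = np.random.randint(0, 20, size=(4, 2))                           # 4 robots
#   acts, rwd = sequential_greedy_action_finder(toy_grid, toy_pos, fov=2)
#   # acts indexes into ['Stay', 'Up', 'Down', 'Left', 'Right']; rwd counts covered targets
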
def decent_greedy_action_finder(grid, robot_pos, adj_mat, nFilterTaps, feat_vec, fov=FOV, step=STEP, comm_range=COMM_RANGE, verbose=False):
    '''
    Decentralized greedy planner. Each robot reconstructs a local reward map from
    its own features and those of its K-hop neighbors, runs the sequential greedy
    planner on that group, and keeps only its own action.
    '''
    n_robots = len(robot_pos)  # Number of robots
    robot_rel_rwd_list = []
    robot_nbr_pos_list = []
    group_time_list = []
    mask_list = []  # Only filled (and returned) when verbose=True

    ## Process the feature vector of each robot
    processing_time_list = []
    for i_rob in range(n_robots):
        proc_start_time = time.time()
        rwd_dist_rel, rob_dist_rel = feat_vec[i_rob, :20], feat_vec[i_rob, 20:]
        # Drop the [-1, -1] padding rows and undo the normalization
        rwd_dist_rel = rwd_dist_rel[(rwd_dist_rel != np.array([-1, -1])).all(axis=1)] * (fov + step)
        robot_rel_rwd_list.append(rwd_dist_rel)
        rob_dist_rel = rob_dist_rel[(rob_dist_rel != np.array([-1, -1])).all(axis=1)] * comm_range
        robot_nbr_pos_list.append(rob_dist_rel)
        processing_time_list.append(time.time() - proc_start_time)
    # Robots process their features in parallel, so take the slowest one
    processing_time = np.max(processing_time_list)

    action_lists = []  # Placeholder list to save the action of each robot
    for r_idx in range(n_robots):  # Iterate over each robot
        nbr_id_list = []
        # Creating a list of reward positions known to this robot
        local_reward_map = robot_pos[r_idx] + robot_rel_rwd_list[r_idx].copy()
        nbr_comm_time_list = [0]
        # Gather the reward lists from the neighbors
        for nbr_dist in robot_nbr_pos_list[r_idx]:
            # Get the neighbor ID to find the corresponding rewards list
            nbr_id = np.where((robot_pos == robot_pos[r_idx] + nbr_dist).all(axis=1))[0][0]
            nbr_id_list.append(nbr_id)
            # Get the list of rewards from the neighbor and add it to the local map
            nbr_comm_start_time = time.time()
            nbr_rwd_dist_rel = robot_rel_rwd_list[nbr_id]
            local_reward_map = np.concatenate([local_reward_map, nbr_rwd_dist_rel + nbr_dist + robot_pos[r_idx]])
            nbr_comm_time_list.append(time.time() - nbr_comm_start_time)
        # Neighbors communicate in parallel, so take the slowest exchange
        nbr_comm_time = np.max(nbr_comm_time_list)
        local_reward_map = local_reward_map.astype(int)
        # Get the order of search
        search_order = sorted(nbr_id_list)
        # Add the current robot ID to the end
        nbr_idx = search_order + [r_idx]
        if verbose:
            print(f'Robot #{r_idx} has a group of {len(nbr_idx)} robots (incl. itself) within {nFilterTaps} hops after sorting and filtering')
        # Positions of the K-hop group: an array of 2D locations, one per entry of nbr_idx
        k_hop_nbr_pos = robot_pos[nbr_idx]
        # Run sequential greedy on this subset; the FoV is unchanged
        algo_start_time = time.time()
        # Copy the locally known rewards into a grid of the same shape as the global one
        local_grid = np.zeros_like(grid)
        local_grid[local_reward_map[:, 0], local_reward_map[:, 1]] = 1
        k_greedy_acts, k_greedy_rwd, mask = sequential_greedy_action_finder(grid=local_grid, robot_pos=k_hop_nbr_pos, fov=fov, get_mask=True)
        algo_time = time.time() - algo_start_time
        group_time_list.append(nbr_comm_time + algo_time)
        if verbose:
            mask_list.append(mask)
            print(f'Algo gave {len(k_greedy_acts)} actions')
        # The current robot is the last entry of nbr_idx, so its action is the last one returned
        action_lists.append(k_greedy_acts[-1])

    # Calculate the total reward of the decentralized greedy actions on the true grid
    decent_reward = calculate_reward(grid, robot_pos, action_list=action_lists, fov=fov)
    if verbose:
        return action_lists, decent_reward, mask_list
    # Groups run in parallel, so the wall-clock estimate is feature processing plus the slowest group
    return action_lists, decent_reward, processing_time + np.max(group_time_list)
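
# A sketch of the feature layout assumed by decent_greedy_action_finder (inferred
# from the slicing above, not from a documented spec):
#
#   feat_vec[i]        # 2-column array of relative offsets for robot i
#   feat_vec[i, :20]   # reward offsets, normalized by (FOV + STEP)
#   feat_vec[i, 20:]   # neighbor offsets, normalized by COMM_RANGE
#   # rows that carry no entry are padded with [-1, -1]
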
# arg_parser, process_config, etc. are expected to come from the wildcard imports above
arg_parser.add_argument('--data_path', type=str, help='Location where the pickle files with the coverage test data are saved.')
arg_parser.add_argument('--algo', type=str, default='all', help="Select the algorithm to run: `Centrl-gre`, `Random`, `Decent-gre`, `GNN`. Default option is `all` to run all the algorithms.")
args = arg_parser.parse_args()
config = process_config(args)

num_robot = args.num_agents

#### Loading data files
robx_grid = pickle.load(open(f'{args.data_path}/grid_data.pkl', 'rb'))
robx_robot_pos = pickle.load(open(f'{args.data_path}/robot_pos_data.pkl', 'rb'))
robx_feat_data = pickle.load(open(f'{args.data_path}/model_data.pkl', 'rb'))
robx_action = pickle.load(open(f'{args.data_path}/action_data.pkl', 'rb'))
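
# Expected contents of --data_path (an assumption based on how the arrays are used below):
#   grid_data.pkl      -> per-case 2D reward grids
#   robot_pos_data.pkl -> per-case (num_robot, 2) robot positions
#   model_data.pkl     -> tuple of (feature vectors, adjacency matrices)
#   action_data.pkl    -> expert actions (loaded here but not used in this script)
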
if args.algo == 'Centrl-gre' or args.algo == 'all':
    time_list = []
    action_list = []
    reward_list = []
    for idx in tqdm(range(len(robx_grid))):
        grid = robx_grid[idx]
        robot_pos = robx_robot_pos[idx]
        start_time = time.time()
        # centralized_greedy_action_finder is expected from the wildcard imports;
        # sequential_greedy_action_finder above implements the same sequential greedy
        cent_action, cent_rwd = centralized_greedy_action_finder(grid, robot_pos, fov=FOV)
        run_time = time.time() - start_time
        time_list.append(run_time)
        reward_list.append(cent_rwd)

    print('Centralized Greedy')
    print(f'Avg. Target Covered: {np.mean(reward_list)}')
    print(f'Avg. Time: {np.mean(time_list)}')
    print()

if args.algo == 'Random' or args.algo == 'all':
    time_list = []
    action_list = []
    reward_list = []
    for idx in tqdm(range(len(robx_grid))):
        grid = robx_grid[idx]
        robot_pos = robx_robot_pos[idx]
        start_time = time.time()
        # Random action per robot in {1..4} (Up/Down/Left/Right); 'Stay' (0) is excluded
        rand_act = np.random.randint(1, 5, (num_robot,))
        rand_rwd = calculate_reward(grid, robot_pos, rand_act, fov=FOV, get_mask=False)
        # Report the per-robot time
        run_time = (time.time() - start_time) / num_robot
        time_list.append(run_time)
        reward_list.append(rand_rwd)

    print('Random')
    print(f'Avg. Target Covered: {np.mean(reward_list)}')
    print(f'Avg. Time: {np.mean(time_list)}')
    print()

if args.algo == 'Decent-gre' or args.algo == 'all':
    time_list = []
    action_list = []
    reward_list = []
    for idx in tqdm(range(len(robx_grid))):
        grid = robx_grid[idx]
        robot_pos = robx_robot_pos[idx]
        adj_mat = robx_feat_data[1][idx]
        feat_vec = robx_feat_data[0][idx]
        decent_action, decent_rwd, run_time = decent_greedy_action_finder(grid, robot_pos, adj_mat, feat_vec=feat_vec, nFilterTaps=args.nGraphFilterTaps - 1, verbose=False)
        time_list.append(run_time)
        reward_list.append(decent_rwd)

    print('Decentralized Greedy')
    print(f'Avg. Target Covered: {np.mean(reward_list)}')
    print(f'Avg. Time: {np.mean(time_list)}')
    print()

if args.algo == 'GNN' or args.algo == 'all':
    time_list = []
    action_list = []
    reward_list = []
    if config.cuda:
        config.device = torch.device('cuda:0')
    else:
        config.device = torch.device('cpu')
    pred_action_list, pred_time_list = get_prediction(config=config, feat_raw=robx_feat_data[0], adj_mat=robx_feat_data[1], device=config.device)

    pred_rwd_list = []
    for i in range(len(robx_grid)):
        grid = robx_grid[i]
        robot_pos = robx_robot_pos[i]
        rwd = calculate_reward(grid, robot_pos, pred_action_list[i][0], fov=FOV, get_mask=False)
        pred_rwd_list.append(rwd)

    time_list = pred_time_list
    reward_list = pred_rwd_list

    print('GNN')
    print(f'Avg. Target Covered: {np.mean(reward_list)}')
    print(f'Avg. Time: {np.mean(time_list)}')
    print()
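
# Example invocation (hypothetical paths; --num_agents and --nGraphFilterTaps are
# assumed to be defined on the shared arg_parser by the imported modules):
#   python test_coverage.py --data_path ./coverage_test_data --algo all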