# test_coverage.py (forked from VishnuDuttSharma/deep-multirobot-task)
import time

import numpy as np  # np is used throughout; the wildcard imports below may also provide it

from test_model_loader import *
from constants import *
from gnn_setup import *


def sequential_greedy_action_finder(grid, robot_pos, fov, get_mask=False):
    '''
    Greedily find actions for all robots.
    For each robot, pick the action that covers the most targets,
    remove those targets from the grid, and repeat for the next robot.

    Parameters
    ----------
    grid: 2D grid containing rewards
    robot_pos: Current position of each robot on the grid (NUM_ROBOT x 2 array)
    fov: Field of view in each direction; fov=2 gives a 5x5 grid centered at the robot location
    get_mask: If True, also return the coverage mask

    Returns
    -------
    robot_acts: List of actions, one per robot
    reward: Total reward achieved by the greedy actions
    '''
    grid_size = grid.shape[0]
    # Get the number of robots
    n_rob = robot_pos.shape[0]  # NUM_ROBOT
    # Copy the original grid for later use
    orig_grid = grid.copy()
    # List of actions; DIR_DICT (from constants) is assumed to map each action
    # index to a (row, col) offset, e.g. 0 ('Stay') -> (0, 0)
    act_list = ['Stay', 'Up', 'Down', 'Left', 'Right']
    # List to save the actions for each robot
    robot_acts = []
    # Mask to help with calculating the reward
    mask = np.zeros(grid.shape, dtype=int)
    # Copy the grid to avoid overwriting
    grid_copy = grid.copy()
    # Iterate over each robot
    for i_rob in range(n_rob):
        # Lists to save the reward, mask, and updated grid for each action
        temp_reward_list = []
        temp_mask_list = []
        temp_grid_list = []
        # Iterate over each action
        for i_act, act in enumerate(act_list):
            # Fresh mask for this action to avoid overwriting
            temp_mask = np.zeros(grid.shape, dtype=int)
            # Copy the grid to avoid overwriting
            temp_grid = grid_copy.copy()
            # Current position
            c_pos = robot_pos[i_rob]
            # Move the robot to its new location according to the action
            n_pos = c_pos + DIR_DICT[i_act]
            # Keep the robot within the grid by limiting x-y coordinates to the range [0, grid_size)
            n_pos = n_pos.clip(min=0, max=grid_size - 1)
            # Calculate the bounding box swept by the robot moving from the current
            # location (c_pos) to the new location (n_pos), padded by FOV on each side
            r_lim_lef = max(0, min(c_pos[0] - fov, n_pos[0] - fov))
            c_lim_top = max(0, min(c_pos[1] - fov, n_pos[1] - fov))
            r_lim_rgt = min(max(c_pos[0] + fov + 1, n_pos[0] + fov + 1), grid_size)
            c_lim_bot = min(max(c_pos[1] + fov + 1, n_pos[1] + fov + 1), grid_size)
            # Set the locations within the mask (i.e. within the robot's vision as it moves) to 1
            temp_mask[r_lim_lef:r_lim_rgt, c_lim_top:c_lim_bot] = 1
            # The reward is the total number of targets in the robot's FOV
            reward = np.sum(temp_grid * temp_mask)
            # Add the reward to the list
            temp_reward_list.append(reward)
            # Add the temporary mask to the list
            temp_mask_list.append(temp_mask)
            # Remove the covered targets and add the updated grid to the list
            temp_grid[r_lim_lef:r_lim_rgt, c_lim_top:c_lim_bot] = 0
            temp_grid_list.append(temp_grid)
        # Find the best action = argmax(reward list)
        best_act = np.argmax(temp_reward_list)
        # Save this action as the robot's action as per the greedy algorithm
        robot_acts.append(best_act)
        # Update the mask (union over robots) so get_mask=True returns the covered area
        mask = np.maximum(mask, temp_mask_list[best_act])
        # Update the grid
        grid_copy = temp_grid_list[best_act].copy()
    # Find the reward of the chosen actions
    reward = calculate_reward(orig_grid, robot_pos, robot_acts, fov=fov)
    if get_mask:
        return robot_acts, reward, mask
    return robot_acts, reward
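
# A minimal usage sketch for the planner above (illustrative only; the toy grid,
# robot count, and fov below are assumed values, not taken from the test data):
#
#   toy_grid = np.zeros((20, 20), dtype=int)
#   toy_grid[np.random.randint(0, 20, 15), np.random.randint(0, 20, 15)] = 1  # scatter targets
#   toy_pos = np.random.randint(0, 20, size=(4, 2))                           # 4 robots
#   acts, rwd = sequential_greedy_action_finder(toy_grid, toy_pos, fov=2)
#   # acts indexes into ['Stay', 'Up', 'Down', 'Left', 'Right']; rwd counts covered targets
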
def decent_greedy_action_finder(grid, robot_pos, adj_mat, nFilterTaps, feat_vec, fov=FOV, step=STEP, comm_range=COMM_RANGE, verbose=False):
    '''
    Decentralized greedy planner. Each robot reconstructs a local reward map from
    its own features and those of its K-hop neighbors, runs the sequential greedy
    planner on that group, and keeps only its own action.
    '''
    n_robots = len(robot_pos)  # Number of robots
    robot_rel_rwd_list = []
    robot_nbr_pos_list = []
    group_time_list = []
    mask_list = []  # Only filled (and returned) when verbose=True

    ## Process the feature vector of each robot
    processing_time_list = []
    for i_rob in range(n_robots):
        proc_start_time = time.time()
        rwd_dist_rel, rob_dist_rel = feat_vec[i_rob, :20], feat_vec[i_rob, 20:]
        # Drop the [-1, -1] padding rows and undo the normalization
        rwd_dist_rel = rwd_dist_rel[(rwd_dist_rel != np.array([-1, -1])).all(axis=1)] * (fov + step)
        robot_rel_rwd_list.append(rwd_dist_rel)
        rob_dist_rel = rob_dist_rel[(rob_dist_rel != np.array([-1, -1])).all(axis=1)] * comm_range
        robot_nbr_pos_list.append(rob_dist_rel)
        processing_time_list.append(time.time() - proc_start_time)
    # Robots process their features in parallel, so take the slowest one
    processing_time = np.max(processing_time_list)

    action_lists = []  # Placeholder list to save the action of each robot
    for r_idx in range(n_robots):  # Iterate over each robot
        nbr_id_list = []
        # Creating a list of reward positions known to this robot
        local_reward_map = robot_pos[r_idx] + robot_rel_rwd_list[r_idx].copy()
        nbr_comm_time_list = [0]
        # Gather the reward lists from the neighbors
        for nbr_dist in robot_nbr_pos_list[r_idx]:
            # Get the neighbor ID to find the corresponding rewards list
            nbr_id = np.where((robot_pos == robot_pos[r_idx] + nbr_dist).all(axis=1))[0][0]
            nbr_id_list.append(nbr_id)
            # Get the list of rewards from the neighbor and add it to the local map
            nbr_comm_start_time = time.time()
            nbr_rwd_dist_rel = robot_rel_rwd_list[nbr_id]
            local_reward_map = np.concatenate([local_reward_map, nbr_rwd_dist_rel + nbr_dist + robot_pos[r_idx]])
            nbr_comm_time_list.append(time.time() - nbr_comm_start_time)
        # Neighbors communicate in parallel, so take the slowest exchange
        nbr_comm_time = np.max(nbr_comm_time_list)
        local_reward_map = local_reward_map.astype(int)
        # Get the order of search
        search_order = sorted(nbr_id_list)
        # Add the current robot ID to the end
        nbr_idx = search_order + [r_idx]
        if verbose:
            print(f'Robot #{r_idx} has a group of {len(nbr_idx)} robots (incl. itself) within {nFilterTaps} hops after sorting and filtering')
        # Positions of the K-hop group: an array of 2D locations, one per entry of nbr_idx
        k_hop_nbr_pos = robot_pos[nbr_idx]
        # Run sequential greedy on this subset; the FoV is unchanged
        algo_start_time = time.time()
        # Copy the locally known rewards into a grid of the same shape as the global one
        local_grid = np.zeros_like(grid)
        local_grid[local_reward_map[:, 0], local_reward_map[:, 1]] = 1
        k_greedy_acts, k_greedy_rwd, mask = sequential_greedy_action_finder(grid=local_grid, robot_pos=k_hop_nbr_pos, fov=fov, get_mask=True)
        algo_time = time.time() - algo_start_time
        group_time_list.append(nbr_comm_time + algo_time)
        if verbose:
            mask_list.append(mask)
            print(f'Algo gave {len(k_greedy_acts)} actions')
        # The current robot is the last entry of nbr_idx, so its action is the last one returned
        action_lists.append(k_greedy_acts[-1])

    # Calculate the total reward of the decentralized greedy actions on the true grid
    decent_reward = calculate_reward(grid, robot_pos, action_list=action_lists, fov=fov)
    if verbose:
        return action_lists, decent_reward, mask_list
    # Groups run in parallel, so the wall-clock estimate is feature processing plus the slowest group
    return action_lists, decent_reward, processing_time + np.max(group_time_list)
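
# A sketch of the feature layout assumed by decent_greedy_action_finder (inferred
# from the slicing above, not from a documented spec):
#
#   feat_vec[i]        # 2-column array of relative offsets for robot i
#   feat_vec[i, :20]   # reward offsets, normalized by (FOV + STEP)
#   feat_vec[i, 20:]   # neighbor offsets, normalized by COMM_RANGE
#   # rows that carry no entry are padded with [-1, -1]
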
# arg_parser, process_config, etc. are expected to come from the wildcard imports above
arg_parser.add_argument('--data_path', type=str, help='Location where the pickle files with the coverage test data are saved.')
arg_parser.add_argument('--algo', type=str, default='all', help="Select the algorithm to run: `Centrl-gre`, `Random`, `Decent-gre`, `GNN`. Default option is `all` to run all the algorithms.")
args = arg_parser.parse_args()
config = process_config(args)

num_robot = args.num_agents

#### Loading data files
robx_grid = pickle.load(open(f'{args.data_path}/grid_data.pkl', 'rb'))
robx_robot_pos = pickle.load(open(f'{args.data_path}/robot_pos_data.pkl', 'rb'))
robx_feat_data = pickle.load(open(f'{args.data_path}/model_data.pkl', 'rb'))
robx_action = pickle.load(open(f'{args.data_path}/action_data.pkl', 'rb'))
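
# Expected contents of --data_path (an assumption based on how the arrays are used below):
#   grid_data.pkl      -> per-case 2D reward grids
#   robot_pos_data.pkl -> per-case (num_robot, 2) robot positions
#   model_data.pkl     -> tuple of (feature vectors, adjacency matrices)
#   action_data.pkl    -> expert actions (loaded here but not used in this script)
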
if args.algo == 'Centrl-gre' or args.algo == 'all':
    time_list = []
    action_list = []
    reward_list = []
    for idx in tqdm(range(len(robx_grid))):
        grid = robx_grid[idx]
        robot_pos = robx_robot_pos[idx]
        start_time = time.time()
        # centralized_greedy_action_finder is expected from the wildcard imports;
        # sequential_greedy_action_finder above implements the same sequential greedy
        cent_action, cent_rwd = centralized_greedy_action_finder(grid, robot_pos, fov=FOV)
        run_time = time.time() - start_time
        time_list.append(run_time)
        reward_list.append(cent_rwd)

    print('Centralized Greedy')
    print(f'Avg. Target Covered: {np.mean(reward_list)}')
    print(f'Avg. Time: {np.mean(time_list)}')
    print()

if args.algo == 'Random' or args.algo == 'all':
    time_list = []
    action_list = []
    reward_list = []
    for idx in tqdm(range(len(robx_grid))):
        grid = robx_grid[idx]
        robot_pos = robx_robot_pos[idx]
        start_time = time.time()
        # Random action per robot in {1..4} (Up/Down/Left/Right); 'Stay' (0) is excluded
        rand_act = np.random.randint(1, 5, (num_robot,))
        rand_rwd = calculate_reward(grid, robot_pos, rand_act, fov=FOV, get_mask=False)
        # Report the per-robot time
        run_time = (time.time() - start_time) / num_robot
        time_list.append(run_time)
        reward_list.append(rand_rwd)

    print('Random')
    print(f'Avg. Target Covered: {np.mean(reward_list)}')
    print(f'Avg. Time: {np.mean(time_list)}')
    print()

if args.algo == 'Decent-gre' or args.algo == 'all':
    time_list = []
    action_list = []
    reward_list = []
    for idx in tqdm(range(len(robx_grid))):
        grid = robx_grid[idx]
        robot_pos = robx_robot_pos[idx]
        adj_mat = robx_feat_data[1][idx]
        feat_vec = robx_feat_data[0][idx]
        decent_action, decent_rwd, run_time = decent_greedy_action_finder(grid, robot_pos, adj_mat, feat_vec=feat_vec, nFilterTaps=args.nGraphFilterTaps - 1, verbose=False)
        time_list.append(run_time)
        reward_list.append(decent_rwd)

    print('Decentralized Greedy')
    print(f'Avg. Target Covered: {np.mean(reward_list)}')
    print(f'Avg. Time: {np.mean(time_list)}')
    print()

if args.algo == 'GNN' or args.algo == 'all':
    time_list = []
    action_list = []
    reward_list = []
    if config.cuda:
        config.device = torch.device('cuda:0')
    else:
        config.device = torch.device('cpu')
    pred_action_list, pred_time_list = get_prediction(config=config, feat_raw=robx_feat_data[0], adj_mat=robx_feat_data[1], device=config.device)

    pred_rwd_list = []
    for i in range(len(robx_grid)):
        grid = robx_grid[i]
        robot_pos = robx_robot_pos[i]
        rwd = calculate_reward(grid, robot_pos, pred_action_list[i][0], fov=FOV, get_mask=False)
        pred_rwd_list.append(rwd)

    time_list = pred_time_list
    reward_list = pred_rwd_list

    print('GNN')
    print(f'Avg. Target Covered: {np.mean(reward_list)}')
    print(f'Avg. Time: {np.mean(time_list)}')
    print()
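
# Example invocation (hypothetical paths; --num_agents and --nGraphFilterTaps are
# assumed to be defined on the shared arg_parser by the imported modules):
#   python test_coverage.py --data_path ./coverage_test_data --algo all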