Cleaning the features file
PrimeF committed Mar 28, 2021
1 parent 21d9930 commit 569af52
Showing 4 changed files with 7 additions and 52 deletions.
50 changes: 7 additions & 43 deletions agent_code/auto_bomber/feature_engineering.py
@@ -15,13 +15,6 @@ def state_to_features(game_state: dict) -> np.array:
:param game_state: A dictionary describing the current game board.
:return: np.array
"""
#############
# NOTES #
#############
# Coins zones signal is very weak! -> Used softmax, which keeps 0.0 entries at 0.0 by mapping them to -np.inf first
# Adding coins to crates --> not good: we need to know where the crates are, and crates are distinct from coins since they first need to be blown up

# This is the dict before the game begins and after it ends
if game_state is None:
# TODO: we need another representation for the final state here!
return np.random.rand(21)
@@ -32,7 +25,6 @@ def state_to_features(game_state: dict) -> np.array:
agent_position = np.asarray(game_state['self'][3], dtype='int')
agent_bomb_action = np.asarray(game_state['self'][2], dtype='int')
bombs_position = np.atleast_2d(np.asarray([list(bomb[0]) for bomb in game_state['bombs']], dtype='int'))
bombs_countdown = np.asarray([bomb[1] for bomb in game_state['bombs']])
explosions_position = np.argwhere(game_state['explosion_map'] > 0)
coins_position = np.atleast_2d(np.array(game_state['coins'], dtype='int'))
relevant_coins_position = coins_position[~np.isin(coins_position, agent_position).all(axis=1)]
@@ -43,51 +35,38 @@ def state_to_features(game_state: dict) -> np.array:
opponents_bomb_action = np.asarray([player[2] for player in game_state['others']])
opponents_bomb_action = np.where(opponents_bomb_action, weight_opponents_with_bomb, 1.0)

# TODO (HUGE!) --> Switch distances from Euclidean to a path-finding algorithm
# https://pypi.org/project/pathfinding/
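# A minimal sketch (not part of this commit) of how the pathfinding package
# linked above could replace the straight-line distances; the field layout and
# the path_distance helper are hypothetical:
#
#     from pathfinding.core.grid import Grid
#     from pathfinding.finder.a_star import AStarFinder
#
#     def path_distance(field, start_xy, end_xy):
#         # walkable tiles (0 in the framework field) -> 1, obstacles -> 0;
#         # Grid expects matrix[y][x], hence the transpose
#         grid = Grid(matrix=(field == 0).astype(int).T.tolist())
#         start = grid.node(start_xy[0], start_xy[1])
#         end = grid.node(end_xy[0], end_xy[1])
#         path, _runs = AStarFinder().find_path(start, end, grid)
#         return len(path) - 1 if path else np.inf  # steps; inf if unreachable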

# TODO Make BOMB_POWER dynamic from settings.py
#############################################
# DISCARDED #
# Bombs zones logic: #
# Due to bad performance in empirical tests #
#############################################
#
#
# bombs_zones = _compute_zones_heatmap(agent_position, bombs_position, 0.0,
# lambda v, w: np.where(v > 0., v[(3 + w) - v >= 0] ** w[(3 + w) - v >= 0], 0.0),
# bombs_countdown,
# lambda v: np.mean(v[v != 0.0]) if v[v != 0.0].size != 0 else 0.0,
# lambda v: -1 * np.divide(1, v, out=np.zeros_like(v), where=v != 0))

# TODO Does not account for how many coins there are in the zone
coins_zones = _compute_zones_heatmap(agent_position, relevant_coins_position, 0.0,
# aggregation_func=lambda v: np.mean(v[v != 0.0]) if v[v != 0.0].size != 0 else 0.0,
aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0,
# normalization_func=lambda v: softmax(
# np.divide(1, v, out=np.full_like(v, -np.inf), where=v != 0)) if np.all(
# v != 0.0) else v)
normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0))
crates_zones = _compute_zones_heatmap(agent_position, crates_position, 0.0,
aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0,
# normalization_func=lambda v: softmax(
# np.divide(1, v, out=np.full_like(v, -np.inf), where=v != 0)))
normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0))
opponents_zones = _compute_zones_heatmap(agent_position, opponents_position, 0.0,
weighting_func=lambda v, w: v * w,
weights=opponents_bomb_action,
aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0,
# normalization_func=lambda v: np.divide(v, np.max(v), out=np.zeros_like(v), where=v != 0))
normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0))
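# For reference (not part of this commit): the normalization shared by the
# three zone heatmaps above maps each zone's mean distance d to 1/d while
# leaving empty zones at 0.0, e.g.:
#
#     v = np.array([4., 0., 2., 8.])
#     np.divide(1, v, out=np.zeros_like(v), where=v != 0)
#     # -> array([0.25 , 0.   , 0.5  , 0.125])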

# TODO Evaluate whether to also weight bombs here by their countdown
# TODO Exclude bombs which are not relevant (!!!!)

# TODO Field of view: should not only flag unwanted positions, but also signal which positions to move towards
bombs_field_of_view = _object_in_field_of_view(agent_position, bombs_position, 0.0,
lambda v, w: -1 * np.divide(1, v, out=np.zeros_like(v),
where=v != 0),
None)
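# Note (not part of this commit): the -1/d weighting above makes closer bombs
# more strongly negative (repulsive), e.g. d = 1 -> -1.0, d = 4 -> -0.25,
# while directions without a bomb stay at 0.0.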
explosion_field_of_view = _object_in_field_of_view(agent_position, explosions_position, 0.0)
coins_field_of_view = _object_in_field_of_view(agent_position, relevant_coins_position, 0.0,
lambda v, w: np.divide(1, v, out=np.zeros_like(v), where=v != 0),
# lambda v, w: softmax(
# np.divide(1, v, out=np.full_like(v, -np.inf),
# where=v != 0)) if np.all(
# v != 0.0) else v,
None)
crates_field_of_view = _object_in_field_of_view(agent_position, crates_position, 0.0,
lambda v, w: np.divide(1, v, out=np.zeros_like(v), where=v != 0),
Expand All @@ -97,9 +76,6 @@ def state_to_features(game_state: dict) -> np.array:
None)
walls_field_of_view = _object_in_field_of_view(agent_position, walls_position, 0.0)

# OPTION: Incorporate obstacles in the features (by setting the direction of an obstacle to -1)
# Issue: Unable to distinguish between standing in front of walls and in front of crates --> an important
# distinction for bomb dropping
f_obstacles = np.zeros((4,))
f_obstacles[walls_field_of_view == 1.] = -1.
f_obstacles[explosion_field_of_view == 1.] = -1.
@@ -115,18 +91,11 @@ def state_to_features(game_state: dict) -> np.array:
new_bombs_field_of_view[bombs_field_of_view == -1.] = -1.
f_bombs = new_bombs_field_of_view

# f_bombs = np.sum(np.vstack((bombs_zones, new_bombs_field_of_view)), axis=0)
# if not np.all((f_bombs == 0.0)):
# f_bombs = np.where(f_bombs == 0.0, np.inf, f_bombs)
# f_bombs = -1 * softmax(-1 * f_bombs)
# f_bombs[new_bombs_field_of_view == -1.0] = -1.0

f_coins = np.sum(np.vstack((coins_zones, 5 * coins_field_of_view)), axis=0)
f_coins[walls_field_of_view == 1.] = 0.
if not np.all((f_coins == 0.)):
f_coins = np.where(f_coins == 0., -np.inf, f_coins)
f_coins = softmax(f_coins)
# f_coins[walls_field_of_view == 1.] = -1.
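# For reference (not part of this commit): replacing zeros with -np.inf before
# the softmax keeps blocked/empty directions at exactly 0.0 in the output,
# since exp(-np.inf) == 0 (assuming softmax is scipy.special.softmax), e.g.:
#
#     f = np.array([0.2, 0., 0.4, 0.])
#     softmax(np.where(f == 0., -np.inf, f))  # -> approx. [0.45, 0., 0.55, 0.]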

f_crates = np.sum(np.vstack((crates_zones, 5 * crates_field_of_view)), axis=0)
f_crates[walls_field_of_view == 1.] = 0.
@@ -181,7 +150,6 @@ def _compute_zones_heatmap(agent_position, objects_position, initial, weighting_
if objects_position.size == 0:
return zones

# distances = np.linalg.norm(agent_position - objects_position, axis=1)
agent_position = np.atleast_2d(agent_position)
distances = cdist(agent_position, objects_position, 'cityblock').squeeze(axis=0)
agent_position = agent_position[0]
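# Note (not part of this commit): 'cityblock' is the Manhattan (L1) distance,
# which matches step-wise movement on the grid better than the Euclidean norm,
# e.g. for the points (1, 1) and (4, 5):
#
#     cdist([[1, 1]], [[4, 5]], 'cityblock')  # -> [[7.]]  (3 + 4 steps)
#     np.linalg.norm(np.array([3, 4]))        # -> 5.0     (straight line)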
@@ -246,23 +214,19 @@ def _object_in_field_of_view(agent_position, objects_position, initial, normaliz
# Directions are actual directions, i.e. after translation of framework fields
objects_down = objects_on_x[np.where(objects_on_x[:, 1] >= agent_position[0, 1])]
if not objects_down.size == 0:
# field_of_view[1] = np.linalg.norm(agent_position - objects_down, axis=1).min()
field_of_view[1] = cdist(agent_position, objects_down, 'cityblock').squeeze(axis=0).min()
objects_up = objects_on_x[np.where(objects_on_x[:, 1] <= agent_position[0, 1])]
if not objects_up.size == 0:
# field_of_view[3] = np.linalg.norm(agent_position - objects_up, axis=1).min()
field_of_view[3] = cdist(agent_position, objects_up, 'cityblock').squeeze(axis=0).min()

# Coordinate y is given in the framework's field coordinates
objects_on_y = objects_position[np.where(objects_position[:, 1] == agent_position[0, 1])]
# Directions are actual directions, i.e. after translation of framework fields
objects_right = objects_on_y[np.where(objects_on_y[:, 0] >= agent_position[0, 0])]
if not objects_right.size == 0:
# field_of_view[0] = np.linalg.norm(agent_position - objects_right, axis=1).min()
field_of_view[0] = cdist(agent_position, objects_right, 'cityblock').squeeze(axis=0).min()
objects_left = objects_on_y[np.where(objects_on_y[:, 0] <= agent_position[0, 0])]
if not objects_left.size == 0:
# field_of_view[2] = np.linalg.norm(agent_position - objects_left, axis=1).min()
field_of_view[2] = cdist(agent_position, objects_left, 'cityblock').squeeze(axis=0).min()
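# Note (not part of this commit): after the four assignments above, field_of_view
# holds the minimum cityblock distance to the nearest object per direction,
# indexed 0 = right, 1 = down, 2 = left, 3 = up.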

if normalization_func:
7 changes: 0 additions & 7 deletions agent_code/auto_bomber/production/42/config.py

This file was deleted.

Binary file removed agent_code/auto_bomber/production/42/weights.pt
2 changes: 0 additions & 2 deletions agent_code/auto_bomber/train.py
@@ -85,8 +85,6 @@ def reward_from_events(self, events: List[str]) -> int:
Here you can modify the rewards your agent gets, so as to encourage or
discourage certain behavior.
"""
# q: how to determine the winner?

rewards_dict = self.model.hyper_parameters["game_rewards"]
reward_sum = 0
for event in events:
