From 569af526cc7470dd59f90dcae727d9ad684e9f7f Mon Sep 17 00:00:00 2001 From: Fabrizio Primerano Date: Sun, 28 Mar 2021 14:14:32 +0200 Subject: [PATCH] Cleaning the features file --- agent_code/auto_bomber/feature_engineering.py | 50 +++--------------- .../auto_bomber/production/42/config.py | 7 --- .../auto_bomber/production/42/weights.pt | Bin 776 -> 0 bytes agent_code/auto_bomber/train.py | 2 - 4 files changed, 7 insertions(+), 52 deletions(-) delete mode 100644 agent_code/auto_bomber/production/42/config.py delete mode 100644 agent_code/auto_bomber/production/42/weights.pt diff --git a/agent_code/auto_bomber/feature_engineering.py b/agent_code/auto_bomber/feature_engineering.py index 42e2d0c22..1c34e9ae5 100644 --- a/agent_code/auto_bomber/feature_engineering.py +++ b/agent_code/auto_bomber/feature_engineering.py @@ -15,13 +15,6 @@ def state_to_features(game_state: dict) -> np.array: :param game_state: A dictionary describing the current game board. :return: np.array """ - ############# - # NOTES # - ############# - # Coins zones signal very weak! -> Used softmax, which keeps 0.0 by using -np.inf - # Add coins to crates --> not good, need to know where crates are, are distinct from coins as need to be exploded - - # This is the dict before the game begins and after it ends if game_state is None: # todo we need another representation for final state here! return np.random.rand(21) @@ -32,7 +25,6 @@ def state_to_features(game_state: dict) -> np.array: agent_position = np.asarray(game_state['self'][3], dtype='int') agent_bomb_action = np.asarray(game_state['self'][2], dtype='int') bombs_position = np.atleast_2d(np.asarray([list(bomb[0]) for bomb in game_state['bombs']], dtype='int')) - bombs_countdown = np.asarray([bomb[1] for bomb in game_state['bombs']]) explosions_position = np.argwhere(game_state['explosion_map'] > 0) coins_position = np.atleast_2d(np.array(game_state['coins'], dtype='int')) relevant_coins_position = coins_position[~np.isin(coins_position, agent_position).all(axis=1)] @@ -43,40 +35,31 @@ def state_to_features(game_state: dict) -> np.array: opponents_bomb_action = np.asarray([player[2] for player in game_state['others']]) opponents_bomb_action = np.where(opponents_bomb_action, weight_opponents_with_bomb, 1.0) - # TODO HUUUUUUUUUGE!!!!!!! 
--> Switch distances from euclidean to a path finding algorithm - # https://pypi.org/project/pathfinding/ - - # TODO Make BOMB_POWER dynamic from settings.py + ############################################# + # DISCARDED # + # Bombs zones logic: # + # Due to bad performance in empirical tests # + ############################################# + # + # # bombs_zones = _compute_zones_heatmap(agent_position, bombs_position, 0.0, # lambda v, w: np.where(v > 0., v[(3 + w) - v >= 0] ** w[(3 + w) - v >= 0], 0.0), # bombs_countdown, # lambda v: np.mean(v[v != 0.0]) if v[v != 0.0].size != 0 else 0.0, # lambda v: -1 * np.divide(1, v, out=np.zeros_like(v), where=v != 0)) - # TODO Does not account for how many coins there are in the zone coins_zones = _compute_zones_heatmap(agent_position, relevant_coins_position, 0.0, - # aggregation_func=lambda v: np.mean(v[v != 0.0]) if v[v != 0.0].size != 0 else 0.0, aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0, - # normalization_func=lambda v: softmax( - # np.divide(1, v, out=np.full_like(v, -np.inf), where=v != 0)) if np.all( - # v != 0.0) else v) normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0)) crates_zones = _compute_zones_heatmap(agent_position, crates_position, 0.0, aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0, - # normalization_func=lambda v: softmax( - # np.divide(1, v, out=np.full_like(v, -np.inf), where=v != 0))) normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0)) opponents_zones = _compute_zones_heatmap(agent_position, opponents_position, 0.0, weighting_func=lambda v, w: v * w, weights=opponents_bomb_action, aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0, - # normalization_func=lambda v: np.divide(v, np.max(v), out=np.zeros_like(v), where=v != 0)) normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0)) - # TODO Evaluate if weighting bombs also here by their countdown - # TODO Exclude bombs which are not relevant (!!!!) - - # TODO Field of view, not only says unwanted position but also says go towards position bombs_field_of_view = _object_in_field_of_view(agent_position, bombs_position, 0.0, lambda v, w: -1 * np.divide(1, v, out=np.zeros_like(v), where=v != 0), @@ -84,10 +67,6 @@ def state_to_features(game_state: dict) -> np.array: explosion_field_of_view = _object_in_field_of_view(agent_position, explosions_position, 0.0) coins_field_of_view = _object_in_field_of_view(agent_position, relevant_coins_position, 0.0, lambda v, w: np.divide(1, v, out=np.zeros_like(v), where=v != 0), - # lambda v, w: softmax( - # np.divide(1, v, out=np.full_like(v, -np.inf), - # where=v != 0)) if np.all( - # v != 0.0) else v, None) crates_field_of_view = _object_in_field_of_view(agent_position, crates_position, 0.0, lambda v, w: np.divide(1, v, out=np.zeros_like(v), where=v != 0), @@ -97,9 +76,6 @@ def state_to_features(game_state: dict) -> np.array: None) walls_field_of_view = _object_in_field_of_view(agent_position, walls_position, 0.0) - # OPTION: Incorporate obstacles in features (by setting direction of obstacle = -1) - # Issue: Unable to distinguish when in front of walls or in front of crates --> important distinction for - # bombs dropping f_obstacles = np.zeros((4,)) f_obstacles[walls_field_of_view == 1.] = -1. f_obstacles[explosion_field_of_view == 1.] = -1. @@ -115,18 +91,11 @@ def state_to_features(game_state: dict) -> np.array: new_bombs_field_of_view[bombs_field_of_view == -1.] = -1. 
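    # bombs_field_of_view holds, per axis direction, -1/(Manhattan distance) to the
    # nearest bomb in the agent's row or column (0.0 if none is in sight); the line
    # above forces directions with a directly adjacent bomb back to -1. before the
    # vector is used as the bomb feature below.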
f_bombs = new_bombs_field_of_view - # f_bombs = np.sum(np.vstack((bombs_zones, new_bombs_field_of_view)), axis=0) - # if not np.all((f_bombs == 0.0)): - # f_bombs = np.where(f_bombs == 0.0, np.inf, f_bombs) - # f_bombs = -1 * softmax(-1 * f_bombs) - # f_bombs[new_bombs_field_of_view == -1.0] = -1.0 - f_coins = np.sum(np.vstack((coins_zones, 5 * coins_field_of_view)), axis=0) f_coins[walls_field_of_view == 1.] = 0. if not np.all((f_coins == 0.)): f_coins = np.where(f_coins == 0., -np.inf, f_coins) f_coins = softmax(f_coins) - # f_coins[walls_field_of_view == 1.] = -1. f_crates = np.sum(np.vstack((crates_zones, 5 * crates_field_of_view)), axis=0) f_crates[walls_field_of_view == 1.] = 0. @@ -181,7 +150,6 @@ def _compute_zones_heatmap(agent_position, objects_position, initial, weighting_ if objects_position.size == 0: return zones - # distances = np.linalg.norm(agent_position - objects_position, axis=1) agent_position = np.atleast_2d(agent_position) distances = cdist(agent_position, objects_position, 'cityblock').squeeze(axis=0) agent_position = agent_position[0] @@ -246,11 +214,9 @@ def _object_in_field_of_view(agent_position, objects_position, initial, normaliz # Directions are actual directions, i.e. after translation of framework fields objects_down = objects_on_x[np.where(objects_on_x[:, 1] >= agent_position[0, 1])] if not objects_down.size == 0: - # field_of_view[1] = np.linalg.norm(agent_position - objects_down, axis=1).min() field_of_view[1] = cdist(agent_position, objects_down, 'cityblock').squeeze(axis=0).min() objects_up = objects_on_x[np.where(objects_on_x[:, 1] <= agent_position[0, 1])] if not objects_up.size == 0: - # field_of_view[3] = np.linalg.norm(agent_position - objects_up, axis=1).min() field_of_view[3] = cdist(agent_position, objects_up, 'cityblock').squeeze(axis=0).min() # Coordinate y is as of the framework field @@ -258,11 +224,9 @@ def _object_in_field_of_view(agent_position, objects_position, initial, normaliz # Directions are actual directions, i.e. 
after translation of framework fields objects_right = objects_on_y[np.where(objects_on_y[:, 0] >= agent_position[0, 0])] if not objects_right.size == 0: - # field_of_view[0] = np.linalg.norm(agent_position - objects_right, axis=1).min() field_of_view[0] = cdist(agent_position, objects_right, 'cityblock').squeeze(axis=0).min() objects_left = objects_on_y[np.where(objects_on_y[:, 0] <= agent_position[0, 0])] if not objects_left.size == 0: - # field_of_view[2] = np.linalg.norm(agent_position - objects_left, axis=1).min() field_of_view[2] = cdist(agent_position, objects_left, 'cityblock').squeeze(axis=0).min() if normalization_func: diff --git a/agent_code/auto_bomber/production/42/config.py b/agent_code/auto_bomber/production/42/config.py deleted file mode 100644 index 8f4730c95..000000000 --- a/agent_code/auto_bomber/production/42/config.py +++ /dev/null @@ -1,7 +0,0 @@ -MODELS_ROOT = "./models" -# MODEL_DIR = "./models/41" -MODEL_DIR = None -ACTIONS = ['UP', 'RIGHT', 'DOWN', 'LEFT', 'WAIT', 'BOMB'] -EPSILON = 0.2 -DISCOUNT = 0.4 -LEARNING_RATE = 0.0003 diff --git a/agent_code/auto_bomber/production/42/weights.pt b/agent_code/auto_bomber/production/42/weights.pt deleted file mode 100644 index 55db47681919288a284dd7a8fb751b784927c093..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 776 zcmXAnZ7kFQ0LF1tdNUzinQEshX^foDF=3XrGa(0fGwNkm*{(Y$XcUzl` zG*mQn*_FD#ma@E@bFa-?cQ+iVvG|+{d4A9Ke0Z+f-<`9wSrp-xk)58I$K(ox zd}exfnkXenC``&DiIs^$K39;DB@$+HMWloz92P;6Sen86k4Q+iO(7X(%OP))r$`^R zEqe)jm6*IpBOG|5yiESTl3nsXvbgAfP9toC$(Wd!qkk5(Xlxr1$th-KE^M)LX)2M0 zR-quM>Q%@xHEvv5&K#UG!Tjz_ z56vv^I*)3@>K~*va;_Qoz2%=0oVxMs^hc|^Rza~O4C_OI76wKGOw}W3mwhPC$nVB; zy6$S1ixB|{oBOpd^=NFRy}zaWiis-2*()JN9K0?59OXNLwP9`i~k2 zOSqMxt$}L=y@GM91A;tpMGs9&DcU!OKJqc*#i2U>^sf#$<-|TcDYQV^)f-sos{=Ez z{6`BhjIr@cg~2=fQ6pi=MYOM&%x}!1N`|rCFDU7yRfTn`(y2TC9hkGka$3Y{JeZah zJW{q}&xsx0(MQx&^Ni#9@klk=%6OH8-6uo@%-^lm8{m8~tX4CjgYtfHO^!n!s-2H} z4p1g2!n1r!eN3=b_Iw+OGh?GnAnImn;4v^htCqY%uh3hbTicATC-Ou^Zx_~lkQypa zwovpUUBZ*kJy7_Wx5k-!P*L~V@z+s391rZaM1AQ-(P7ui{c00rR{I%)bP#J&Jp;X_ zEU4e_d9I{>2x;`+LHdX95Li8y8W*8MEDL)k8BIv6uL@HHs4zYB*mL5Rjv^PH$NUS| CsffY= diff --git a/agent_code/auto_bomber/train.py b/agent_code/auto_bomber/train.py index c007dd554..3ca64047b 100644 --- a/agent_code/auto_bomber/train.py +++ b/agent_code/auto_bomber/train.py @@ -85,8 +85,6 @@ def reward_from_events(self, events: List[str]) -> int: Here you can modify the rewards your agent get so as to en/discourage certain behavior. """ - # q: how to determine the winner? - rewards_dict = self.model.hyper_parameters["game_rewards"] reward_sum = 0 for event in events:
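Side note on the distance computation: the TODO removed above proposed replacing straight-line distances with real path lengths and pointed to https://pypi.org/project/pathfinding/. The sketch below only illustrates that idea and is not part of the patch; it assumes the python-pathfinding package's A* interface and the field encoding visible in feature_engineering.py (field[x, y] == 0 for free tiles, 1 for crates, -1 for stone walls). shortest_path_length is a hypothetical helper name.

import numpy as np
from pathfinding.core.diagonal_movement import DiagonalMovement
from pathfinding.core.grid import Grid
from pathfinding.finder.a_star import AStarFinder


def shortest_path_length(field: np.ndarray, start_xy, target_xy) -> float:
    """Steps on the shortest walkable path between two tiles, or np.inf if unreachable."""
    # Grid(matrix=...) is row-major ([y][x]) with values > 0 meaning walkable,
    # so transpose the [x, y]-indexed field and mark free tiles with 1.
    matrix = (field == 0).astype(int).T.tolist()
    grid = Grid(matrix=matrix)
    start = grid.node(int(start_xy[0]), int(start_xy[1]))
    end = grid.node(int(target_xy[0]), int(target_xy[1]))
    finder = AStarFinder(diagonal_movement=DiagonalMovement.never)
    path, _runs = finder.find_path(start, end, grid)
    return float(len(path) - 1) if path else np.inf

For a coin at coin_xy, shortest_path_length(np.asarray(game_state['field']), agent_position, coin_xy) would take the place of the cityblock value that cdist currently provides, so walls and crates are routed around instead of being cut through.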