From 569af526cc7470dd59f90dcae727d9ad684e9f7f Mon Sep 17 00:00:00 2001 From: Fabrizio Primerano Date: Sun, 28 Mar 2021 14:14:32 +0200 Subject: [PATCH] Cleaning the features file --- agent_code/auto_bomber/feature_engineering.py | 50 +++--------------- .../auto_bomber/production/42/config.py | 7 --- .../auto_bomber/production/42/weights.pt | Bin 776 -> 0 bytes agent_code/auto_bomber/train.py | 2 - 4 files changed, 7 insertions(+), 52 deletions(-) delete mode 100644 agent_code/auto_bomber/production/42/config.py delete mode 100644 agent_code/auto_bomber/production/42/weights.pt diff --git a/agent_code/auto_bomber/feature_engineering.py b/agent_code/auto_bomber/feature_engineering.py index 42e2d0c22..1c34e9ae5 100644 --- a/agent_code/auto_bomber/feature_engineering.py +++ b/agent_code/auto_bomber/feature_engineering.py @@ -15,13 +15,6 @@ def state_to_features(game_state: dict) -> np.array: :param game_state: A dictionary describing the current game board. :return: np.array """ - ############# - # NOTES # - ############# - # Coins zones signal very weak! -> Used softmax, which keeps 0.0 by using -np.inf - # Add coins to crates --> not good, need to know where crates are, are distinct from coins as need to be exploded - - # This is the dict before the game begins and after it ends if game_state is None: # todo we need another representation for final state here! return np.random.rand(21) @@ -32,7 +25,6 @@ def state_to_features(game_state: dict) -> np.array: agent_position = np.asarray(game_state['self'][3], dtype='int') agent_bomb_action = np.asarray(game_state['self'][2], dtype='int') bombs_position = np.atleast_2d(np.asarray([list(bomb[0]) for bomb in game_state['bombs']], dtype='int')) - bombs_countdown = np.asarray([bomb[1] for bomb in game_state['bombs']]) explosions_position = np.argwhere(game_state['explosion_map'] > 0) coins_position = np.atleast_2d(np.array(game_state['coins'], dtype='int')) relevant_coins_position = coins_position[~np.isin(coins_position, agent_position).all(axis=1)] @@ -43,40 +35,31 @@ def state_to_features(game_state: dict) -> np.array: opponents_bomb_action = np.asarray([player[2] for player in game_state['others']]) opponents_bomb_action = np.where(opponents_bomb_action, weight_opponents_with_bomb, 1.0) - # TODO HUUUUUUUUUGE!!!!!!! 
--> Switch distances from euclidean to a path finding algorithm - # https://pypi.org/project/pathfinding/ - - # TODO Make BOMB_POWER dynamic from settings.py + ############################################# + # DISCARDED # + # Bombs zones logic: # + # Due to bad performance in empirical tests # + ############################################# + # + # # bombs_zones = _compute_zones_heatmap(agent_position, bombs_position, 0.0, # lambda v, w: np.where(v > 0., v[(3 + w) - v >= 0] ** w[(3 + w) - v >= 0], 0.0), # bombs_countdown, # lambda v: np.mean(v[v != 0.0]) if v[v != 0.0].size != 0 else 0.0, # lambda v: -1 * np.divide(1, v, out=np.zeros_like(v), where=v != 0)) - # TODO Does not account for how many coins there are in the zone coins_zones = _compute_zones_heatmap(agent_position, relevant_coins_position, 0.0, - # aggregation_func=lambda v: np.mean(v[v != 0.0]) if v[v != 0.0].size != 0 else 0.0, aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0, - # normalization_func=lambda v: softmax( - # np.divide(1, v, out=np.full_like(v, -np.inf), where=v != 0)) if np.all( - # v != 0.0) else v) normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0)) crates_zones = _compute_zones_heatmap(agent_position, crates_position, 0.0, aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0, - # normalization_func=lambda v: softmax( - # np.divide(1, v, out=np.full_like(v, -np.inf), where=v != 0))) normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0)) opponents_zones = _compute_zones_heatmap(agent_position, opponents_position, 0.0, weighting_func=lambda v, w: v * w, weights=opponents_bomb_action, aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0, - # normalization_func=lambda v: np.divide(v, np.max(v), out=np.zeros_like(v), where=v != 0)) normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0)) - # TODO Evaluate if weighting bombs also here by their countdown - # TODO Exclude bombs which are not relevant (!!!!) - - # TODO Field of view, not only says unwanted position but also says go towards position bombs_field_of_view = _object_in_field_of_view(agent_position, bombs_position, 0.0, lambda v, w: -1 * np.divide(1, v, out=np.zeros_like(v), where=v != 0), @@ -84,10 +67,6 @@ def state_to_features(game_state: dict) -> np.array: explosion_field_of_view = _object_in_field_of_view(agent_position, explosions_position, 0.0) coins_field_of_view = _object_in_field_of_view(agent_position, relevant_coins_position, 0.0, lambda v, w: np.divide(1, v, out=np.zeros_like(v), where=v != 0), - # lambda v, w: softmax( - # np.divide(1, v, out=np.full_like(v, -np.inf), - # where=v != 0)) if np.all( - # v != 0.0) else v, None) crates_field_of_view = _object_in_field_of_view(agent_position, crates_position, 0.0, lambda v, w: np.divide(1, v, out=np.zeros_like(v), where=v != 0), @@ -97,9 +76,6 @@ def state_to_features(game_state: dict) -> np.array: None) walls_field_of_view = _object_in_field_of_view(agent_position, walls_position, 0.0) - # OPTION: Incorporate obstacles in features (by setting direction of obstacle = -1) - # Issue: Unable to distinguish when in front of walls or in front of crates --> important distinction for - # bombs dropping f_obstacles = np.zeros((4,)) f_obstacles[walls_field_of_view == 1.] = -1. f_obstacles[explosion_field_of_view == 1.] = -1. @@ -115,18 +91,11 @@ def state_to_features(game_state: dict) -> np.array: new_bombs_field_of_view[bombs_field_of_view == -1.] = -1. 
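    # bombs_field_of_view holds, per axis direction, -1/(Manhattan distance) to the
    # nearest bomb in the agent's row or column (0.0 if none is in sight); the line
    # above forces directions with a directly adjacent bomb back to -1. before the
    # vector is used as the bomb feature below.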
f_bombs = new_bombs_field_of_view - # f_bombs = np.sum(np.vstack((bombs_zones, new_bombs_field_of_view)), axis=0) - # if not np.all((f_bombs == 0.0)): - # f_bombs = np.where(f_bombs == 0.0, np.inf, f_bombs) - # f_bombs = -1 * softmax(-1 * f_bombs) - # f_bombs[new_bombs_field_of_view == -1.0] = -1.0 - f_coins = np.sum(np.vstack((coins_zones, 5 * coins_field_of_view)), axis=0) f_coins[walls_field_of_view == 1.] = 0. if not np.all((f_coins == 0.)): f_coins = np.where(f_coins == 0., -np.inf, f_coins) f_coins = softmax(f_coins) - # f_coins[walls_field_of_view == 1.] = -1. f_crates = np.sum(np.vstack((crates_zones, 5 * crates_field_of_view)), axis=0) f_crates[walls_field_of_view == 1.] = 0. @@ -181,7 +150,6 @@ def _compute_zones_heatmap(agent_position, objects_position, initial, weighting_ if objects_position.size == 0: return zones - # distances = np.linalg.norm(agent_position - objects_position, axis=1) agent_position = np.atleast_2d(agent_position) distances = cdist(agent_position, objects_position, 'cityblock').squeeze(axis=0) agent_position = agent_position[0] @@ -246,11 +214,9 @@ def _object_in_field_of_view(agent_position, objects_position, initial, normaliz # Directions are actual directions, i.e. after translation of framework fields objects_down = objects_on_x[np.where(objects_on_x[:, 1] >= agent_position[0, 1])] if not objects_down.size == 0: - # field_of_view[1] = np.linalg.norm(agent_position - objects_down, axis=1).min() field_of_view[1] = cdist(agent_position, objects_down, 'cityblock').squeeze(axis=0).min() objects_up = objects_on_x[np.where(objects_on_x[:, 1] <= agent_position[0, 1])] if not objects_up.size == 0: - # field_of_view[3] = np.linalg.norm(agent_position - objects_up, axis=1).min() field_of_view[3] = cdist(agent_position, objects_up, 'cityblock').squeeze(axis=0).min() # Coordinate y is as of the framework field @@ -258,11 +224,9 @@ def _object_in_field_of_view(agent_position, objects_position, initial, normaliz # Directions are actual directions, i.e. 
after translation of framework fields objects_right = objects_on_y[np.where(objects_on_y[:, 0] >= agent_position[0, 0])] if not objects_right.size == 0: - # field_of_view[0] = np.linalg.norm(agent_position - objects_right, axis=1).min() field_of_view[0] = cdist(agent_position, objects_right, 'cityblock').squeeze(axis=0).min() objects_left = objects_on_y[np.where(objects_on_y[:, 0] <= agent_position[0, 0])] if not objects_left.size == 0: - # field_of_view[2] = np.linalg.norm(agent_position - objects_left, axis=1).min() field_of_view[2] = cdist(agent_position, objects_left, 'cityblock').squeeze(axis=0).min() if normalization_func: diff --git a/agent_code/auto_bomber/production/42/config.py b/agent_code/auto_bomber/production/42/config.py deleted file mode 100644 index 8f4730c95..000000000 --- a/agent_code/auto_bomber/production/42/config.py +++ /dev/null @@ -1,7 +0,0 @@ -MODELS_ROOT = "./models" -# MODEL_DIR = "./models/41" -MODEL_DIR = None -ACTIONS = ['UP', 'RIGHT', 'DOWN', 'LEFT', 'WAIT', 'BOMB'] -EPSILON = 0.2 -DISCOUNT = 0.4 -LEARNING_RATE = 0.0003 diff --git a/agent_code/auto_bomber/production/42/weights.pt b/agent_code/auto_bomber/production/42/weights.pt deleted file mode 100644 index 55db47681919288a284dd7a8fb751b784927c093..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 776 zcmXAnZ7kFQ0LF1tdNUzinQEshX^foDF=3XrGa(0fGwNkm*{(Y$XcUzl` zG*mQn*_FD#ma@E@bFa-?cQ+iVvG|+{d4A9Ke0Z+f-<`9wSrp-xk)58I$K(ox zd}exfnkXenC``&DiIs^$K39;DB@$+HMWloz92P;6Sen86k4Q+iO(7X(%OP))r$`^R zEqe)jm6*IpBOG|5yiESTl3nsXvbgAfP9toC$(Wd!qkk5(Xlxr1$th-KE^M)LX)2M0 zR-quM>Q%@xHEvv5&K#UG!Tjz_ z56vv^I*)3@>K~*va;_Qoz2%=0oVxMs^hc|^Rza~O4C_OI76wKGOw}W3mwhPC$nVB; zy6$S1ixB|{oBOpd^=NFRy}zaWiis-2*()JN9K0?59OXNLwP9`i~k2 zOSqMxt$}L=y@GM91A;tpMGs9&DcU!OKJqc*#i2U>^sf#$<-|TcDYQV^)f-sos{=Ez z{6`BhjIr@cg~2=fQ6pi=MYOM&%x}!1N`|rCFDU7yRfTn`(y2TC9hkGka$3Y{JeZah zJW{q}&xsx0(MQx&^Ni#9@klk=%6OH8-6uo@%-^lm8{m8~tX4CjgYtfHO^!n!s-2H} z4p1g2!n1r!eN3=b_Iw+OGh?GnAnImn;4v^htCqY%uh3hbTicATC-Ou^Zx_~lkQypa zwovpUUBZ*kJy7_Wx5k-!P*L~V@z+s391rZaM1AQ-(P7ui{c00rR{I%)bP#J&Jp;X_ zEU4e_d9I{>2x;`+LHdX95Li8y8W*8MEDL)k8BIv6uL@HHs4zYB*mL5Rjv^PH$NUS| CsffY= diff --git a/agent_code/auto_bomber/train.py b/agent_code/auto_bomber/train.py index c007dd554..3ca64047b 100644 --- a/agent_code/auto_bomber/train.py +++ b/agent_code/auto_bomber/train.py @@ -85,8 +85,6 @@ def reward_from_events(self, events: List[str]) -> int: Here you can modify the rewards your agent get so as to en/discourage certain behavior. """ - # q: how to determine the winner? - rewards_dict = self.model.hyper_parameters["game_rewards"] reward_sum = 0 for event in events:
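Side note on the distance computation: the TODO removed above proposed replacing straight-line distances with real path lengths and pointed to https://pypi.org/project/pathfinding/. The sketch below only illustrates that idea and is not part of the patch; it assumes the python-pathfinding package's A* interface and the field encoding visible in feature_engineering.py (field[x, y] == 0 for free tiles, 1 for crates, -1 for stone walls). shortest_path_length is a hypothetical helper name.

import numpy as np
from pathfinding.core.diagonal_movement import DiagonalMovement
from pathfinding.core.grid import Grid
from pathfinding.finder.a_star import AStarFinder


def shortest_path_length(field: np.ndarray, start_xy, target_xy) -> float:
    """Steps on the shortest walkable path between two tiles, or np.inf if unreachable."""
    # Grid(matrix=...) is row-major ([y][x]) with values > 0 meaning walkable,
    # so transpose the [x, y]-indexed field and mark free tiles with 1.
    matrix = (field == 0).astype(int).T.tolist()
    grid = Grid(matrix=matrix)
    start = grid.node(int(start_xy[0]), int(start_xy[1]))
    end = grid.node(int(target_xy[0]), int(target_xy[1]))
    finder = AStarFinder(diagonal_movement=DiagonalMovement.never)
    path, _runs = finder.find_path(start, end, grid)
    return float(len(path) - 1) if path else np.inf

For a coin at coin_xy, shortest_path_length(np.asarray(game_state['field']), agent_position, coin_xy) would take the place of the cityblock value that cdist currently provides, so walls and crates are routed around instead of being cut through.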