Cleaning the features file
PrimeF committed Mar 28, 2021
1 parent 21d9930 commit 569af52
Showing 4 changed files with 7 additions and 52 deletions.
50 changes: 7 additions & 43 deletions agent_code/auto_bomber/feature_engineering.py
@@ -15,13 +15,6 @@ def state_to_features(game_state: dict) -> np.array:
:param game_state: A dictionary describing the current game board.
:return: np.array
"""
#############
# NOTES #
#############
# Coins zones signal is very weak! -> Used softmax, which keeps 0.0 entries at 0.0 by mapping them to -np.inf first
# Adding coins to crates --> not good: we need to know where the crates are, and crates are distinct from coins since they first need to be blown up

# This is the dict before the game begins and after it ends
if game_state is None:
# TODO: we need another representation for the final state here!
return np.random.rand(21)
@@ -32,7 +25,6 @@ def state_to_features(game_state: dict) -> np.array:
agent_position = np.asarray(game_state['self'][3], dtype='int')
agent_bomb_action = np.asarray(game_state['self'][2], dtype='int')
bombs_position = np.atleast_2d(np.asarray([list(bomb[0]) for bomb in game_state['bombs']], dtype='int'))
bombs_countdown = np.asarray([bomb[1] for bomb in game_state['bombs']])
explosions_position = np.argwhere(game_state['explosion_map'] > 0)
coins_position = np.atleast_2d(np.array(game_state['coins'], dtype='int'))
relevant_coins_position = coins_position[~np.isin(coins_position, agent_position).all(axis=1)]
@@ -43,51 +35,38 @@ def state_to_features(game_state: dict) -> np.array:
opponents_bomb_action = np.asarray([player[2] for player in game_state['others']])
opponents_bomb_action = np.where(opponents_bomb_action, weight_opponents_with_bomb, 1.0)

# TODO (HUGE!) --> Switch distances from Euclidean to a path-finding algorithm
# https://pypi.org/project/pathfinding/
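# A minimal sketch (not part of this commit) of how the pathfinding package
# linked above could replace the straight-line distances; the field layout and
# the path_distance helper are hypothetical:
#
#     from pathfinding.core.grid import Grid
#     from pathfinding.finder.a_star import AStarFinder
#
#     def path_distance(field, start_xy, end_xy):
#         # walkable tiles (0 in the framework field) -> 1, obstacles -> 0;
#         # Grid expects matrix[y][x], hence the transpose
#         grid = Grid(matrix=(field == 0).astype(int).T.tolist())
#         start = grid.node(start_xy[0], start_xy[1])
#         end = grid.node(end_xy[0], end_xy[1])
#         path, _runs = AStarFinder().find_path(start, end, grid)
#         return len(path) - 1 if path else np.inf  # steps; inf if unreachable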

# TODO Make BOMB_POWER dynamic from settings.py
#############################################
# DISCARDED #
# Bombs zones logic: #
# Due to bad performance in empirical tests #
#############################################
#
#
# bombs_zones = _compute_zones_heatmap(agent_position, bombs_position, 0.0,
# lambda v, w: np.where(v > 0., v[(3 + w) - v >= 0] ** w[(3 + w) - v >= 0], 0.0),
# bombs_countdown,
# lambda v: np.mean(v[v != 0.0]) if v[v != 0.0].size != 0 else 0.0,
# lambda v: -1 * np.divide(1, v, out=np.zeros_like(v), where=v != 0))

# TODO Does not account for how many coins there are in the zone
coins_zones = _compute_zones_heatmap(agent_position, relevant_coins_position, 0.0,
# aggregation_func=lambda v: np.mean(v[v != 0.0]) if v[v != 0.0].size != 0 else 0.0,
aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0,
# normalization_func=lambda v: softmax(
# np.divide(1, v, out=np.full_like(v, -np.inf), where=v != 0)) if np.all(
# v != 0.0) else v)
normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0))
crates_zones = _compute_zones_heatmap(agent_position, crates_position, 0.0,
aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0,
# normalization_func=lambda v: softmax(
# np.divide(1, v, out=np.full_like(v, -np.inf), where=v != 0)))
normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0))
opponents_zones = _compute_zones_heatmap(agent_position, opponents_position, 0.0,
weighting_func=lambda v, w: v * w,
weights=opponents_bomb_action,
aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0,
# normalization_func=lambda v: np.divide(v, np.max(v), out=np.zeros_like(v), where=v != 0))
normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0))
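# For reference (not part of this commit): the normalization shared by the
# three zone heatmaps above maps each zone's mean distance d to 1/d while
# leaving empty zones at 0.0, e.g.:
#
#     v = np.array([4., 0., 2., 8.])
#     np.divide(1, v, out=np.zeros_like(v), where=v != 0)
#     # -> array([0.25 , 0.   , 0.5  , 0.125])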

# TODO Evaluate whether to also weight bombs here by their countdown
# TODO Exclude bombs which are not relevant (!!!!)

# TODO Field of view: should not only flag unwanted positions, but also signal which positions to move towards
bombs_field_of_view = _object_in_field_of_view(agent_position, bombs_position, 0.0,
lambda v, w: -1 * np.divide(1, v, out=np.zeros_like(v),
where=v != 0),
None)
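# Note (not part of this commit): the -1/d weighting above makes closer bombs
# more strongly negative (repulsive), e.g. d = 1 -> -1.0, d = 4 -> -0.25,
# while directions without a bomb stay at 0.0.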
explosion_field_of_view = _object_in_field_of_view(agent_position, explosions_position, 0.0)
coins_field_of_view = _object_in_field_of_view(agent_position, relevant_coins_position, 0.0,
lambda v, w: np.divide(1, v, out=np.zeros_like(v), where=v != 0),
# lambda v, w: softmax(
# np.divide(1, v, out=np.full_like(v, -np.inf),
# where=v != 0)) if np.all(
# v != 0.0) else v,
None)
crates_field_of_view = _object_in_field_of_view(agent_position, crates_position, 0.0,
lambda v, w: np.divide(1, v, out=np.zeros_like(v), where=v != 0),
Expand All @@ -97,9 +76,6 @@ def state_to_features(game_state: dict) -> np.array:
None)
walls_field_of_view = _object_in_field_of_view(agent_position, walls_position, 0.0)

# OPTION: Incorporate obstacles in the features (by setting the direction of an obstacle to -1)
# Issue: Unable to distinguish between standing in front of walls and in front of crates --> an important
# distinction for bomb dropping
f_obstacles = np.zeros((4,))
f_obstacles[walls_field_of_view == 1.] = -1.
f_obstacles[explosion_field_of_view == 1.] = -1.
@@ -115,18 +91,11 @@ def state_to_features(game_state: dict) -> np.array:
new_bombs_field_of_view[bombs_field_of_view == -1.] = -1.
f_bombs = new_bombs_field_of_view

# f_bombs = np.sum(np.vstack((bombs_zones, new_bombs_field_of_view)), axis=0)
# if not np.all((f_bombs == 0.0)):
# f_bombs = np.where(f_bombs == 0.0, np.inf, f_bombs)
# f_bombs = -1 * softmax(-1 * f_bombs)
# f_bombs[new_bombs_field_of_view == -1.0] = -1.0

f_coins = np.sum(np.vstack((coins_zones, 5 * coins_field_of_view)), axis=0)
f_coins[walls_field_of_view == 1.] = 0.
if not np.all((f_coins == 0.)):
f_coins = np.where(f_coins == 0., -np.inf, f_coins)
f_coins = softmax(f_coins)
# f_coins[walls_field_of_view == 1.] = -1.
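# For reference (not part of this commit): replacing zeros with -np.inf before
# the softmax keeps blocked/empty directions at exactly 0.0 in the output,
# since exp(-np.inf) == 0 (assuming softmax is scipy.special.softmax), e.g.:
#
#     f = np.array([0.2, 0., 0.4, 0.])
#     softmax(np.where(f == 0., -np.inf, f))  # -> approx. [0.45, 0., 0.55, 0.]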

f_crates = np.sum(np.vstack((crates_zones, 5 * crates_field_of_view)), axis=0)
f_crates[walls_field_of_view == 1.] = 0.
@@ -181,7 +150,6 @@ def _compute_zones_heatmap(agent_position, objects_position, initial, weighting_
if objects_position.size == 0:
return zones

# distances = np.linalg.norm(agent_position - objects_position, axis=1)
agent_position = np.atleast_2d(agent_position)
distances = cdist(agent_position, objects_position, 'cityblock').squeeze(axis=0)
agent_position = agent_position[0]
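# Note (not part of this commit): 'cityblock' is the Manhattan (L1) distance,
# which matches step-wise movement on the grid better than the Euclidean norm,
# e.g. for the points (1, 1) and (4, 5):
#
#     cdist([[1, 1]], [[4, 5]], 'cityblock')  # -> [[7.]]  (3 + 4 steps)
#     np.linalg.norm(np.array([3, 4]))        # -> 5.0     (straight line)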
@@ -246,23 +214,19 @@ def _object_in_field_of_view(agent_position, objects_position, initial, normaliz
# Directions are actual directions, i.e. after translation of framework fields
objects_down = objects_on_x[np.where(objects_on_x[:, 1] >= agent_position[0, 1])]
if not objects_down.size == 0:
# field_of_view[1] = np.linalg.norm(agent_position - objects_down, axis=1).min()
field_of_view[1] = cdist(agent_position, objects_down, 'cityblock').squeeze(axis=0).min()
objects_up = objects_on_x[np.where(objects_on_x[:, 1] <= agent_position[0, 1])]
if not objects_up.size == 0:
# field_of_view[3] = np.linalg.norm(agent_position - objects_up, axis=1).min()
field_of_view[3] = cdist(agent_position, objects_up, 'cityblock').squeeze(axis=0).min()

# Coordinate y is given in the framework's field coordinates
objects_on_y = objects_position[np.where(objects_position[:, 1] == agent_position[0, 1])]
# Directions are actual directions, i.e. after translation of framework fields
objects_right = objects_on_y[np.where(objects_on_y[:, 0] >= agent_position[0, 0])]
if not objects_right.size == 0:
# field_of_view[0] = np.linalg.norm(agent_position - objects_right, axis=1).min()
field_of_view[0] = cdist(agent_position, objects_right, 'cityblock').squeeze(axis=0).min()
objects_left = objects_on_y[np.where(objects_on_y[:, 0] <= agent_position[0, 0])]
if not objects_left.size == 0:
# field_of_view[2] = np.linalg.norm(agent_position - objects_left, axis=1).min()
field_of_view[2] = cdist(agent_position, objects_left, 'cityblock').squeeze(axis=0).min()
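# Note (not part of this commit): after the four assignments above, field_of_view
# holds the minimum cityblock distance to the nearest object per direction,
# indexed 0 = right, 1 = down, 2 = left, 3 = up.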

if normalization_func:
7 changes: 0 additions & 7 deletions agent_code/auto_bomber/production/42/config.py

This file was deleted.

Binary file removed agent_code/auto_bomber/production/42/weights.pt
2 changes: 0 additions & 2 deletions agent_code/auto_bomber/train.py
@@ -85,8 +85,6 @@ def reward_from_events(self, events: List[str]) -> int:
Here you can modify the rewards your agent gets, so as to encourage or
discourage certain behavior.
"""
# q: how to determine the winner?

rewards_dict = self.model.hyper_parameters["game_rewards"]
reward_sum = 0
for event in events:
