Commit 63c9b28

Submission files for test
PrimeF committed Mar 29, 2021
1 parent 7d0bfeb commit 63c9b28
Showing 5 changed files with 273 additions and 1 deletion.
Binary file added agent_code/auto_bomber/avatar.png
2 changes: 1 addition & 1 deletion agent_code/auto_bomber/model_path.py
@@ -1,3 +1,3 @@
 MODELS_DEFAULT_ROOT = "./models"
 TF_BOARD_DIR = "./runs/opponents"
-MODEL_DIR = None
+MODEL_DIR = "./production/rew4_IANN_eps0.25_temp0.9_disc0.9_lr0.0003"
@@ -0,0 +1,271 @@
import numpy as np
from scipy.special import softmax
from scipy.spatial.distance import cdist


def state_to_features(game_state: dict) -> np.array:
"""
Converts the game state to the input of your model, i.e.
a feature vector.
You can find out about the state of the game environment via game_state,
which is a dictionary. Consult 'get_state_for_agent' in environment.py to see
what it contains.
:param game_state: A dictionary describing the current game board.
:return: np.array
"""
#############
# NOTES #
#############
# Coins zones signal very weak! -> Used softmax, which keeps 0.0 by using -np.inf
# Add coins to crates --> not good, need to know where crates are, are distinct from coins as need to be exploded

# This is the dict before the game begins and after it ends
if game_state is None:
# todo we need another representation for final state here!
return np.random.rand(21)

    field_width, field_height = game_state['field'].shape
    assert field_width == field_height, "Field is not square, some assumptions do not hold. Abort!"

    agent_position = np.asarray(game_state['self'][3], dtype='int')
    agent_bomb_action = np.asarray(game_state['self'][2], dtype='int')
    bombs_position = np.atleast_2d(np.asarray([list(bomb[0]) for bomb in game_state['bombs']], dtype='int'))
    bombs_countdown = np.asarray([bomb[1] for bomb in game_state['bombs']])
    explosions_position = np.argwhere(game_state['explosion_map'] > 0)
    coins_position = np.atleast_2d(np.array(game_state['coins'], dtype='int'))
    # Filter out the coin on the agent's own tile
    relevant_coins_position = coins_position[~np.isin(coins_position, agent_position).all(axis=1)]
    crates_position = np.argwhere(game_state['field'] == 1)
    walls_position = np.argwhere(game_state['field'] == -1)
    weight_opponents_with_bomb = 0.8
    opponents_position = np.atleast_2d(np.asarray([list(player[3]) for player in game_state['others']], dtype='int'))
    opponents_bomb_action = np.asarray([player[2] for player in game_state['others']])
    opponents_bomb_action = np.where(opponents_bomb_action, weight_opponents_with_bomb, 1.0)

    # TODO IMPORTANT! --> Switch distances from Euclidean/cityblock metrics to a path-finding algorithm
    # https://pypi.org/project/pathfinding/

    # TODO Make BOMB_POWER dynamic from settings.py
    # bombs_zones = _compute_zones_heatmap(agent_position, bombs_position, 0.0,
    #                                      lambda v, w: np.where(v > 0., v[(3 + w) - v >= 0] ** w[(3 + w) - v >= 0], 0.0),
    #                                      bombs_countdown,
    #                                      lambda v: np.mean(v[v != 0.0]) if v[v != 0.0].size != 0 else 0.0,
    #                                      lambda v: -1 * np.divide(1, v, out=np.zeros_like(v), where=v != 0))

    # TODO Does not account for how many coins there are in the zone
    coins_zones = _compute_zones_heatmap(agent_position, relevant_coins_position, 0.0,
                                         aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0,
                                         normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0))
    crates_zones = _compute_zones_heatmap(agent_position, crates_position, 0.0,
                                          aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0,
                                          normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0))
    # Opponents that can still drop a bomb are weighted (0.8), so their scaled distance is
    # smaller and the normalized signal stronger.
    opponents_zones = _compute_zones_heatmap(agent_position, opponents_position, 0.0,
                                             weighting_func=lambda v, w: v * w,
                                             weights=opponents_bomb_action,
                                             aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0,
                                             normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0))

    # TODO Evaluate whether bombs should also be weighted by their countdown here
    # TODO Exclude bombs which are not relevant (!!!!)

    # TODO Field of view should not only flag unwanted positions, but also point towards desired ones
    bombs_field_of_view = _object_in_field_of_view(agent_position, bombs_position, 0.0,
                                                   lambda v, w: -1 * np.divide(1, v, out=np.zeros_like(v), where=v != 0),
                                                   None)
    explosion_field_of_view = _object_in_field_of_view(agent_position, explosions_position, 0.0)
    coins_field_of_view = _object_in_field_of_view(agent_position, relevant_coins_position, 0.0,
                                                   lambda v, w: np.divide(1, v, out=np.zeros_like(v), where=v != 0),
                                                   None)
    crates_field_of_view = _object_in_field_of_view(agent_position, crates_position, 0.0,
                                                    lambda v, w: np.divide(1, v, out=np.zeros_like(v), where=v != 0),
                                                    None)
    opponents_field_of_view = _object_in_field_of_view(agent_position, opponents_position, 0.0,
                                                       lambda v, w: np.divide(1, v, out=np.zeros_like(v), where=v != 0),
                                                       None)
    walls_field_of_view = _object_in_field_of_view(agent_position, walls_position, 0.0)

    # OPTION: Incorporate obstacles into the features (by setting the direction of an obstacle to -1).
    # Issue: unable to distinguish standing in front of a wall from standing in front of a crate
    # --> an important distinction for dropping bombs.
    f_obstacles = np.zeros((4,))
    f_obstacles[walls_field_of_view == 1.] = -1.
    f_obstacles[explosion_field_of_view == 1.] = -1.
    f_obstacles[bombs_field_of_view == -1.] = -1.
    f_obstacles[crates_field_of_view == 1.] = -1.

    new_bombs_field_of_view = np.copy(bombs_field_of_view)
    if bombs_position.size != 0 and np.isin(bombs_position, agent_position).all(axis=1).any():
        # Agent is standing on a bomb: escaping is equally urgent in every direction
        new_bombs_field_of_view = np.array([0.25, 0.25, 0.25, 0.25])
    else:
        # Make directions away from bombs attractive: each direction gains the
        # sign-flipped bomb signal of the other three directions
        for i in range(4):
            new_bombs_field_of_view[i] += np.sum(-1 * np.delete(bombs_field_of_view / 3., i))
        new_bombs_field_of_view[bombs_field_of_view == -1.] = -1.
    f_bombs = new_bombs_field_of_view

    # Alternative: combine zone and field-of-view signals for bombs as well
    # f_bombs = np.sum(np.vstack((bombs_zones, new_bombs_field_of_view)), axis=0)
    # if not np.all(f_bombs == 0.0):
    #     f_bombs = np.where(f_bombs == 0.0, np.inf, f_bombs)
    #     f_bombs = -1 * softmax(-1 * f_bombs)
    # f_bombs[new_bombs_field_of_view == -1.0] = -1.0

    # Softmax over the combined signals; zero entries are mapped to -np.inf first,
    # so they stay at exactly 0.0 after the softmax (see NOTES above).
    f_coins = np.sum(np.vstack((coins_zones, 5 * coins_field_of_view)), axis=0)
    f_coins[walls_field_of_view == 1.] = 0.
    if not np.all(f_coins == 0.):
        f_coins = np.where(f_coins == 0., -np.inf, f_coins)
        f_coins = softmax(f_coins)

    f_crates = np.sum(np.vstack((crates_zones, 5 * crates_field_of_view)), axis=0)
    f_crates[walls_field_of_view == 1.] = 0.
    if not np.all(f_crates == 0.):
        f_crates = np.where(f_crates == 0., -np.inf, f_crates)
        f_crates = softmax(f_crates)
    f_crates[crates_field_of_view == 1.] = -1.

    f_opponents = np.sum(np.vstack((opponents_zones, 5 * opponents_field_of_view)), axis=0)
    f_opponents[walls_field_of_view == 1.] = 0.
    if not np.all(f_opponents == 0.):
        f_opponents = np.where(f_opponents == 0., -np.inf, f_opponents)
        f_opponents = softmax(f_opponents)
    f_opponents[opponents_field_of_view == 1.] = -1.

    features = np.concatenate((f_coins, f_crates, f_bombs, f_opponents, f_obstacles, agent_bomb_action), axis=None)

    return features
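
# Feature layout returned by state_to_features (21 entries), in the order assembled above:
#   [0:4]   f_coins            (right, down, left, up)
#   [4:8]   f_crates
#   [8:12]  f_bombs
#   [12:16] f_opponents
#   [16:20] f_obstacles
#   [20]    agent_bomb_action  (1 if the agent can still drop a bomb, else 0)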


def _compute_zones_heatmap(agent_position, objects_position, initial, weighting_func=None, weights=None,
                           aggregation_func=None, normalization_func=None):
    """
    Computes the distance of the given objects from the agent and determines their position relative to the agent.
    The game field is divided into 4 quadrants relative to the agent's position, each covering an angle of 90 degrees.
    The quadrants, i.e. zones, thus lie above, left of, below, and right of the agent.
    An optional weighting can be applied to the objects.

    Parameters
    ----------
    agent_position : np.array
        Position of the agent (x, y)
    objects_position : np.array
        Positions of the objects on the field
    initial : float
        Fill value for the zones when no objects are present
    weighting_func : callable, optional
        Function to additionally weight the objects
    weights : np.array
        Weights to apply to the objects' distances
    aggregation_func : callable
        Function to aggregate the (weighted) distances within each zone (required when objects are present)
    normalization_func : callable, optional
        Function to normalize (or scale) the aggregated values of the zones

    Returns
    -------
    np.array
        An array with 4 values (right, down, left, up) representing the (weighted) density
        of the specified objects in the quadrants around the agent
    """
    zones = np.full(shape=(4,), fill_value=initial)

    if objects_position.size == 0:
        return zones

    agent_position = np.atleast_2d(agent_position)
    distances = cdist(agent_position, objects_position, 'cityblock').squeeze(axis=0)
    agent_position = agent_position[0]
    if weighting_func:
        distances = weighting_func(distances, weights)
    angles = np.degrees(
        np.arctan2(objects_position[:, 1] - agent_position[1], objects_position[:, 0] - agent_position[0]))
    angles = (angles + 360) % 360

    # TODO Evaluate: map an object to two zones if it lies in between
    # Note: the field's y-axis points downwards, so the computed UP/DOWN zones are flipped on screen.
    # Computed: RIGHT; Actual: RIGHT
    zones[0] = aggregation_func(
        distances[np.where(((angles >= 0) & (angles < 45)) | ((angles >= 315) & (angles <= 360)))])
    # Computed: UP; Actual: DOWN
    zones[1] = aggregation_func(distances[np.where((angles >= 45) & (angles < 135))])
    # Computed: LEFT; Actual: LEFT
    zones[2] = aggregation_func(distances[np.where((angles >= 135) & (angles < 225))])
    # Computed: DOWN; Actual: UP
    zones[3] = aggregation_func(distances[np.where((angles >= 225) & (angles < 315))])

    if normalization_func:
        zones = normalization_func(zones)

    return zones
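
# Hedged mini-example (hypothetical values): an agent at (8, 8) with a single object
# at (12, 8) sees it at angle 0 degrees, so only the RIGHT zone (index 0) receives
# its cityblock distance of 4:
#   _compute_zones_heatmap(np.array([8, 8]), np.array([[12, 8]]), 0.0,
#                          aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0)
#   --> array([4., 0., 0., 0.])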


def _object_in_field_of_view(agent_position, objects_position, initial, normalization_func=None, norm_constant=None):
    """
    Specifies the field of view w.r.t. the given objects.
    When computing the distance of the agent to the objects, the agent's own position
    is included, i.e. if the agent is ON the object, the distance is 0.0.

    Parameters
    ----------
    agent_position : np.array
        Position of the agent (x, y)
    objects_position : np.array
        Positions of the objects on the field
    initial : float
        Fill value for directions in which no object is visible
    normalization_func : callable, optional
        Function to normalize (or scale) the distances in the 4 directions
    norm_constant : optional
        Constant passed to the normalization function

    Returns
    -------
    np.array
        An array with 4 values (right, down, left, up) representing the distance
        of the agent to the nearest object (if any) right, below, left, and above of it.
    """
    # TODO Maybe scale values: small distance -> high value, large distance -> small value
    field_of_view = np.full(shape=(4,), fill_value=initial)

    if objects_position.size == 0:
        return field_of_view

    agent_position = np.atleast_2d(agent_position)

    # The x coordinate follows the framework's field convention
    objects_on_x = objects_position[np.where(objects_position[:, 0] == agent_position[0, 0])]
    # Directions are actual on-screen directions, i.e. after translating the framework's coordinates
    objects_down = objects_on_x[np.where(objects_on_x[:, 1] >= agent_position[0, 1])]
    if not objects_down.size == 0:
        field_of_view[1] = cdist(agent_position, objects_down, 'cityblock').squeeze(axis=0).min()
    objects_up = objects_on_x[np.where(objects_on_x[:, 1] <= agent_position[0, 1])]
    if not objects_up.size == 0:
        field_of_view[3] = cdist(agent_position, objects_up, 'cityblock').squeeze(axis=0).min()

    # The y coordinate follows the framework's field convention
    objects_on_y = objects_position[np.where(objects_position[:, 1] == agent_position[0, 1])]
    objects_right = objects_on_y[np.where(objects_on_y[:, 0] >= agent_position[0, 0])]
    if not objects_right.size == 0:
        field_of_view[0] = cdist(agent_position, objects_right, 'cityblock').squeeze(axis=0).min()
    objects_left = objects_on_y[np.where(objects_on_y[:, 0] <= agent_position[0, 0])]
    if not objects_left.size == 0:
        field_of_view[2] = cdist(agent_position, objects_left, 'cityblock').squeeze(axis=0).min()

    if normalization_func:
        field_of_view = normalization_func(field_of_view, norm_constant)

    return field_of_view
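
# Hedged mini-example (hypothetical values): an agent at (8, 8) with objects at
# (8, 10) and (5, 8) sees the first one below itself and the second to its left:
#   _object_in_field_of_view(np.array([8, 8]), np.array([[8, 10], [5, 8]]), 0.0)
#   --> array([0., 2., 3., 0.])   # (right, down, left, up)


# --- Usage sketch (illustrative only, not part of the submitted agent) ---
# The minimal game_state below is a hypothetical stand-in for the dictionary produced
# by 'get_state_for_agent' in environment.py; the field encoding follows the code
# above (1 = crate, -1 = wall, 0 = free tile).
if __name__ == '__main__':
    _demo_state = {
        'field': np.zeros((17, 17), dtype='int'),
        'explosion_map': np.zeros((17, 17)),
        'self': ('auto_bomber', 0, True, (1, 1)),  # (name, score, bomb_available, (x, y))
        'others': [],
        'bombs': [],  # list of ((x, y), countdown) tuples
        'coins': [(3, 1), (1, 5)],
    }
    assert state_to_features(_demo_state).shape == (21,)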
@@ -0,0 +1 @@
{"actions": ["UP", "RIGHT", "DOWN", "LEFT", "WAIT", "BOMB"], "epsilon": 0.25, "discount": 0.9, "learning_rate": 0.0003, "policy": "IANN", "temperature": 0.9, "region_size": 2, "region_time_tolerance": 6, "game_rewards": {"CRATE_DESTROYED": 80, "COIN_FOUND": 20, "COIN_COLLECTED": 50, "KILLED_OPPONENT": 150, "INVALID_ACTION": -100, "KILLED_SELF": -200, "GOT_KILLED": -100, "SURVIVED_ROUND": 80, "WAITED": -5, "BOMB_DROPPED": 10}}
Binary file not shown.
