Update to single-process code, and different timing and callbacks
Felix Draxler committed Feb 12, 2021
1 parent 53b6c93 commit 0c27846
Showing 14 changed files with 1,266 additions and 956 deletions.
10 changes: 10 additions & 0 deletions agent_code/peaceful_agent/callbacks.py
@@ -0,0 +1,10 @@
import numpy as np


def setup(self):
    np.random.seed()


def act(agent, game_state: dict):
    agent.logger.info('Pick action at random, but no bombs.')
    agent.next_action = np.random.choice(['RIGHT', 'LEFT', 'UP', 'DOWN'])
12 changes: 3 additions & 9 deletions agent_code/random_agent/callbacks.py
@@ -1,16 +1,10 @@

import numpy as np


def setup(agent):
def setup(self):
np.random.seed()

def act(agent):

def act(agent, game_state: dict):
agent.logger.info('Pick action at random')
agent.next_action = np.random.choice(['RIGHT', 'LEFT', 'UP', 'DOWN', 'BOMB'], p=[.23, .23, .23, .23, .08])

def reward_update(agent):
pass

def end_of_episode(agent):
pass
@@ -1,10 +1,7 @@

import numpy as np
from random import shuffle
from time import time, sleep
from collections import deque
from random import shuffle

from settings import s
import numpy as np


def look_for_targets(free_space, start, targets, logger=None):
@@ -42,7 +39,7 @@ def look_for_targets(free_space, start, targets, logger=None):
break
# Add unexplored free neighboring tiles to the queue in a random order
x, y = current
neighbors = [(x,y) for (x,y) in [(x+1,y), (x-1,y), (x,y+1), (x,y-1)] if free_space[x,y]]
neighbors = [(x, y) for (x, y) in [(x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)] if free_space[x, y]]
shuffle(neighbors)
for neighbor in neighbors:
if neighbor not in parent_dict:
@@ -75,10 +72,11 @@ def setup(self):
self.ignore_others_timer = 0


def act(self):
"""Called each game step to determine the agent's next action.
def act(self, game_state):
"""
Called each game step to determine the agent's next action.
You can find out about the state of the game environment via self.game_state,
You can find out about the state of the game environment via game_state,
which is a dictionary. Consult 'get_state_for_agent' in environment.py to see
what it contains.
@@ -91,42 +89,42 @@ def act(self):
self.logger.info('Picking action according to rule set')

# Gather information about the game state
arena = self.game_state['arena']
x, y, _, bombs_left, score = self.game_state['self']
bombs = self.game_state['bombs']
bomb_xys = [(x,y) for (x,y,t) in bombs]
others = [(x,y) for (x,y,n,b,s) in self.game_state['others']]
coins = self.game_state['coins']
arena = game_state['field']
_, score, bombs_left, (x, y) = game_state['self']
bombs = game_state['bombs']
bomb_xys = [xy for (xy, t) in bombs]
others = [xy for (n, s, b, xy) in game_state['others']]
coins = game_state['coins']
bomb_map = np.ones(arena.shape) * 5
for xb,yb,t in bombs:
for (i,j) in [(xb+h, yb) for h in range(-3,4)] + [(xb, yb+h) for h in range(-3,4)]:
for (xb, yb), t in bombs:
for (i, j) in [(xb + h, yb) for h in range(-3, 4)] + [(xb, yb + h) for h in range(-3, 4)]:
if (0 < i < bomb_map.shape[0]) and (0 < j < bomb_map.shape[1]):
bomb_map[i,j] = min(bomb_map[i,j], t)
bomb_map[i, j] = min(bomb_map[i, j], t)

# If agent has been in the same location three times recently, it's a loop
if self.coordinate_history.count((x,y)) > 2:
if self.coordinate_history.count((x, y)) > 2:
self.ignore_others_timer = 5
else:
self.ignore_others_timer -= 1
self.coordinate_history.append((x,y))
self.coordinate_history.append((x, y))

# Check which moves make sense at all
directions = [(x,y), (x+1,y), (x-1,y), (x,y+1), (x,y-1)]
directions = [(x, y), (x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)]
valid_tiles, valid_actions = [], []
for d in directions:
if ((arena[d] == 0) and
(self.game_state['explosions'][d] <= 1) and
(bomb_map[d] > 0) and
(not d in others) and
(not d in bomb_xys)):
(game_state['explosion_map'][d] <= 1) and
(bomb_map[d] > 0) and
(not d in others) and
(not d in bomb_xys)):
valid_tiles.append(d)
if (x-1,y) in valid_tiles: valid_actions.append('LEFT')
if (x+1,y) in valid_tiles: valid_actions.append('RIGHT')
if (x,y-1) in valid_tiles: valid_actions.append('UP')
if (x,y+1) in valid_tiles: valid_actions.append('DOWN')
if (x,y) in valid_tiles: valid_actions.append('WAIT')
if (x - 1, y) in valid_tiles: valid_actions.append('LEFT')
if (x + 1, y) in valid_tiles: valid_actions.append('RIGHT')
if (x, y - 1) in valid_tiles: valid_actions.append('UP')
if (x, y + 1) in valid_tiles: valid_actions.append('DOWN')
if (x, y) in valid_tiles: valid_actions.append('WAIT')
# Disallow the BOMB action if agent dropped a bomb in the same spot recently
if (bombs_left > 0) and (x,y) not in self.bomb_history: valid_actions.append('BOMB')
if (bombs_left > 0) and (x, y) not in self.bomb_history: valid_actions.append('BOMB')
self.logger.debug(f'Valid actions: {valid_actions}')

# Collect basic action proposals in a queue
@@ -135,9 +133,9 @@ def act(self):
shuffle(action_ideas)

# Compile a list of 'targets' the agent should head towards
dead_ends = [(x,y) for x in range(1,16) for y in range(1,16) if (arena[x,y] == 0)
and ([arena[x+1,y], arena[x-1,y], arena[x,y+1], arena[x,y-1]].count(0) == 1)]
crates = [(x,y) for x in range(1,16) for y in range(1,16) if (arena[x,y] == 1)]
dead_ends = [(x, y) for x in range(1, 16) for y in range(1, 16) if (arena[x, y] == 0)
and ([arena[x + 1, y], arena[x - 1, y], arena[x, y + 1], arena[x, y - 1]].count(0) == 1)]
crates = [(x, y) for x in range(1, 16) for y in range(1, 16) if (arena[x, y] == 1)]
targets = coins + dead_ends + crates
# Add other agents as targets if in hunting mode or no crates/coins left
if self.ignore_others_timer <= 0 or (len(crates) + len(coins) == 0):
@@ -151,76 +149,53 @@ def act(self):
if self.ignore_others_timer > 0:
for o in others:
free_space[o] = False
d = look_for_targets(free_space, (x,y), targets, self.logger)
if d == (x,y-1): action_ideas.append('UP')
if d == (x,y+1): action_ideas.append('DOWN')
if d == (x-1,y): action_ideas.append('LEFT')
if d == (x+1,y): action_ideas.append('RIGHT')
d = look_for_targets(free_space, (x, y), targets, self.logger)
if d == (x, y - 1): action_ideas.append('UP')
if d == (x, y + 1): action_ideas.append('DOWN')
if d == (x - 1, y): action_ideas.append('LEFT')
if d == (x + 1, y): action_ideas.append('RIGHT')
if d is None:
self.logger.debug('All targets gone, nothing to do anymore')
action_ideas.append('WAIT')

# Add proposal to drop a bomb if at dead end
if (x,y) in dead_ends:
if (x, y) in dead_ends:
action_ideas.append('BOMB')
# Add proposal to drop a bomb if touching an opponent
if len(others) > 0:
if (min(abs(xy[0] - x) + abs(xy[1] - y) for xy in others)) <= 1:
action_ideas.append('BOMB')
# Add proposal to drop a bomb if arrived at target and touching crate
if d == (x,y) and ([arena[x+1,y], arena[x-1,y], arena[x,y+1], arena[x,y-1]].count(1) > 0):
if d == (x, y) and ([arena[x + 1, y], arena[x - 1, y], arena[x, y + 1], arena[x, y - 1]].count(1) > 0):
action_ideas.append('BOMB')

# Add proposal to run away from any nearby bomb about to blow
for xb,yb,t in bombs:
if (xb == x) and (abs(yb-y) < 4):
for (xb, yb), t in bombs:
if (xb == x) and (abs(yb - y) < 4):
# Run away
if (yb > y): action_ideas.append('UP')
if (yb < y): action_ideas.append('DOWN')
# If possible, turn a corner
action_ideas.append('LEFT')
action_ideas.append('RIGHT')
if (yb == y) and (abs(xb-x) < 4):
if (yb == y) and (abs(xb - x) < 4):
# Run away
if (xb > x): action_ideas.append('LEFT')
if (xb < x): action_ideas.append('RIGHT')
# If possible, turn a corner
action_ideas.append('UP')
action_ideas.append('DOWN')
# Try random direction if directly on top of a bomb
for xb,yb,t in bombs:
for (xb, yb), t in bombs:
if xb == x and yb == y:
action_ideas.extend(action_ideas[:4])

# Pick last action added to the proposals list that is also valid
while len(action_ideas) > 0:
a = action_ideas.pop()
if a in valid_actions:
self.next_action = a
break

# Keep track of chosen action for cycle detection
if self.next_action == 'BOMB':
self.bomb_history.append((x,y))
# Keep track of chosen action for cycle detection
if a == 'BOMB':
self.bomb_history.append((x, y))


def reward_update(self):
"""Called once per step to allow intermediate rewards based on game events.
When this method is called, self.events will contain a list of all game
events relevant to your agent that occured during the previous step. Consult
settings.py to see what events are tracked. You can hand out rewards to your
agent based on these events and your knowledge of the (new) game state. In
contrast to act, this method has no time limit.
"""
self.logger.debug(f'Encountered {len(self.events)} game event(s)')


def end_of_episode(self):
"""Called at the end of each game to hand out final rewards and do training.
This is similar to reward_update, except it is only called at the end of a
game. self.events will contain all events that occured during your agent's
final step. You should place your actual learning code in this method.
"""
self.logger.debug(f'Encountered {len(self.events)} game event(s) in final step')
return a
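For agents being ported to the new callbacks, the unpacking in the updated act above doubles as documentation of the reworked game_state dictionary. A minimal sketch, assuming only the field names and tuple orders visible in this diff (the helper name is illustrative, not part of the commit):

def unpack_game_state(game_state: dict):
    # Illustrative only: unpack the fields the act callback above reads,
    # in the tuple orders shown in the new code of this diff.
    arena = game_state['field']                     # 0 = free, 1 = crate (see the checks in act)
    _name, score, bombs_left, (x, y) = game_state['self']
    bombs = game_state['bombs']                     # list of ((x, y), countdown) pairs
    bomb_xys = [xy for (xy, t) in bombs]
    others = [xy for (n, s, b, xy) in game_state['others']]
    coins = game_state['coins']                     # list of (x, y) coordinates
    explosion_map = game_state['explosion_map']
    return arena, (x, y), bombs_left, bomb_xys, others, coins, explosion_map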
79 changes: 79 additions & 0 deletions agent_code/tpl_agent/callbacks.py
@@ -0,0 +1,79 @@
import os
import pickle
import random

import numpy as np


ACTIONS = ['UP', 'RIGHT', 'DOWN', 'LEFT', 'WAIT', 'BOMB']


def setup(self):
    """
    Setup your code. This is called once when loading each agent.
    Make sure that you prepare everything such that act(...) can be called.
    When in training mode, the separate `setup_training` in train.py is called
    after this method. This separation allows you to share your trained agent
    with other students, without revealing your training code.
    In this example, our model is a set of probabilities over actions
    that is independent of the game state.
    :param self: This object is passed to all callbacks and you can set arbitrary values.
    """
    if self.train or not os.path.isfile("my-saved-model.pt"):
        self.logger.info("Setting up model from scratch.")
        weights = np.random.rand(len(ACTIONS))
        self.model = weights / weights.sum()
    else:
        self.logger.info("Loading model from saved state.")
        with open("my-saved-model.pt", "rb") as file:
            self.model = pickle.load(file)


def act(self, game_state: dict) -> str:
    """
    Your agent should parse the input, think, and take a decision.
    When not in training mode, the maximum execution time for this method is 0.5s.
    :param self: The same object that is passed to all of your callbacks.
    :param game_state: The dictionary that describes everything on the board.
    :return: The action to take as a string.
    """
    # todo Exploration vs exploitation
    random_prob = .1
    if self.train and random.random() < random_prob:
        self.logger.debug("Choosing action purely at random.")
        # 80%: walk in any direction. 10% wait. 10% bomb.
        return np.random.choice(ACTIONS, p=[.2, .2, .2, .2, .1, .1])

    self.logger.debug("Querying model for action.")
    return np.random.choice(ACTIONS, p=self.model)


def state_to_features(game_state: dict) -> np.array:
    """
    *This is not a required function, but an idea to structure your code.*
    Converts the game state to the input of your model, i.e.
    a feature vector.
    You can find out about the state of the game environment via game_state,
    which is a dictionary. Consult 'get_state_for_agent' in environment.py to see
    what it contains.
    :param game_state: A dictionary describing the current game board.
    :return: np.array
    """
    # This is the dict before the game begins and after it ends
    if game_state is None:
        return None

    # For example, you could construct several channels of equal shape, ...
    channels = []
    channels.append(...)
    # concatenate them as a feature tensor (they must have the same shape), ...
    stacked_channels = np.stack(channels)
    # and return them as a vector
    return stacked_channels.reshape(-1)
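The channel idea sketched in state_to_features can be made concrete. A minimal sketch, assuming only the 'field' and 'coins' entries of game_state as used by the agent code earlier in this commit; the function name and choice of channels are illustrative, not part of the template:

import numpy as np

def state_to_features_example(game_state: dict) -> np.array:
    # Sketch only: two channels built from the 'field' and 'coins' entries.
    if game_state is None:
        return None

    field = game_state['field'].astype(float)      # crate/wall layout of the arena
    coin_map = np.zeros_like(field)                # 1.0 wherever a coin lies
    for (cx, cy) in game_state['coins']:
        coin_map[cx, cy] = 1.0

    channels = [field, coin_map]                   # both share the arena's shape
    stacked_channels = np.stack(channels)
    return stacked_channels.reshape(-1)            # flatten to a feature vector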