Update to single-process code, and different timing and callbacks
Felix Draxler committed Feb 12, 2021
1 parent 53b6c93 commit 0c27846
Showing 14 changed files with 1,266 additions and 956 deletions.
10 changes: 10 additions & 0 deletions agent_code/peaceful_agent/callbacks.py
@@ -0,0 +1,10 @@
import numpy as np


def setup(self):
    np.random.seed()


def act(agent, game_state: dict):
    agent.logger.info('Pick action at random, but no bombs.')
    agent.next_action = np.random.choice(['RIGHT', 'LEFT', 'UP', 'DOWN'])
12 changes: 3 additions & 9 deletions agent_code/random_agent/callbacks.py
@@ -1,16 +1,10 @@

import numpy as np


def setup(agent):
def setup(self):
np.random.seed()

def act(agent):

def act(agent, game_state: dict):
agent.logger.info('Pick action at random')
agent.next_action = np.random.choice(['RIGHT', 'LEFT', 'UP', 'DOWN', 'BOMB'], p=[.23, .23, .23, .23, .08])

def reward_update(agent):
pass

def end_of_episode(agent):
pass
@@ -1,10 +1,7 @@

import numpy as np
from random import shuffle
from time import time, sleep
from collections import deque
from random import shuffle

from settings import s
import numpy as np


def look_for_targets(free_space, start, targets, logger=None):
@@ -42,7 +39,7 @@ def look_for_targets(free_space, start, targets, logger=None):
break
# Add unexplored free neighboring tiles to the queue in a random order
x, y = current
neighbors = [(x,y) for (x,y) in [(x+1,y), (x-1,y), (x,y+1), (x,y-1)] if free_space[x,y]]
neighbors = [(x, y) for (x, y) in [(x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)] if free_space[x, y]]
shuffle(neighbors)
for neighbor in neighbors:
if neighbor not in parent_dict:
@@ -75,10 +72,11 @@ def setup(self):
self.ignore_others_timer = 0


def act(self):
"""Called each game step to determine the agent's next action.
def act(self, game_state):
"""
Called each game step to determine the agent's next action.
You can find out about the state of the game environment via self.game_state,
You can find out about the state of the game environment via game_state,
which is a dictionary. Consult 'get_state_for_agent' in environment.py to see
what it contains.
@@ -91,42 +89,42 @@ def act(self):
self.logger.info('Picking action according to rule set')

# Gather information about the game state
arena = self.game_state['arena']
x, y, _, bombs_left, score = self.game_state['self']
bombs = self.game_state['bombs']
bomb_xys = [(x,y) for (x,y,t) in bombs]
others = [(x,y) for (x,y,n,b,s) in self.game_state['others']]
coins = self.game_state['coins']
arena = game_state['field']
_, score, bombs_left, (x, y) = game_state['self']
bombs = game_state['bombs']
bomb_xys = [xy for (xy, t) in bombs]
others = [xy for (n, s, b, xy) in game_state['others']]
coins = game_state['coins']
bomb_map = np.ones(arena.shape) * 5
for xb,yb,t in bombs:
for (i,j) in [(xb+h, yb) for h in range(-3,4)] + [(xb, yb+h) for h in range(-3,4)]:
for (xb, yb), t in bombs:
for (i, j) in [(xb + h, yb) for h in range(-3, 4)] + [(xb, yb + h) for h in range(-3, 4)]:
if (0 < i < bomb_map.shape[0]) and (0 < j < bomb_map.shape[1]):
bomb_map[i,j] = min(bomb_map[i,j], t)
bomb_map[i, j] = min(bomb_map[i, j], t)

# If agent has been in the same location three times recently, it's a loop
if self.coordinate_history.count((x,y)) > 2:
if self.coordinate_history.count((x, y)) > 2:
self.ignore_others_timer = 5
else:
self.ignore_others_timer -= 1
self.coordinate_history.append((x,y))
self.coordinate_history.append((x, y))

# Check which moves make sense at all
directions = [(x,y), (x+1,y), (x-1,y), (x,y+1), (x,y-1)]
directions = [(x, y), (x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)]
valid_tiles, valid_actions = [], []
for d in directions:
if ((arena[d] == 0) and
(self.game_state['explosions'][d] <= 1) and
(bomb_map[d] > 0) and
(not d in others) and
(not d in bomb_xys)):
(game_state['explosion_map'][d] <= 1) and
(bomb_map[d] > 0) and
(not d in others) and
(not d in bomb_xys)):
valid_tiles.append(d)
if (x-1,y) in valid_tiles: valid_actions.append('LEFT')
if (x+1,y) in valid_tiles: valid_actions.append('RIGHT')
if (x,y-1) in valid_tiles: valid_actions.append('UP')
if (x,y+1) in valid_tiles: valid_actions.append('DOWN')
if (x,y) in valid_tiles: valid_actions.append('WAIT')
if (x - 1, y) in valid_tiles: valid_actions.append('LEFT')
if (x + 1, y) in valid_tiles: valid_actions.append('RIGHT')
if (x, y - 1) in valid_tiles: valid_actions.append('UP')
if (x, y + 1) in valid_tiles: valid_actions.append('DOWN')
if (x, y) in valid_tiles: valid_actions.append('WAIT')
# Disallow the BOMB action if agent dropped a bomb in the same spot recently
if (bombs_left > 0) and (x,y) not in self.bomb_history: valid_actions.append('BOMB')
if (bombs_left > 0) and (x, y) not in self.bomb_history: valid_actions.append('BOMB')
self.logger.debug(f'Valid actions: {valid_actions}')

# Collect basic action proposals in a queue
@@ -135,9 +133,9 @@ def act(self):
shuffle(action_ideas)

# Compile a list of 'targets' the agent should head towards
dead_ends = [(x,y) for x in range(1,16) for y in range(1,16) if (arena[x,y] == 0)
and ([arena[x+1,y], arena[x-1,y], arena[x,y+1], arena[x,y-1]].count(0) == 1)]
crates = [(x,y) for x in range(1,16) for y in range(1,16) if (arena[x,y] == 1)]
dead_ends = [(x, y) for x in range(1, 16) for y in range(1, 16) if (arena[x, y] == 0)
and ([arena[x + 1, y], arena[x - 1, y], arena[x, y + 1], arena[x, y - 1]].count(0) == 1)]
crates = [(x, y) for x in range(1, 16) for y in range(1, 16) if (arena[x, y] == 1)]
targets = coins + dead_ends + crates
# Add other agents as targets if in hunting mode or no crates/coins left
if self.ignore_others_timer <= 0 or (len(crates) + len(coins) == 0):
@@ -151,76 +149,53 @@ def act(self):
if self.ignore_others_timer > 0:
for o in others:
free_space[o] = False
d = look_for_targets(free_space, (x,y), targets, self.logger)
if d == (x,y-1): action_ideas.append('UP')
if d == (x,y+1): action_ideas.append('DOWN')
if d == (x-1,y): action_ideas.append('LEFT')
if d == (x+1,y): action_ideas.append('RIGHT')
d = look_for_targets(free_space, (x, y), targets, self.logger)
if d == (x, y - 1): action_ideas.append('UP')
if d == (x, y + 1): action_ideas.append('DOWN')
if d == (x - 1, y): action_ideas.append('LEFT')
if d == (x + 1, y): action_ideas.append('RIGHT')
if d is None:
self.logger.debug('All targets gone, nothing to do anymore')
action_ideas.append('WAIT')

# Add proposal to drop a bomb if at dead end
if (x,y) in dead_ends:
if (x, y) in dead_ends:
action_ideas.append('BOMB')
# Add proposal to drop a bomb if touching an opponent
if len(others) > 0:
if (min(abs(xy[0] - x) + abs(xy[1] - y) for xy in others)) <= 1:
action_ideas.append('BOMB')
# Add proposal to drop a bomb if arrived at target and touching crate
if d == (x,y) and ([arena[x+1,y], arena[x-1,y], arena[x,y+1], arena[x,y-1]].count(1) > 0):
if d == (x, y) and ([arena[x + 1, y], arena[x - 1, y], arena[x, y + 1], arena[x, y - 1]].count(1) > 0):
action_ideas.append('BOMB')

# Add proposal to run away from any nearby bomb about to blow
for xb,yb,t in bombs:
if (xb == x) and (abs(yb-y) < 4):
for (xb, yb), t in bombs:
if (xb == x) and (abs(yb - y) < 4):
# Run away
if (yb > y): action_ideas.append('UP')
if (yb < y): action_ideas.append('DOWN')
# If possible, turn a corner
action_ideas.append('LEFT')
action_ideas.append('RIGHT')
if (yb == y) and (abs(xb-x) < 4):
if (yb == y) and (abs(xb - x) < 4):
# Run away
if (xb > x): action_ideas.append('LEFT')
if (xb < x): action_ideas.append('RIGHT')
# If possible, turn a corner
action_ideas.append('UP')
action_ideas.append('DOWN')
# Try random direction if directly on top of a bomb
for xb,yb,t in bombs:
for (xb, yb), t in bombs:
if xb == x and yb == y:
action_ideas.extend(action_ideas[:4])

# Pick last action added to the proposals list that is also valid
while len(action_ideas) > 0:
a = action_ideas.pop()
if a in valid_actions:
self.next_action = a
break

# Keep track of chosen action for cycle detection
if self.next_action == 'BOMB':
self.bomb_history.append((x,y))
# Keep track of chosen action for cycle detection
if a == 'BOMB':
self.bomb_history.append((x, y))


def reward_update(self):
"""Called once per step to allow intermediate rewards based on game events.
When this method is called, self.events will contain a list of all game
events relevant to your agent that occured during the previous step. Consult
settings.py to see what events are tracked. You can hand out rewards to your
agent based on these events and your knowledge of the (new) game state. In
contrast to act, this method has no time limit.
"""
self.logger.debug(f'Encountered {len(self.events)} game event(s)')


def end_of_episode(self):
"""Called at the end of each game to hand out final rewards and do training.
This is similar to reward_update, except it is only called at the end of a
game. self.events will contain all events that occured during your agent's
final step. You should place your actual learning code in this method.
"""
self.logger.debug(f'Encountered {len(self.events)} game event(s) in final step')
return a
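For agents being ported to the new callbacks, the unpacking in the updated act above doubles as documentation of the reworked game_state dictionary. A minimal sketch, assuming only the field names and tuple orders visible in this diff (the helper name is illustrative, not part of the commit):

def unpack_game_state(game_state: dict):
    # Illustrative only: unpack the fields the act callback above reads,
    # in the tuple orders shown in the new code of this diff.
    arena = game_state['field']                     # 0 = free, 1 = crate (see the checks in act)
    _name, score, bombs_left, (x, y) = game_state['self']
    bombs = game_state['bombs']                     # list of ((x, y), countdown) pairs
    bomb_xys = [xy for (xy, t) in bombs]
    others = [xy for (n, s, b, xy) in game_state['others']]
    coins = game_state['coins']                     # list of (x, y) coordinates
    explosion_map = game_state['explosion_map']
    return arena, (x, y), bombs_left, bomb_xys, others, coins, explosion_map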
79 changes: 79 additions & 0 deletions agent_code/tpl_agent/callbacks.py
@@ -0,0 +1,79 @@
import os
import pickle
import random

import numpy as np


ACTIONS = ['UP', 'RIGHT', 'DOWN', 'LEFT', 'WAIT', 'BOMB']


def setup(self):
    """
    Setup your code. This is called once when loading each agent.
    Make sure that you prepare everything such that act(...) can be called.
    When in training mode, the separate `setup_training` in train.py is called
    after this method. This separation allows you to share your trained agent
    with other students, without revealing your training code.
    In this example, our model is a set of probabilities over actions
    that is independent of the game state.
    :param self: This object is passed to all callbacks and you can set arbitrary values.
    """
    if self.train or not os.path.isfile("my-saved-model.pt"):
        self.logger.info("Setting up model from scratch.")
        weights = np.random.rand(len(ACTIONS))
        self.model = weights / weights.sum()
    else:
        self.logger.info("Loading model from saved state.")
        with open("my-saved-model.pt", "rb") as file:
            self.model = pickle.load(file)


def act(self, game_state: dict) -> str:
    """
    Your agent should parse the input, think, and take a decision.
    When not in training mode, the maximum execution time for this method is 0.5s.
    :param self: The same object that is passed to all of your callbacks.
    :param game_state: The dictionary that describes everything on the board.
    :return: The action to take as a string.
    """
    # todo Exploration vs exploitation
    random_prob = .1
    if self.train and random.random() < random_prob:
        self.logger.debug("Choosing action purely at random.")
        # 80%: walk in any direction. 10% wait. 10% bomb.
        return np.random.choice(ACTIONS, p=[.2, .2, .2, .2, .1, .1])

    self.logger.debug("Querying model for action.")
    return np.random.choice(ACTIONS, p=self.model)


def state_to_features(game_state: dict) -> np.array:
    """
    *This is not a required function, but an idea to structure your code.*
    Converts the game state to the input of your model, i.e.
    a feature vector.
    You can find out about the state of the game environment via game_state,
    which is a dictionary. Consult 'get_state_for_agent' in environment.py to see
    what it contains.
    :param game_state: A dictionary describing the current game board.
    :return: np.array
    """
    # This is the dict before the game begins and after it ends
    if game_state is None:
        return None

    # For example, you could construct several channels of equal shape, ...
    channels = []
    channels.append(...)
    # concatenate them as a feature tensor (they must have the same shape), ...
    stacked_channels = np.stack(channels)
    # and return them as a vector
    return stacked_channels.reshape(-1)
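The channel idea sketched in state_to_features can be made concrete. A minimal sketch, assuming only the 'field' and 'coins' entries of game_state as used by the agent code earlier in this commit; the function name and choice of channels are illustrative, not part of the template:

import numpy as np

def state_to_features_example(game_state: dict) -> np.array:
    # Sketch only: two channels built from the 'field' and 'coins' entries.
    if game_state is None:
        return None

    field = game_state['field'].astype(float)      # crate/wall layout of the arena
    coin_map = np.zeros_like(field)                # 1.0 wherever a coin lies
    for (cx, cy) in game_state['coins']:
        coin_map[cx, cy] = 1.0

    channels = [field, coin_map]                   # both share the arena's shape
    stacked_channels = np.stack(channels)
    return stacked_channels.reshape(-1)            # flatten to a feature vector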