forked from ukoethe/bomberman_rl
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update to single-process code, and different timing and callbacks
- Loading branch information
Felix Draxler
committed
Feb 12, 2021
1 parent
53b6c93
commit 0c27846
Showing
14 changed files
with
1,266 additions
and
956 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
import numpy as np | ||
|
||
|
||
def setup(self): | ||
np.random.seed() | ||
|
||
|
||
def act(agent, game_state: dict):
    """Pick one of the four movement actions uniformly at random — never a bomb.

    Stores the decision on ``agent.next_action`` instead of returning it.
    """
    agent.logger.info('Pick action at random, but no bombs.')
    moves = ['RIGHT', 'LEFT', 'UP', 'DOWN']
    agent.next_action = np.random.choice(moves)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,10 @@ | ||
|
||
import numpy as np | ||
|
||
|
||
def setup(agent): | ||
def setup(self): | ||
np.random.seed() | ||
|
||
def act(agent, game_state: dict):
    """Choose a random action; movement is equally likely, bombs are rare (8%).

    The choice is written to ``agent.next_action``; nothing is returned.
    """
    agent.logger.info('Pick action at random')
    actions = ['RIGHT', 'LEFT', 'UP', 'DOWN', 'BOMB']
    weights = [.23, .23, .23, .23, .08]
    agent.next_action = np.random.choice(actions, p=weights)
|
||
def reward_update(agent):
    """No-op training callback: this agent ignores intermediate rewards."""
    return None
|
||
def end_of_episode(agent):
    """No-op end-of-round callback: nothing to persist or learn for this agent."""
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import os | ||
import pickle | ||
import random | ||
|
||
import numpy as np | ||
|
||
|
||
ACTIONS = ['UP', 'RIGHT', 'DOWN', 'LEFT', 'WAIT', 'BOMB'] | ||
|
||
|
||
def setup(self):
    """
    Set up your code. This is called once when loading each agent.
    Make sure that you prepare everything such that act(...) can be called.

    When in training mode, the separate `setup_training` in train.py is called
    after this method. This separation allows you to share your trained agent
    with other students, without revealing your training code.

    In this example, our model is a set of action probabilities that is
    independent of the game state.

    :param self: This object is passed to all callbacks and you can set arbitrary values.
    """
    model_path = "my-saved-model.pt"
    if not self.train and os.path.isfile(model_path):
        self.logger.info("Loading model from saved state.")
        # NOTE(review): pickle.load trusts the file's contents entirely —
        # only load model files you created yourself.
        with open(model_path, "rb") as fh:
            self.model = pickle.load(fh)
    else:
        self.logger.info("Setting up model from scratch.")
        # Random weights, normalised into a probability distribution over actions.
        raw = np.random.rand(len(ACTIONS))
        self.model = raw / raw.sum()
|
||
|
||
def act(self, game_state: dict) -> str:
    """
    Your agent should parse the input, think, and take a decision.

    When not in training mode, the maximum execution time for this method is 0.5s.

    :param self: The same object that is passed to all of your callbacks.
    :param game_state: The dictionary that describes everything on the board.
    :return: The action to take as a string.
    """
    # todo Exploration vs exploitation
    exploration_rate = .1
    explore = self.train and random.random() < exploration_rate
    if explore:
        self.logger.debug("Choosing action purely at random.")
        # 80%: walk in any direction. 10% wait. 10% bomb.
        return np.random.choice(ACTIONS, p=[.2, .2, .2, .2, .1, .1])

    self.logger.debug("Querying model for action.")
    return np.random.choice(ACTIONS, p=self.model)
|
||
|
||
def state_to_features(game_state: dict) -> np.array:
    """
    *This is not a required function, but an idea to structure your code.*

    Converts the game state to the input of your model, i.e. a feature vector.

    You can find out about the state of the game environment via game_state,
    which is a dictionary. Consult 'get_state_for_agent' in environment.py to
    see what it contains.

    :param game_state: A dictionary describing the current game board.
    :return: np.array
    """
    if game_state is None:
        # Before the round starts and after it ends there is no board to encode.
        return None

    # Example skeleton: build several channels of equal shape, ...
    channels = []
    channels.append(...)
    # ... stack them into one feature tensor and flatten it to a vector.
    return np.stack(channels).reshape(-1)
Oops, something went wrong.