Skip to content

Commit

Permalink
Added toggle to deactivate top 3 random selection for predictions
Browse files Browse the repository at this point in the history
  • Loading branch information
tkrieger committed Mar 21, 2021
1 parent fd2d5d7 commit db8ee49
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
1 change: 1 addition & 0 deletions agent_code/auto_bomber/auto_bomber_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
EPSILON = 0.25
DISCOUNT = 0.6
LEARNING_RATE = 0.00009
TOP_3_RAND = True
13 changes: 8 additions & 5 deletions agent_code/auto_bomber/model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import shutil
import pickle
import shutil
from pathlib import Path

import numpy as np
Expand Down Expand Up @@ -47,10 +47,13 @@ def select_best_action(self, game_state: dict, agent_self):

q_action_values = np.dot(self.weights, features_x)

top_3_actions = q_action_values.argsort()[-3:][::-1]
# let's keep a little bit of randomness here
choice = np.random.choice(top_3_actions, p=[0.9, 0.05, 0.05])
return config.ACTIONS[choice]
if config.TOP_3_RAND:
top_3_actions = q_action_values.argsort()[-3:][::-1]
# let's keep a little bit of randomness here
choice = np.random.choice(top_3_actions, p=[0.9, 0.05, 0.05])
return config.ACTIONS[choice]
else:
return np.argmax(q_action_values)

def fit_model_with_transition_batch(self, transitions: Transitions, round: int):
loss = []
Expand Down

0 comments on commit db8ee49

Please sign in to comment.