Added toggle to deactivate top 3 random selection for predictions

PrimeF · Mar 21, 2021 · db8ee49 · db8ee49
1 parent fd2d5d7
commit db8ee49
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 5 deletions.
diff --git a/agent_code/auto_bomber/auto_bomber_config.py b/agent_code/auto_bomber/auto_bomber_config.py
@@ -5,3 +5,4 @@
 EPSILON = 0.25
 DISCOUNT = 0.6
 LEARNING_RATE = 0.00009
+TOP_3_RAND = True
diff --git a/agent_code/auto_bomber/model.py b/agent_code/auto_bomber/model.py
@@ -1,5 +1,5 @@
-import shutil
 import pickle
+import shutil
 from pathlib import Path
 
 import numpy as np
@@ -47,10 +47,13 @@ def select_best_action(self, game_state: dict, agent_self):
 
         q_action_values = np.dot(self.weights, features_x)
 
-        top_3_actions = q_action_values.argsort()[-3:][::-1]
-        # lets keep a little bit randomness here
-        choice = np.random.choice(top_3_actions, p=[0.9, 0.05, 0.05])
-        return config.ACTIONS[choice]
+        if config.TOP_3_RAND:
+            top_3_actions = q_action_values.argsort()[-3:][::-1]
+            # keep some randomness: best action with p=0.9, 2nd/3rd best with p=0.05 each
+            choice = np.random.choice(top_3_actions, p=[0.9, 0.05, 0.05])
+            return config.ACTIONS[choice]
+        else:
+            return config.ACTIONS[np.argmax(q_action_values)]  # greedy: map index to action like the branch above
 
     def fit_model_with_transition_batch(self, transitions: Transitions, round: int):
         loss = []