diff --git a/.gitignore b/.gitignore
index 31f717221..e3f06f830 100644
--- a/.gitignore
+++ b/.gitignore
@@ -114,4 +114,4 @@ replays/
 agent_code/auto_bomber/models
 agent_code/auto_bomber/*.pt
 agent_code/auto_bomber/runs
-.DS_Store
\ No newline at end of file
+.DS_Store
diff --git a/agent_code/auto_bomber/auto_bomber_config.py b/agent_code/auto_bomber/auto_bomber_config.py
new file mode 100644
index 000000000..efd547fc0
--- /dev/null
+++ b/agent_code/auto_bomber/auto_bomber_config.py
@@ -0,0 +1,28 @@
+import events as e
+from agent_code.auto_bomber import custom_events as ce
+
+
+MODELS_ROOT = "./models"
+# MODEL_DIR = "./models/41"
+MODEL_DIR = None
+ACTIONS = ['UP', 'RIGHT', 'DOWN', 'LEFT', 'WAIT', 'BOMB']
+EPSILON = 0.25
+DISCOUNT = 0.5
+LEARNING_RATE = 0.1
+POLICY = 'IANN'
+TEMPERATURE = 0.5
+REGION_SIZE = 2
+REGION_TIME_TOLERANCE = 8
+
+game_rewards = {
+    e.CRATE_DESTROYED: 10,
+    e.BOMB_DROPPED: 20,
+    e.COIN_FOUND: 10,
+    e.COIN_COLLECTED: 50,
+    e.KILLED_OPPONENT: 200,
+    e.INVALID_ACTION: -1,
+    e.KILLED_SELF: -300,
+    e.GOT_KILLED: -200,
+    e.SURVIVED_ROUND: 300,
+    ce.SAME_REGION: -20
+}
diff --git a/agent_code/auto_bomber/callbacks.py b/agent_code/auto_bomber/callbacks.py
index 7edf25619..4ebf44f29 100644
--- a/agent_code/auto_bomber/callbacks.py
+++ b/agent_code/auto_bomber/callbacks.py
@@ -5,7 +5,6 @@
 from agent_code.auto_bomber.feature_engineering import state_to_features
 from agent_code.auto_bomber.model import LinearAutoBomberModel
 
-
 def setup(self):
     """
     Setup your code. This is called once when loading each agent.
@@ -34,11 +33,15 @@ def act(self, game_state: dict) -> str:
     """
     hyper_parameters = self.model.hyper_parameters
 
-    # todo right now epsilon-greedy - change to softmax to avoid local maxima
-    if self.train and random.random() < hyper_parameters["epsilon"]:
-        self.logger.debug("Choosing action purely at random.")
-        # 80%: walk in any direction. 10% wait. 10% bomb.
-        return np.random.choice(hyper_parameters["actions"], p=[.2, .2, .2, .2, .1, .1])
+    if self.train and config.POLICY == 'SOFTMAX':
+        return self.model.select_best_action(game_state, self, softmax=True)
+    elif self.train and random.random() < hyper_parameters["epsilon"]:
+        if hyper_parameters["policy"] == 'GREEDY':
+            self.logger.debug("Choosing action purely at random.")
+            # 80%: walk in any direction. 10% wait. 10% bomb.
+            return np.random.choice(hyper_parameters["actions"], p=[.2, .2, .2, .2, .1, .1])
+        elif hyper_parameters["policy"] == 'IANN':
+            return self.model.select_best_action(game_state, self, softmax=True)
     else:
         self.logger.debug("Querying model for action.")
         return self.model.select_best_action(game_state, self)
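Review note on the exploration policy in callbacks.py: act() now dispatches on the configured policy. With POLICY == 'SOFTMAX' the model always samples from a softmax distribution over its action values; otherwise an epsilon roll decides between pure random exploration ('GREEDY', with the 80/10/10 walk/wait/bomb split) and softmax sampling ('IANN'), and in every remaining case the best known action is exploited. The sketch below is illustrative and not part of the patch; choose_training_action, q_values and rng are hypothetical names, the real selection lives in LinearAutoBomberModel.select_best_action, and callbacks.py is assumed to import the new config module as config.

    import numpy as np

    ACTIONS = ['UP', 'RIGHT', 'DOWN', 'LEFT', 'WAIT', 'BOMB']

    def choose_training_action(q_values, policy, epsilon, temperature, rng=None):
        # Hypothetical helper mirroring the branches in act().
        rng = rng or np.random.default_rng()
        if policy == 'SOFTMAX':
            # always sample from a Boltzmann/softmax distribution over the q-values
            p = np.exp((q_values - np.max(q_values)) / temperature)
            return rng.choice(ACTIONS, p=p / p.sum())
        if rng.random() < epsilon:
            if policy == 'GREEDY':
                # pure exploration: 80% walk, 10% wait, 10% bomb
                return rng.choice(ACTIONS, p=[.2, .2, .2, .2, .1, .1])
            if policy == 'IANN':
                p = np.exp((q_values - np.max(q_values)) / temperature)
                return rng.choice(ACTIONS, p=p / p.sum())
        # exploitation: pick the action with the highest value
        return ACTIONS[int(np.argmax(q_values))]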
diff --git a/agent_code/auto_bomber/custom_events.py b/agent_code/auto_bomber/custom_events.py
new file mode 100644
index 000000000..588cdb43d
--- /dev/null
+++ b/agent_code/auto_bomber/custom_events.py
@@ -0,0 +1 @@
+SAME_REGION = 'SAME_REGION'
\ No newline at end of file
diff --git a/agent_code/auto_bomber/feature_engineering.py b/agent_code/auto_bomber/feature_engineering.py
index 9c39f9695..09f1d44ee 100644
--- a/agent_code/auto_bomber/feature_engineering.py
+++ b/agent_code/auto_bomber/feature_engineering.py
@@ -1,6 +1,5 @@
 import numpy as np
-
-from agent_code.auto_bomber.utils import softmax
+from scipy.special import softmax
 
 
 def state_to_features(game_state: dict, weight_opponents_no_bomb=0.0) -> np.array:
@@ -17,15 +16,22 @@ def state_to_features(game_state: dict, weight_opponents_no_bomb=0.0) -> np.arra
     without BOMB action available
     :return: np.array
     """
+    #############
+    #   NOTES   #
+    #############
+    # Coin-zone signal is very weak -> use softmax, which keeps 0.0 entries at zero by mapping them to -np.inf
+    # Merging coins into the crate channel does not work: crates must be tracked separately, since (unlike coins) they first have to be blown up
+
     # This is the dict before the game begins and after it ends
     if game_state is None:
         # todo we need another representation for final state here!
-        return np.random.rand(4)
+        return np.random.rand(13)
 
     field_width, field_height = game_state['field'].shape
     assert field_width == field_height, "Field is not rectangular, some assumptions do not hold. Abort!"
 
     agent_position = np.asarray(game_state['self'][3], dtype='int')
+    agent_bomb_action = np.asarray(game_state['self'][2], dtype='int')
     bombs_position = np.asarray([list(bomb[0]) for bomb in game_state['bombs']], dtype='int')
     bombs_countdown = np.asarray([bomb[1] for bomb in game_state['bombs']])
     explosions_position = np.argwhere(game_state['explosion_map'] > 0)
@@ -36,50 +42,68 @@ def state_to_features(game_state: dict, weight_opponents_no_bomb=0.0) -> np.arra
     opponents_bomb_action = np.where(opponents_bomb_action, 1.0, weight_opponents_no_bomb)
     walls_position = np.argwhere(game_state['field'] == -1)
 
-    # TODO Evaluate normalization/scaling
-    bomb_danger_zones = _compute_zones_heatmap(agent_position, bombs_position, 1.0,
-                                               # lambda v, w: np.divide(1, v * w, out=np.ones_like(v), where=(v*w) != 0),
-                                               lambda v, w: v * w,
-                                               bombs_countdown,
-                                               # lambda v: v / np.max(v)
-                                               lambda v: np.sum(v),
-                                               lambda v: np.divide(v, np.max(v), out=np.zeros_like(v), where=v != 0))
-    # TODO Coins zones signal very weak! -> Used softmax, which keeps 0.0 by using -np.inf
+    # TODO Important: switch distances from Euclidean to a path-finding algorithm
+    #      https://pypi.org/project/pathfinding/
+
+    # TODO Make BOMB_POWER dynamic from settings.py
+    bombs_zones = _compute_zones_heatmap(agent_position, bombs_position, 0.0,
+                                         lambda v, w: np.where(v > 0., v[(3 + w) - v >= 0] ** w[(3 + w) - v >= 0], 0.0),
+                                         bombs_countdown,
+                                         lambda v: np.mean(v) if v.size != 0 else 0.0,
+                                         lambda v: -1 * np.divide(1, v, out=np.zeros_like(v), where=v != 0))
+    # TODO Does not account for how many coins there are in the zone
     coins_zones = _compute_zones_heatmap(agent_position, coins_position, 0.0,
                                          aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0,
-                                         normalization_func=lambda v: softmax(np.divide(1, v, out=np.full_like(v, -np.inf), where=v != 0)))  # v / np.max(v))
-    crates_zones = _compute_zones_heatmap(agent_position, crates_position, 0.0, aggregation_func=lambda v: np.mean(v),
-                                          normalization_func=lambda v: np.divide(1, v, out=np.zeros_like(v), where=v != 0))  # v / np.max(v))
+                                         normalization_func=lambda v: softmax(
+                                             np.divide(1, v, out=np.full_like(v, -np.inf), where=v != 0)) if np.all(
+                                             v != 0.0) else v)
+    crates_zones = _compute_zones_heatmap(agent_position, crates_position, 0.0,
+                                          aggregation_func=lambda v: np.mean(v) if v.size != 0 else 0.0,
+                                          normalization_func=lambda v: softmax(
+                                              np.divide(1, v, out=np.full_like(v, -np.inf), where=v != 0)))
     opponents_zones = _compute_zones_heatmap(agent_position, opponents_position, 0.0, lambda v, w: v * w,
                                              opponents_bomb_action, lambda v: np.sum(v),
                                              lambda v: np.divide(v, np.max(v), out=np.zeros_like(v), where=v != 0))
 
-    explosion_field_of_view = _object_in_field_of_view(agent_position, explosions_position, -1., lambda v, w: v / w,
-                                                       field_width)
+    # TODO Evaluate if weighting bombs also here by their countdown
+    # TODO Exclude bombs which are not relevant
+    bombs_field_of_view = _object_in_field_of_view(agent_position, bombs_position, 0.0,
+                                                   lambda v, w: -1 * np.divide(1, v, out=np.zeros_like(v), where=v != 0),
+                                                   None)
+    explosion_field_of_view = _object_in_field_of_view(agent_position, explosions_position, 1.0,
+                                                       lambda v, w: np.where(v == 1.0, 0.0, 1.0), None)
     coins_field_of_view = _object_in_field_of_view(agent_position, coins_position, 0.0,
                                                    lambda v, w: np.divide(1, v, out=np.zeros_like(v), where=v != 0),
                                                    None)
-    crates_field_of_view = _object_in_field_of_view(agent_position, crates_position, -1., lambda v, w: v / w, field_width)
-    # walls_field_of_view = _object_in_field_of_view(agent_position, walls_position, lambda v, w: v / w, field_width)
-    walls_field_of_view = _object_in_field_of_view(agent_position, walls_position, 0.0,
+    crates_field_of_view = _object_in_field_of_view(agent_position, crates_position, 0.0,
+                                                    lambda v, w: np.divide(1, v, out=np.zeros_like(v), where=v != 0),
+                                                    None)
+    walls_field_of_view = _object_in_field_of_view(agent_position, walls_position, 1.0,
                                                    lambda v, w: np.where(v == 1.0, 0.0, 1.0), None)
 
-    # TODO Set auxiliary reward for moving away from a danger zone
-    # TODO Negative reward for staying multiple steps in same position
-    # TODO Negative reward repetition of moves
-
-    # return np.concatenate((bomb_danger_zones, coins_zones, crates_zones, opponents_zones,
-    #                        explosion_field_of_view, coins_field_of_view, crates_field_of_view,
-    #                        walls_field_of_view), axis=None)
-    features = softmax(np.sum(np.concatenate((coins_zones, coins_field_of_view), axis=None).reshape(2, 4), axis=0))
-    # return np.concatenate((coins_zones, coins_field_of_view, walls_field_of_view), axis=None)
-    # return np.concatenate((coins_zones, coins_field_of_view), axis=None)
-    # return np.concatenate((bomb_danger_zones, coins_zones, crates_zones, opponents_zones), axis=None)
-    # return np.concatenate((coins_field_of_view, walls_field_of_view), axis=None)
-
-    features[walls_field_of_view == 0.] = -1.0
+    f_bombs = np.sum(np.vstack((bombs_zones, bombs_field_of_view)), axis=0)
+    if not np.all((f_bombs == 0.0)):
+        f_bombs = np.where(f_bombs == 0.0, np.inf, f_bombs)
+        f_bombs = -1 * softmax(-1 * f_bombs)
+
+    f_coins = np.sum(np.vstack((coins_zones, coins_field_of_view)), axis=0)
+    if not np.all((f_coins == 0.0)):
+        f_coins = np.where(f_coins == 0.0, -np.inf, f_coins)
+        f_coins = softmax(f_coins)
+    f_coins[walls_field_of_view == 0.] = -1.0
+    f_coins[explosion_field_of_view == 0.] = -1.0
+
+    f_crates = np.sum(np.vstack((crates_zones, crates_field_of_view)), axis=0)
+    if not np.all((f_crates == 0.0)):
+        f_crates = np.where(f_crates == 0.0, -np.inf, f_crates)
+        f_crates = softmax(f_crates)
+    f_crates[walls_field_of_view == 0.] = -1.0
+    f_crates[explosion_field_of_view == 0.] = -1.0
+
+    features = np.concatenate((f_coins, f_crates, f_bombs, agent_bomb_action), axis=None)
 
     return features
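Review note on the new feature layout in feature_engineering.py: the feature vector is now 13-dimensional, matching np.random.rand(13) for the missing-state case: a coin group, a crate group and a bomb group of four values each (one per zone around the agent, judging by the reshape(2, 4) that this patch removes), plus the agent's own bomb-availability flag. Each group combines a zone heatmap with a field-of-view signal, softmaxes the non-zero entries and then overwrites blocked directions with -1.0. The helpers _compute_zones_heatmap and _object_in_field_of_view are not shown in this diff, so the sketch below only illustrates the per-group post-processing on toy arrays (finalize_group is an illustrative name, not part of the patch).

    import numpy as np
    from scipy.special import softmax

    def finalize_group(zone_vals, fov_vals, walls_fov, explosions_fov):
        # Combine zone and field-of-view signals, softmax the non-zero entries
        # (zeros are mapped to -inf so they keep probability 0), then mark
        # directions blocked by walls or active explosions with -1.0.
        f = np.sum(np.vstack((zone_vals, fov_vals)), axis=0)
        if not np.all(f == 0.0):
            f = softmax(np.where(f == 0.0, -np.inf, f))
        f[walls_fov == 0.0] = -1.0
        f[explosions_fov == 0.0] = -1.0
        return f

    # Toy example: a coin signal in the second direction, a wall in the third.
    print(finalize_group(np.array([0.0, 0.4, 0.0, 0.0]),
                         np.array([0.0, 0.6, 0.0, 0.0]),
                         walls_fov=np.array([1.0, 1.0, 0.0, 1.0]),
                         explosions_fov=np.ones(4)))
    # prints [ 0.  1. -1.  0.]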
diff --git a/agent_code/auto_bomber/model.py b/agent_code/auto_bomber/model.py
index 32c5f58cb..b0f2d65e9 100644
--- a/agent_code/auto_bomber/model.py
+++ b/agent_code/auto_bomber/model.py
@@ -21,6 +21,8 @@ def __init__(self, train, feature_extractor):
         elif model_path.MODEL_DIR and not Path(model_path.MODEL_DIR).is_dir():
             raise FileNotFoundError("The specified model directory does not exist!\nIf you wish to train a NEW model"
                                     "set parameter to None, otherwise specify a valid model directory.")
+        elif not self.train and not model_path.MODEL_DIR:
+            raise ValueError("No model directory has been specified.\nA model directory is required for inference.")
         else:
             root_dir = Path(model_path.MODELS_ROOT)
             root_dir.mkdir(parents=True, exist_ok=True)
@@ -50,19 +52,20 @@ def store(self):
         with self.weights_path.open(mode="wb") as file:
             pickle.dump(self.weights, file)
 
-    def select_best_action(self, game_state: dict, agent_self):
+    def select_best_action(self, game_state: dict, agent_self, softmax=False):
        features_x = self.feature_extractor(game_state)
         self.init_if_needed(features_x, agent_self)
         q_action_values = np.dot(self.weights, features_x)
 
-        if self.hyper_parameters["top_3_rand"]:
+        if softmax:
+            exp_q = np.exp((q_action_values - np.max(q_action_values)) / self.hyper_parameters["temp"])
+            p = exp_q / np.sum(exp_q)
+            choice = np.random.choice(np.arange(q_action_values.shape[0]), p=p)
+        else:
             top_3_actions = q_action_values.argsort()[-3:][::-1]
-            # lets keep a little bit randomness here
             choice = np.random.choice(top_3_actions, p=[0.9, 0.05, 0.05])
-            return self.hyper_parameters["actions"][choice]
-        else:
-            return np.argmax(q_action_values)
+        return self.hyper_parameters["actions"][choice]
 
     def fit_model_with_transition_batch(self, transitions: Transitions, round: int):
         loss = []
diff --git a/agent_code/auto_bomber/requirements.txt b/agent_code/auto_bomber/requirements.txt
index 59e1163a5..74d41f6fc 100644
--- a/agent_code/auto_bomber/requirements.txt
+++ b/agent_code/auto_bomber/requirements.txt
@@ -3,4 +3,4 @@ pygame==2.0.1
 tqdm==4.58.0
 tensorboardX==2.1
 tensorboard==2.4.1
-
+scipy==1.6.1
\ No newline at end of file
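Review note on the softmax selection in model.py: select_best_action() can now sample the next action from a Boltzmann distribution over the linear Q-values, with the 'temp' hyper-parameter controlling how greedy the choice is (low temperature means nearly greedy, high temperature nearly uniform). A minimal, numerically stable version of that computation in isolation (a sketch, not part of the patch):

    import numpy as np

    def boltzmann_probabilities(q_values, temperature):
        # Subtracting the maximum before exponentiating avoids overflow and
        # leaves the resulting distribution unchanged.
        z = (q_values - np.max(q_values)) / temperature
        e = np.exp(z)
        return e / np.sum(e)

    q = np.array([0.2, 1.5, -0.3, 0.9, 0.0, -1.0])
    p = boltzmann_probabilities(q, temperature=0.5)
    action_index = np.random.choice(len(q), p=p)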
diff --git a/agent_code/auto_bomber/train.py b/agent_code/auto_bomber/train.py
index 7a5420b59..0b53b5375 100644
--- a/agent_code/auto_bomber/train.py
+++ b/agent_code/auto_bomber/train.py
@@ -2,11 +2,13 @@
 from collections import namedtuple, defaultdict
 from typing import List
 
-import events as e
 from agent_code.auto_bomber.feature_engineering import state_to_features
+from agent_code.auto_bomber import custom_events as ce
 
 # This is only an example!
 from agent_code.auto_bomber.transitions import Transitions
+import agent_code.auto_bomber.auto_bomber_config as config
+from queue import Queue
 
 
 def setup_training(self):
@@ -19,7 +21,7 @@ def setup_training(self):
     """
     # Example: Setup an array that will note transition tuples
     self.transitions = Transitions(state_to_features)
-
+    self.q = Queue(maxsize=config.REGION_TIME_TOLERANCE)
 
 
 def game_events_occurred(self, old_game_state: dict, last_action: str, new_game_state: dict, events: List[str]):
@@ -43,6 +45,14 @@ def game_events_occurred(self, old_game_state: dict, last_action: str, new_game_
     # state_to_features is defined in callbacks.py
     self.transitions.add_transition(old_game_state, last_action, new_game_state, reward_from_events(self, events))
 
+    # Punish the agent if it is still within the same region after a certain number of time steps
+    new_position = new_game_state["self"][3]
+    if self.q.full():
+        old_position = self.q.get()
+        if (old_position[0] - config.REGION_SIZE <= new_position[0] <= old_position[0] + config.REGION_SIZE) \
+                or (old_position[1] - config.REGION_SIZE <= new_position[1] <= old_position[1] + config.REGION_SIZE):
+            events.append(ce.SAME_REGION)
+    self.q.put(new_position)
 
 
 def end_of_round(self, last_game_state: dict, last_action: str, events: List[str]):
@@ -76,31 +86,11 @@ def reward_from_events(self, events: List[str]) -> int:
     Here you can modify the rewards your agent get so as to en/discourage
     certain behavior.
     """
-    # todo reward definition (right now only first sketch):
     # q: how to determine the winner?
-    game_rewards = {
-        e.COIN_COLLECTED: 100,
-        e.KILLED_OPPONENT: 50,
-        e.INVALID_ACTION: -100,
-        e.KILLED_SELF: -300,
-        e.GOT_KILLED: -50,
-        e.WAITED: -10,
-        e.SURVIVED_ROUND: 5
-    }
-
-    # game_rewards = {
-    #     e.COIN_COLLECTED: 20,
-    #     e.KILLED_OPPONENT: 40,
-    #     e.INVALID_ACTION: -10,
-    #     e.KILLED_SELF: -50,
-    #     e.GOT_KILLED: -30,
-    #     e.WAITED: -5,
-    #     e.SURVIVED_ROUND: -1
-    # }
+
     reward_sum = 0
     for event in events:
-        if event in game_rewards:
-            reward_sum += game_rewards[event]
+        if event in config.game_rewards:
+            reward_sum += config.game_rewards[event]
     self.logger.info(f"Awarded {reward_sum} for events {', '.join(events)}")
     return reward_sum
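Review note on the SAME_REGION penalty in train.py: game_events_occurred() keeps the last REGION_TIME_TOLERANCE positions in a FIFO queue and appends the custom SAME_REGION event (rewarded with -20 in auto_bomber_config.py) when the agent is still close to where it was REGION_TIME_TOLERANCE steps earlier. The patch triggers the penalty when either coordinate is within +/- REGION_SIZE of the old position; the sketch below, which is not part of the patch, uses a collections.deque instead of queue.Queue and requires both coordinates to be close (a stricter box test, stated here as an assumption about the intended behaviour).

    from collections import deque

    REGION_SIZE = 2
    REGION_TIME_TOLERANCE = 8

    class RegionTracker:
        # Illustrative helper: compares the current position with the one
        # recorded REGION_TIME_TOLERANCE steps earlier.

        def __init__(self):
            self.history = deque(maxlen=REGION_TIME_TOLERANCE)

        def lingering(self, position):
            is_lingering = False
            if len(self.history) == self.history.maxlen:
                old_x, old_y = self.history[0]
                is_lingering = (abs(position[0] - old_x) <= REGION_SIZE
                                and abs(position[1] - old_y) <= REGION_SIZE)
            self.history.append(position)  # maxlen evicts the oldest entry
            return is_lingering

    # Usage inside game_events_occurred:
    #     if tracker.lingering(new_game_state["self"][3]):
    #         events.append(ce.SAME_REGION)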
- """ - - # make X at least 2d - y = np.atleast_2d(X) - - # find axis - if axis is None: - axis = next(j[0] for j in enumerate(y.shape) if j[1] > 1) - - # multiply y against the theta parameter, - y = y * float(theta) - - # subtract the max for numerical stability - y = y - np.expand_dims(np.max(y, axis=axis), axis) - - # exponentiate y - y = np.exp(y) - - # take the sum along the specified axis - ax_sum = np.expand_dims(np.sum(y, axis=axis), axis) - - # finally: divide elementwise - p = y / ax_sum - - # flatten if X was 1D - if len(X.shape) == 1: - p = p.flatten() - - return p diff --git a/settings.py b/settings.py index 8ff09938b..00a1ac950 100644 --- a/settings.py +++ b/settings.py @@ -5,7 +5,7 @@ # Game properties COLS = 17 ROWS = 17 -CRATE_DENSITY = 0.0 # 0.75 +CRATE_DENSITY = 0.0 # 0.25 # 0.75 MAX_AGENTS = 4 # Round properties