style: Apply basic pre-commit format
angelmtenor committed Feb 27, 2022
1 parent 765d162 commit 9b61583
Showing 39 changed files with 2,813 additions and 1,861 deletions.
5 changes: 2 additions & 3 deletions .gitignore
@@ -1,10 +1,9 @@
__pycache__
*.py~
# *.so
results
old
.idea
tasks/wander_HD.py
.vscode/launch.json
.vscode
run_time_exp.py
.ipynb_checkpoints/README-checkpoint.md
# *.so
4 changes: 2 additions & 2 deletions README.md
@@ -4,8 +4,8 @@

This repository provides a Reinforcement Learning framework in Python from the Machine Perception and Intelligent Robotics research group [(MAPIR)](http://mapir.isa.uma.es).

Reference: *Towards a common implementation of reinforcement learning for multiple robotics tasks*. [ScienceDirect(updated)](http://www.sciencedirect.com/science/article/pii/S0957417417307613)       [Arxiv preprint](https://arxiv.org/abs/1702.06329)

Reference: *Towards a common implementation of reinforcement learning for multiple robotics tasks*.   [Arxiv preprint](https://arxiv.org/abs/1702.06329)   
[ScienceDirect](http://www.sciencedirect.com/science/article/pii/S0957417417307613)

## Requirements

1 change: 0 additions & 1 deletion __init__.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# +-----------------------------------+-----------------------------------+
# | RL-ROBOT |
# | |
26 changes: 12 additions & 14 deletions action_qbiassr.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# +-----------------------------------------------+
# | RL-ROBOT. Reinforcement Learning for Robotics |
# | Angel Martinez-Tenor |
@@ -28,7 +27,7 @@


def setup():
""" Initializes QBIASSR """
"""Initializes QBIASSR"""
global control_sequence, rewards_sequence, comb, mix, initiated
# size_sequence = size of eli queue: n < log(threshold) / log(gamma*lambda)
threshold = 0.01
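
For orientation, a minimal sketch (not part of this commit) of the queue length implied by the comment above, n < log(threshold) / log(gamma * lambda); the gamma and lambda values here are assumed for illustration, not taken from the repository defaults:

```python
import math

# Illustrative only: eligibility-queue length n < log(threshold) / log(gamma * lambda)
threshold = 0.01
gamma, lam = 0.9, 0.9  # assumed discount and trace-decay values
size_sequence = int(math.log(threshold) / math.log(gamma * lam))
print(size_sequence)  # 21 with these assumed values
```
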
@@ -47,15 +46,14 @@ def setup():
# Create mix[s], index[s], subrow[s]
n_inputs = task.n_inputs
n_states = task.n_states
comb = np.array(
list(combinations(range(n_inputs), n_inputs - 1)), dtype=np.int16)
comb = np.array(list(combinations(range(n_inputs), n_inputs - 1)), dtype=np.int16)
mix = np.full([n_states, n_inputs, n_states], -1, dtype=np.int)
try:
index = np.full(([n_states, n_inputs, n_states]), -1, dtype=np.int)
except MemoryError:
mem = (n_states **2) * n_inputs * np.dtype(np.int).itemsize / (2**20)
print( "There is Not Enough Memory. Needed {:.1f} GB.".format(mem))
print( "Please, select another task or reduce the number of states.")
mem = (n_states**2) * n_inputs * np.dtype(np.int).itemsize / (2**20)
print(f"There is Not Enough Memory. Needed {mem:.1f} GB.")
print("Please, select another task or reduce the number of states.")
exit()

for s in range(n_states):
@@ -67,12 +65,12 @@ def setup():
index[s, i, k] = agent.VAR[i, j, k]
for idx, item in enumerate(comb):
matches = reduce(np.intersect1d, (index[s, item]))
mix[s, idx, 0:len(matches)] = matches
mix[s, idx, 0 : len(matches)] = matches
initiated = True


def custom_softmax(input_array, temp):
""" Softmax Boltzmann action selection given a vector and temperature """
"""Softmax Boltzmann action selection given a vector and temperature"""
selected_action = -1

# 1: Get the probabilities
@@ -96,7 +94,7 @@ def custom_softmax(input_array, temp):


def select_biased_action(s):
""" Select an action 'a' given state 's' by QBIASSR """
"""Select an action 'a' given state 's' by QBIASSR"""
assert initiated, "QBIASSR not initiated! setup() must be called previously"

n_actions = task.n_actions
@@ -123,8 +121,8 @@ def select_biased_action(s):


def low_reward_loop_evasion(s):
""" Increase the temperature if the agent is stuck in a sequence of states
with negative average reward """
"""Increase the temperature if the agent is stuck in a sequence of states
with negative average reward"""
global temperature
global control_sequence
global rewards_sequence
@@ -136,7 +134,7 @@ def low_reward_loop_evasion(s):
temperature = DEFAULT_TEMPERATURE
return

control_sequence = lp.sasr_step[lp.step - size_sequence:lp.step, 0]
control_sequence = lp.sasr_step[lp.step - size_sequence : lp.step, 0]
# different state reached:
if s not in control_sequence:
temperature = DEFAULT_TEMPERATURE
@@ -150,7 +148,7 @@ def low_reward_loop_evasion(s):
return

# average reward positive:
rewards_sequence = lp.sasr_step[lp.step - size_sequence:lp.step, 3]
rewards_sequence = lp.sasr_step[lp.step - size_sequence : lp.step, 3]
if np.average(rewards_sequence) > 0:
temperature = DEFAULT_TEMPERATURE
return
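
For context on the functions reformatted above, a self-contained sketch (not part of this commit) of the Boltzmann/softmax action selection described by the custom_softmax docstring: probabilities are proportional to exp(Q/T), and raising the temperature T, as low_reward_loop_evasion does when the agent is stuck, flattens the distribution toward uniform exploration. Names and values below are illustrative.

```python
import numpy as np

def boltzmann_select(q_row, temperature, rng=np.random.default_rng()):
    """Sample an action index with probability proportional to exp(Q / T)."""
    z = q_row / temperature
    z = z - z.max()              # stabilize the exponentials
    probs = np.exp(z)
    probs /= probs.sum()
    return int(rng.choice(len(q_row), p=probs))

q_row = np.array([0.1, 0.5, -0.2])
print(boltzmann_select(q_row, temperature=1.0))   # mostly picks action 1
print(boltzmann_select(q_row, temperature=50.0))  # near-uniform: more exploration
```
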
17 changes: 8 additions & 9 deletions action_selection.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# +-----------------------------------------------+
# | RL-ROBOT. Reinforcement Learning for Robotics |
# | Angel Martinez-Tenor |
@@ -26,15 +25,15 @@


def setup():
""" Initialize QBIASSR if needed """
"""Initialize QBIASSR if needed"""
global initiated
if exp.ACTION_STRATEGY == "QBIASSR":
action_qbiassr.setup()
initiated = True


def execute(s):
""" From state s select an action a """
"""From state s select an action a"""

if exp.TEACH_THE_ROBOT:
print("Warning: Controlling the robot for teaching not implemented")
@@ -75,20 +74,20 @@ def execute(s):


def exploit_policy(s):
""" Exploit the action a given an state s according to the Policy """
"""Exploit the action a given an state s according to the Policy"""
selected_action = lp.policy[s]
return selected_action


def random_action():
""" Select a random action a (uniform distribution) """
"""Select a random action a (uniform distribution)"""
# random.seed()
selected_action = random.randint(0, task.n_actions - 1)
return selected_action


def egreedy(s, e): # if e = 0.3_: 30% exploration
""" Select an action a given a state s based on egreedy exploration """
"""Select an action a given a state s based on egreedy exploration"""
# random.seed()
if random.random() < e:
selected_action = random_action()
@@ -98,8 +97,8 @@ def egreedy(s, e): # if e = 0.3_: 30% exploration


def egreedy_least_explored(s, e, least):
""" Select an action a given a state s based on egreedy exploration
improving the probability of selecting the least explored action """
"""Select an action a given a state s based on egreedy exploration
improving the probability of selecting the least explored action"""
# random.seed()
if random.random() < e:
if random.random() < least:
@@ -117,7 +116,7 @@ def egreedy_least_explored(s, e, least):


def softmax(s):
""" Select an action a given a state s based on Boltzmann exploration """
"""Select an action a given a state s based on Boltzmann exploration"""
selected_action = -1
# 1: Get the probabilities
pa = np.zeros(task.n_actions)
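
As a companion to the strategies in this file, a minimal sketch (not part of this commit) of ε-greedy selection with an optional bias toward the least-explored action, in the spirit of egreedy and egreedy_least_explored; q_row and counts are illustrative stand-ins for the agent's Q-values and visit counts, and the greedy branch below uses argmax rather than the learned policy table used by exploit_policy.

```python
import random
import numpy as np

def egreedy_sketch(q_row, counts, e=0.3, least=0.5):
    """With probability e explore; within exploration, with probability `least`
    pick the least-tried action, otherwise a uniformly random one."""
    if random.random() < e:
        if random.random() < least:
            return int(np.argmin(counts))          # least-explored action
        return random.randint(0, len(q_row) - 1)   # uniform random action
    return int(np.argmax(q_row))                   # greedy action

q_row = np.array([0.2, 0.9, 0.4])
counts = np.array([10, 3, 0])
print(egreedy_sketch(q_row, counts))
```
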
48 changes: 25 additions & 23 deletions agent.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# +-----------------------------------------------+
# | RL-ROBOT. Reinforcement Learning for Robotics |
# | Angel Martinez-Tenor |
@@ -33,7 +32,7 @@


def setup_task():
""" Task setup will be performed in the agent """
"""Task setup will be performed in the agent"""
global n_inputs, in_values, n_outputs, out_values, Vs, Va, VAR, cont_VAR
global in_sizes, out_sizes, n_states, n_actions, initiated

@@ -87,12 +86,11 @@ def setup_task():
task.n_actions = n_actions
task.out_data = output_data

print("Task {} \t {} states \t {} actions".format(task.NAME, n_states,
n_actions))
print(f"Task {task.NAME} \t {n_states} states \t {n_actions} actions")


def setup():
""" Create the variables needed for this module """
"""Create the variables needed for this module"""
global Vs, Va, VAR, cont_VAR, initiated, goal_reached

robot.setup(task.AGENT_ELEMENTS, task.ENV_ELEMENTS)
@@ -115,18 +113,20 @@


def observe_state():
""" Returns the reached state s' from robot """
"""Returns the reached state s' from robot"""
assert initiated, "agent not initiated! setup() must be previously executed"

unwrapped_s = np.zeros(n_inputs)

# Special cases
if exp.TEACHING_PROCESS: # Observed states are already given
from lp import step

return exp.TAUGHT_SASR[step, 2]
elif exp.LEARN_FROM_MODEL:
from lp import s, a
import model
from lp import a, s

return model.get_sp(s, a) # return reached state s'

robot.update()
@@ -144,25 +144,25 @@ def observe_state():

state = wrap_state(unwrapped_s)

assert (0 <= state < n_states), ("Wrong state: ", str(state))
assert 0 <= state < n_states, ("Wrong state: ", str(state))
return state


def select_action(s):
""" Return action a by calling the action selection strategy """
"""Return action a by calling the action selection strategy"""
a = action_selection.execute(s)
return a


# ------------------------------------------------------------------------------
def execute_action(a):
""" Execute action in robot """
"""Execute action in robot"""
# Special cases
if exp.LEARN_FROM_MODEL:
return
elif exp.TEACHING_PROCESS and exp.SKIP_VIEW_TEACHING:
return
assert (0 <= a < n_actions), ("Wrong action: ", str(a))
assert 0 <= a < n_actions, ("Wrong action: ", str(a))

unwrapped_a = unwrap_action(a)
actuator = np.zeros(n_outputs)
@@ -175,17 +175,19 @@ def execute_action(a):

# ------------------------------------------------------------------------------
def obtain_reward(s, a, sp):
""" Return the reward obtained """
"""Return the reward obtained"""
# Special cases
if exp.TEACHING_PROCESS:
from lp import step

if step >= exp.TEACHING_STEPS:
exp.TEACHING_PROCESS = False # End of teaching
else:
return exp.TAUGHT_SASR[step, 3]
if exp.LEARN_FROM_MODEL:
# from lp import s, a, sp
import model

return model.get_r(s, a, sp)

r = task.get_reward() # (s,a, sp) arguments not needed here
@@ -194,21 +196,21 @@ def obtain_reward(s, a, sp):

# ------------------------------------------------------------------------------
def wrap_state(unw_s):
""" Compose the global state from an array of substates """
"""Compose the global state from an array of substates"""
s = unw_s[0]
for i in range(1, n_inputs):
pro = 1
for j in range(0, i):
pro *= in_sizes[j]
s += pro * unw_s[i]
assert (0 <= s < n_states), ("Wrong state: ", str(s))
assert 0 <= s < n_states, ("Wrong state: ", str(s))
return int(s)


# ------------------------------------------------------------------------------
def unwrap_state(s):
""" Return the array of substates from the global state s """
assert (0 <= s < n_states), ("Wrong state: ", str(s))
"""Return the array of substates from the global state s"""
assert 0 <= s < n_states, ("Wrong state: ", str(s))
unwrapped_s = np.zeros(n_inputs, dtype=np.int)
aux = s
for i in range(n_inputs - 1):
@@ -220,21 +222,21 @@ def unwrap_state(s):

# ------------------------------------------------------------------------------
def wrap_action(unw_a):
""" Compose the global action from an array of subactions """
"""Compose the global action from an array of subactions"""
a = unw_a[0]
for i in range(1, n_outputs):
pro = 1
for j in range(0, i):
pro *= out_sizes[j]
a += pro * unw_a[i]
assert (0 <= a < n_actions), ("Wrong action: ", str(a))
assert 0 <= a < n_actions, ("Wrong action: ", str(a))
return int(a)


# ------------------------------------------------------------------------------
def unwrap_action(a):
""" Return the array of subactions from the global action a """
assert (0 <= a < n_actions), ("Wrong action: ", str(a))
"""Return the array of subactions from the global action a"""
assert 0 <= a < n_actions, ("Wrong action: ", str(a))
unwrapped_a = np.zeros(n_outputs, dtype=np.int)
aux = a
for i in range(n_outputs - 1):
@@ -246,7 +248,7 @@ def wrap_action(unw_a):

# ------------------------------------------------------------------------------
def generate_vs():
""" Generate array of substates """
"""Generate array of substates"""
global Vs
Vs = np.zeros([n_inputs, int(max(in_sizes))])
for i in range(n_inputs):
Expand All @@ -256,7 +258,7 @@ def generate_vs():

# ------------------------------------------------------------------------------
def generate_va():
""" Generate array of subactions """
"""Generate array of subactions"""
global Va
Va = np.zeros([n_outputs, int(max(out_sizes))])

@@ -267,7 +269,7 @@ def generate_va():

# ------------------------------------------------------------------------------
def generate_var():
""" Generate Variable Matrix (input, input_value, count) -> state """
"""Generate Variable Matrix (input, input_value, count) -> state"""
global VAR, cont_VAR
VAR = np.full((n_inputs, int(max(in_sizes)), n_states), -1, dtype=np.int)
cont_VAR = np.full((n_inputs, int(max(in_sizes))), 0, dtype=np.int)
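
The wrap_state/unwrap_state and wrap_action/unwrap_action pairs touched above implement a mixed-radix encoding: each input's value is a digit whose weight is the product of the sizes of the preceding inputs. A standalone sketch (not part of this commit), with in_sizes chosen arbitrarily for illustration:

```python
in_sizes = [3, 4, 2]  # illustrative substate sizes; n_states = 3 * 4 * 2 = 24

def wrap(unwrapped, sizes):
    """Compose a single index from per-input digits (least-significant first)."""
    s, weight = 0, 1
    for value, size in zip(unwrapped, sizes):
        s += weight * value
        weight *= size
    return s

def unwrap(s, sizes):
    """Recover the per-input digits from the composed index."""
    digits = []
    for size in sizes:
        digits.append(s % size)
        s //= size
    return digits

s = wrap([2, 3, 1], in_sizes)
print(s)                    # 23
print(unwrap(s, in_sizes))  # [2, 3, 1]
```
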
5 changes: 2 additions & 3 deletions algorithm_q.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# +-----------------------------------------------+
# | RL-ROBOT. Reinforcement Learning for Robotics |
# | Angel Martinez-Tenor |
@@ -19,12 +18,12 @@


def setup():
""" Setup algorithm """
"""Setup algorithm"""
pass # no needed here


def execute():
""" Execute the learning algorithm """
"""Execute the learning algorithm"""
s = lp.s
alpha = lp.alpha
q = lp.q
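
The hunk above shows only the head of execute(). For orientation, a generic sketch (not part of this commit, and not necessarily the exact body of this function) of the standard one-step Q-learning update that a module named algorithm_q.py suggests:

```python
import numpy as np

def q_update(q, s, a, r, sp, alpha=0.1, gamma=0.9):
    """One-step Q-learning: move Q(s, a) toward r + gamma * max_a' Q(s', a')."""
    td_target = r + gamma * np.max(q[sp])
    q[s, a] += alpha * (td_target - q[s, a])
    return q

q = np.zeros((4, 2))  # 4 states, 2 actions (illustrative sizes)
q = q_update(q, s=0, a=1, r=1.0, sp=2)
print(q[0, 1])        # 0.1 after a single update with reward 1.0
```
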