style: Apply basic pre-commit format
angelmtenor committed Feb 27, 2022
1 parent 765d162 commit 9b61583
Showing 39 changed files with 2,813 additions and 1,861 deletions.
5 changes: 2 additions & 3 deletions .gitignore
@@ -1,10 +1,9 @@
__pycache__
*.py~
# *.so
results
old
.idea
tasks/wander_HD.py
.vscode/launch.json
.vscode
run_time_exp.py
.ipynb_checkpoints/README-checkpoint.md
# *.so
4 changes: 2 additions & 2 deletions README.md
@@ -4,8 +4,8 @@

This repository provides a Reinforcement Learning framework in Python from the Machine Perception and Intelligent Robotics research group [(MAPIR)](http://mapir.isa.uma.es).

Reference: *Towards a common implementation of reinforcement learning for multiple robotics tasks*. [ScienceDirect(updated)](http://www.sciencedirect.com/science/article/pii/S0957417417307613)       [Arxiv preprint](https://arxiv.org/abs/1702.06329)

Reference: *Towards a common implementation of reinforcement learning for multiple robotics tasks*.   [Arxiv preprint](https://arxiv.org/abs/1702.06329)   
[ScienceDirect](http://www.sciencedirect.com/science/article/pii/S0957417417307613)

## Requirements

1 change: 0 additions & 1 deletion __init__.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# +-----------------------------------+-----------------------------------+
# | RL-ROBOT |
# | |
26 changes: 12 additions & 14 deletions action_qbiassr.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# +-----------------------------------------------+
# | RL-ROBOT. Reinforcement Learning for Robotics |
# | Angel Martinez-Tenor |
@@ -28,7 +27,7 @@


def setup():
""" Initializes QBIASSR """
"""Initializes QBIASSR"""
global control_sequence, rewards_sequence, comb, mix, initiated
# size_sequence = size of eli queue: n < log(threshold) / log(gamma*lambda)
threshold = 0.01
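
For orientation, a minimal sketch (not part of this commit) of the queue length implied by the comment above, n < log(threshold) / log(gamma * lambda); the gamma and lambda values here are assumed for illustration, not taken from the repository defaults:

```python
import math

# Illustrative only: eligibility-queue length n < log(threshold) / log(gamma * lambda)
threshold = 0.01
gamma, lam = 0.9, 0.9  # assumed discount and trace-decay values
size_sequence = int(math.log(threshold) / math.log(gamma * lam))
print(size_sequence)  # 21 with these assumed values
```
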
@@ -47,15 +46,14 @@ def setup():
# Create mix[s], index[s], subrow[s]
n_inputs = task.n_inputs
n_states = task.n_states
comb = np.array(
list(combinations(range(n_inputs), n_inputs - 1)), dtype=np.int16)
comb = np.array(list(combinations(range(n_inputs), n_inputs - 1)), dtype=np.int16)
mix = np.full([n_states, n_inputs, n_states], -1, dtype=np.int)
try:
index = np.full(([n_states, n_inputs, n_states]), -1, dtype=np.int)
except MemoryError:
mem = (n_states **2) * n_inputs * np.dtype(np.int).itemsize / (2**20)
print( "There is Not Enough Memory. Needed {:.1f} GB.".format(mem))
print( "Please, select another task or reduce the number of states.")
mem = (n_states**2) * n_inputs * np.dtype(np.int).itemsize / (2**20)
print(f"There is Not Enough Memory. Needed {mem:.1f} GB.")
print("Please, select another task or reduce the number of states.")
exit()

for s in range(n_states):
@@ -67,12 +65,12 @@ def setup():
index[s, i, k] = agent.VAR[i, j, k]
for idx, item in enumerate(comb):
matches = reduce(np.intersect1d, (index[s, item]))
mix[s, idx, 0:len(matches)] = matches
mix[s, idx, 0 : len(matches)] = matches
initiated = True


def custom_softmax(input_array, temp):
""" Softmax Boltzmann action selection given a vector and temperature """
"""Softmax Boltzmann action selection given a vector and temperature"""
selected_action = -1

# 1: Get the probabilities
@@ -96,7 +94,7 @@ def custom_softmax(input_array, temp):


def select_biased_action(s):
""" Select an action 'a' given state 's' by QBIASSR """
"""Select an action 'a' given state 's' by QBIASSR"""
assert initiated, "QBIASSR not initiated! setup() must be called previously"

n_actions = task.n_actions
@@ -123,8 +121,8 @@ def select_biased_action(s):


def low_reward_loop_evasion(s):
""" Increase the temperature if the agent is stuck in a sequence of states
with negative average reward """
"""Increase the temperature if the agent is stuck in a sequence of states
with negative average reward"""
global temperature
global control_sequence
global rewards_sequence
@@ -136,7 +134,7 @@ def low_reward_loop_evasion(s):
temperature = DEFAULT_TEMPERATURE
return

control_sequence = lp.sasr_step[lp.step - size_sequence:lp.step, 0]
control_sequence = lp.sasr_step[lp.step - size_sequence : lp.step, 0]
# different state reached:
if s not in control_sequence:
temperature = DEFAULT_TEMPERATURE
@@ -150,7 +148,7 @@ def low_reward_loop_evasion(s):
return

# average reward positive:
rewards_sequence = lp.sasr_step[lp.step - size_sequence:lp.step, 3]
rewards_sequence = lp.sasr_step[lp.step - size_sequence : lp.step, 3]
if np.average(rewards_sequence) > 0:
temperature = DEFAULT_TEMPERATURE
return
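
For context on the functions reformatted above, a self-contained sketch (not part of this commit) of the Boltzmann/softmax action selection described by the custom_softmax docstring: probabilities are proportional to exp(Q/T), and raising the temperature T, as low_reward_loop_evasion does when the agent is stuck, flattens the distribution toward uniform exploration. Names and values below are illustrative.

```python
import numpy as np

def boltzmann_select(q_row, temperature, rng=np.random.default_rng()):
    """Sample an action index with probability proportional to exp(Q / T)."""
    z = q_row / temperature
    z = z - z.max()              # stabilize the exponentials
    probs = np.exp(z)
    probs /= probs.sum()
    return int(rng.choice(len(q_row), p=probs))

q_row = np.array([0.1, 0.5, -0.2])
print(boltzmann_select(q_row, temperature=1.0))   # mostly picks action 1
print(boltzmann_select(q_row, temperature=50.0))  # near-uniform: more exploration
```
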
17 changes: 8 additions & 9 deletions action_selection.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# +-----------------------------------------------+
# | RL-ROBOT. Reinforcement Learning for Robotics |
# | Angel Martinez-Tenor |
@@ -26,15 +25,15 @@


def setup():
""" Initialize QBIASSR if needed """
"""Initialize QBIASSR if needed"""
global initiated
if exp.ACTION_STRATEGY == "QBIASSR":
action_qbiassr.setup()
initiated = True


def execute(s):
""" From state s select an action a """
"""From state s select an action a"""

if exp.TEACH_THE_ROBOT:
print("Warning: Controlling the robot for teaching not implemented")
@@ -75,20 +74,20 @@ def execute(s):


def exploit_policy(s):
""" Exploit the action a given an state s according to the Policy """
"""Exploit the action a given an state s according to the Policy"""
selected_action = lp.policy[s]
return selected_action


def random_action():
""" Select a random action a (uniform distribution) """
"""Select a random action a (uniform distribution)"""
# random.seed()
selected_action = random.randint(0, task.n_actions - 1)
return selected_action


def egreedy(s, e): # if e = 0.3_: 30% exploration
""" Select an action a given a state s based on egreedy exploration """
"""Select an action a given a state s based on egreedy exploration"""
# random.seed()
if random.random() < e:
selected_action = random_action()
@@ -98,8 +97,8 @@ def egreedy(s, e): # if e = 0.3_: 30% exploration


def egreedy_least_explored(s, e, least):
""" Select an action a given a state s based on egreedy exploration
improving the probability of selecting the least explored action """
"""Select an action a given a state s based on egreedy exploration
improving the probability of selecting the least explored action"""
# random.seed()
if random.random() < e:
if random.random() < least:
@@ -117,7 +116,7 @@ def egreedy_least_explored(s, e, least):


def softmax(s):
""" Select an action a given a state s based on Boltzmann exploration """
"""Select an action a given a state s based on Boltzmann exploration"""
selected_action = -1
# 1: Get the probabilities
pa = np.zeros(task.n_actions)
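
As a companion to the strategies in this file, a minimal sketch (not part of this commit) of ε-greedy selection with an optional bias toward the least-explored action, in the spirit of egreedy and egreedy_least_explored; q_row and counts are illustrative stand-ins for the agent's Q-values and visit counts, and the greedy branch below uses argmax rather than the learned policy table used by exploit_policy.

```python
import random
import numpy as np

def egreedy_sketch(q_row, counts, e=0.3, least=0.5):
    """With probability e explore; within exploration, with probability `least`
    pick the least-tried action, otherwise a uniformly random one."""
    if random.random() < e:
        if random.random() < least:
            return int(np.argmin(counts))          # least-explored action
        return random.randint(0, len(q_row) - 1)   # uniform random action
    return int(np.argmax(q_row))                   # greedy action

q_row = np.array([0.2, 0.9, 0.4])
counts = np.array([10, 3, 0])
print(egreedy_sketch(q_row, counts))
```
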
48 changes: 25 additions & 23 deletions agent.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# +-----------------------------------------------+
# | RL-ROBOT. Reinforcement Learning for Robotics |
# | Angel Martinez-Tenor |
@@ -33,7 +32,7 @@


def setup_task():
""" Task setup will be performed in the agent """
"""Task setup will be performed in the agent"""
global n_inputs, in_values, n_outputs, out_values, Vs, Va, VAR, cont_VAR
global in_sizes, out_sizes, n_states, n_actions, initiated

@@ -87,12 +86,11 @@ def setup_task():
task.n_actions = n_actions
task.out_data = output_data

print("Task {} \t {} states \t {} actions".format(task.NAME, n_states,
n_actions))
print(f"Task {task.NAME} \t {n_states} states \t {n_actions} actions")


def setup():
""" Create the variables needed for this module """
"""Create the variables needed for this module"""
global Vs, Va, VAR, cont_VAR, initiated, goal_reached

robot.setup(task.AGENT_ELEMENTS, task.ENV_ELEMENTS)
@@ -115,18 +113,20 @@


def observe_state():
""" Returns the reached state s' from robot """
"""Returns the reached state s' from robot"""
assert initiated, "agent not initiated! setup() must be previously executed"

unwrapped_s = np.zeros(n_inputs)

# Special cases
if exp.TEACHING_PROCESS: # Observed states are already given
from lp import step

return exp.TAUGHT_SASR[step, 2]
elif exp.LEARN_FROM_MODEL:
from lp import s, a
import model
from lp import a, s

return model.get_sp(s, a) # return reached state s'

robot.update()
@@ -144,25 +144,25 @@ def observe_state():

state = wrap_state(unwrapped_s)

assert (0 <= state < n_states), ("Wrong state: ", str(state))
assert 0 <= state < n_states, ("Wrong state: ", str(state))
return state


def select_action(s):
""" Return action a by calling the action selection strategy """
"""Return action a by calling the action selection strategy"""
a = action_selection.execute(s)
return a


# ------------------------------------------------------------------------------
def execute_action(a):
""" Execute action in robot """
"""Execute action in robot"""
# Special cases
if exp.LEARN_FROM_MODEL:
return
elif exp.TEACHING_PROCESS and exp.SKIP_VIEW_TEACHING:
return
assert (0 <= a < n_actions), ("Wrong action: ", str(a))
assert 0 <= a < n_actions, ("Wrong action: ", str(a))

unwrapped_a = unwrap_action(a)
actuator = np.zeros(n_outputs)
@@ -175,17 +175,19 @@ def execute_action(a):

# ------------------------------------------------------------------------------
def obtain_reward(s, a, sp):
""" Return the reward obtained """
"""Return the reward obtained"""
# Special cases
if exp.TEACHING_PROCESS:
from lp import step

if step >= exp.TEACHING_STEPS:
exp.TEACHING_PROCESS = False # End of teaching
else:
return exp.TAUGHT_SASR[step, 3]
if exp.LEARN_FROM_MODEL:
# from lp import s, a, sp
import model

return model.get_r(s, a, sp)

r = task.get_reward() # (s,a, sp) arguments not needed here
@@ -194,21 +196,21 @@ def obtain_reward(s, a, sp):

# ------------------------------------------------------------------------------
def wrap_state(unw_s):
""" Compose the global state from an array of substates """
"""Compose the global state from an array of substates"""
s = unw_s[0]
for i in range(1, n_inputs):
pro = 1
for j in range(0, i):
pro *= in_sizes[j]
s += pro * unw_s[i]
assert (0 <= s < n_states), ("Wrong state: ", str(s))
assert 0 <= s < n_states, ("Wrong state: ", str(s))
return int(s)


# ------------------------------------------------------------------------------
def unwrap_state(s):
""" Return the array of substates from the global state s """
assert (0 <= s < n_states), ("Wrong state: ", str(s))
"""Return the array of substates from the global state s"""
assert 0 <= s < n_states, ("Wrong state: ", str(s))
unwrapped_s = np.zeros(n_inputs, dtype=np.int)
aux = s
for i in range(n_inputs - 1):
@@ -220,21 +222,21 @@ def unwrap_state(s):

# ------------------------------------------------------------------------------
def wrap_action(unw_a):
""" Compose the global action from an array of subactions """
"""Compose the global action from an array of subactions"""
a = unw_a[0]
for i in range(1, n_outputs):
pro = 1
for j in range(0, i):
pro *= out_sizes[j]
a += pro * unw_a[i]
assert (0 <= a < n_actions), ("Wrong action: ", str(a))
assert 0 <= a < n_actions, ("Wrong action: ", str(a))
return int(a)


# ------------------------------------------------------------------------------
def unwrap_action(a):
""" Return the array of subactions from the global action a """
assert (0 <= a < n_actions), ("Wrong action: ", str(a))
"""Return the array of subactions from the global action a"""
assert 0 <= a < n_actions, ("Wrong action: ", str(a))
unwrapped_a = np.zeros(n_outputs, dtype=np.int)
aux = a
for i in range(n_outputs - 1):
@@ -246,7 +248,7 @@ def wrap_action(unw_a):

# ------------------------------------------------------------------------------
def generate_vs():
""" Generate array of substates """
"""Generate array of substates"""
global Vs
Vs = np.zeros([n_inputs, int(max(in_sizes))])
for i in range(n_inputs):
Expand All @@ -256,7 +258,7 @@ def generate_vs():

# ------------------------------------------------------------------------------
def generate_va():
""" Generate array of subactions """
"""Generate array of subactions"""
global Va
Va = np.zeros([n_outputs, int(max(out_sizes))])

@@ -267,7 +269,7 @@ def generate_va():

# ------------------------------------------------------------------------------
def generate_var():
""" Generate Variable Matrix (input, input_value, count) -> state """
"""Generate Variable Matrix (input, input_value, count) -> state"""
global VAR, cont_VAR
VAR = np.full((n_inputs, int(max(in_sizes)), n_states), -1, dtype=np.int)
cont_VAR = np.full((n_inputs, int(max(in_sizes))), 0, dtype=np.int)
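
The wrap_state/unwrap_state and wrap_action/unwrap_action pairs touched above implement a mixed-radix encoding: each input's value is a digit whose weight is the product of the sizes of the preceding inputs. A standalone sketch (not part of this commit), with in_sizes chosen arbitrarily for illustration:

```python
in_sizes = [3, 4, 2]  # illustrative substate sizes; n_states = 3 * 4 * 2 = 24

def wrap(unwrapped, sizes):
    """Compose a single index from per-input digits (least-significant first)."""
    s, weight = 0, 1
    for value, size in zip(unwrapped, sizes):
        s += weight * value
        weight *= size
    return s

def unwrap(s, sizes):
    """Recover the per-input digits from the composed index."""
    digits = []
    for size in sizes:
        digits.append(s % size)
        s //= size
    return digits

s = wrap([2, 3, 1], in_sizes)
print(s)                    # 23
print(unwrap(s, in_sizes))  # [2, 3, 1]
```
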
5 changes: 2 additions & 3 deletions algorithm_q.py
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# +-----------------------------------------------+
# | RL-ROBOT. Reinforcement Learning for Robotics |
# | Angel Martinez-Tenor |
@@ -19,12 +18,12 @@


def setup():
""" Setup algorithm """
"""Setup algorithm"""
pass # no needed here


def execute():
""" Execute the learning algorithm """
"""Execute the learning algorithm"""
s = lp.s
alpha = lp.alpha
q = lp.q
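
The hunk above shows only the head of execute(). For orientation, a generic sketch (not part of this commit, and not necessarily the exact body of this function) of the standard one-step Q-learning update that a module named algorithm_q.py suggests:

```python
import numpy as np

def q_update(q, s, a, r, sp, alpha=0.1, gamma=0.9):
    """One-step Q-learning: move Q(s, a) toward r + gamma * max_a' Q(s', a')."""
    td_target = r + gamma * np.max(q[sp])
    q[s, a] += alpha * (td_target - q[s, a])
    return q

q = np.zeros((4, 2))  # 4 states, 2 actions (illustrative sizes)
q = q_update(q, s=0, a=1, r=1.0, sp=2)
print(q[0, 1])        # 0.1 after a single update with reward 1.0
```
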