Initial commit
angelmtenor committed Nov 14, 2016
0 parents commit 4091bd7
Showing 54 changed files with 6,996 additions and 0 deletions.
10 changes: 10 additions & 0 deletions .gitignore
@@ -0,0 +1,10 @@
__pycache__
*.py~

# *.so

results
old


.idea
34 changes: 34 additions & 0 deletions AUTHORS
@@ -0,0 +1,34 @@
+--------------------------------------+---------------------------------------+
| ABOUT |
+------------------------------------------------------------------------------+

This file contains the list of people involved in the development of
RL-ROBOT, which started at the
Machine Perception and Intelligent Robotics (MAPIR) laboratory
at the University of Malaga <http://mapir.isa.uma.es>

If you feel someone is missing, please fork and submit a pull request.
The following list is roughly sorted in reverse chronological order.


+--------------------------------------+---------------------------------------+
| DEVELOPERS & CONTRIBUTORS |
+------------------------------------------------------------------------------+

* Angel Martinez-Tenor
[email protected]
http://mapir.isa.uma.es/mapirwebsite/index.php/people/115-people/230-angel-martinez-tenor
main developer


+--------------------------------------+---------------------------------------+
| OTHERS |
+------------------------------------------------------------------------------+

Bug reports and new feature suggestions provided by users worldwide
will usually be mentioned in the changelog.

We kindly thank all of them for this valuable feedback.


--- END OF FILE ---
19 changes: 19 additions & 0 deletions LICENSE
@@ -0,0 +1,19 @@
+--------------------------------------+---------------------------------------+
| LICENSE |
+------------------------------------------------------------------------------+

* RL-ROBOT is released under the GPLv3 license. See the license-GPLv3 file
  or, if it is not present, <http://www.gnu.org/licenses/>.

* For a closed-source version of RL-ROBOT
for commercial purposes, please contact the authors.

* If you use RL-ROBOT in academic work, please cite the most relevant
  associated publication listed at <http://mapir.isa.uma.es>; if none
  applies, please cite the Machine Perception and Intelligent Robotics
  (MAPIR) research group directly.



--- END OF FILE ---
31 changes: 31 additions & 0 deletions README.md
@@ -0,0 +1,31 @@
# RL-ROBOT
This repository provides a Reinforcement Learning framework in Python from the Machine Perception and Intelligent Robotics research group [(MAPIR)](http://mapir.isa.uma.es).

### Requirements
* Python 3
* numpy
* matplotlib
* tkinter: `sudo apt-get install python3-tk`


### V-REP settings:
(Tested with V-REP_PRO_EDU_V3_3_2, 64-bit, on Linux)

1. Use the default values in `remoteApiConnections.txt`:
~~~
portIndex1_port = 19997
portIndex1_debug = false
portIndex1_syncSimTrigger = true
~~~
2. Activate threaded rendering (recommended):
`system/usrset.txt -> threadedRenderingDuringSimulation = 1`
3. **Execute V-REP** (`./vrep.sh` on Linux) and open a scene: `File -> Open Scene ->` (any of the scenes provided for RL-ROBOT).

Recommended simulation settings for the RL-ROBOT scenes (already set in the provided ones):
* Simulation step time: 50 ms (default)
* Real-Time Simulation: Enabled
* Multiplication factor: 3.00 (requires a CPU >= i3-3110M)
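
A quick way to verify these settings is to connect to a running V-REP scene from Python over the remote API. This is only a minimal sketch, not part of this commit; it assumes `vrep.py` and the `remoteApi` shared library shipped with V-REP are on the Python path:

~~~python
import vrep  # legacy V-REP remote API bindings (shipped with V-REP)

vrep.simxFinish(-1)  # close any previously opened connections
# Default port 19997 from remoteApiConnections.txt
client_id = vrep.simxStart('127.0.0.1', 19997, True, True, 5000, 5)
if client_id == -1:
    raise RuntimeError('Could not connect to V-REP on port 19997')

vrep.simxSynchronous(client_id, True)  # matches portIndex1_syncSimTrigger = true
vrep.simxStartSimulation(client_id, vrep.simx_opmode_oneshot_wait)
# ... learning steps would go here, advancing the simulation with
# vrep.simxSynchronousTrigger(client_id) ...
vrep.simxStopSimulation(client_id, vrep.simx_opmode_oneshot_wait)
vrep.simxFinish(client_id)
~~~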
23 changes: 23 additions & 0 deletions __init__.py
@@ -0,0 +1,23 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# +-----------------------------------+-----------------------------------+
# | RL-ROBOT |
# | |
# | Copyright (c) 2016, Individual contributors, see AUTHORS file. |
# | Machine Perception and Intelligent Robotics (MAPIR), |
# | University of Malaga. <http://mapir.isa.uma.es> |
# | |
# | This program is free software: you can redistribute it and/or modify |
# | it under the terms of the GNU General Public License as published by |
# | the Free Software Foundation, either version 3 of the License, or |
# | (at your option) any later version. |
# | |
# | This program is distributed in the hope that it will be useful, |
# | but WITHOUT ANY WARRANTY; without even the implied warranty of |
# | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
# | GNU General Public License for more details. |
# | |
# | You should have received a copy of the GNU General Public License |
# | along with this program. If not, see <http://www.gnu.org/licenses/>. |
# +-----------------------------------------------------------------------+
""" RL-ROS init """
168 changes: 168 additions & 0 deletions action_qbiasr.py
@@ -0,0 +1,168 @@
# -*- coding: utf-8 -*-
# +-----------------------------------------------+
# | RL-ROBOT. Reinforcement Learning for Robotics |
# | Angel Martinez-Tenor |
# | MAPIR. University of Malaga. 2016 |
# +-----------------------------------------------+
""" Q-Biased Softmax Regression (QBIASR) v0.8.7 optimized """

import math
import random
from functools import reduce
from itertools import combinations

import numpy as np

import agent
import exp
import lp
import task

DEFAULT_TEMPERATURE = exp.TEMPERATURE
temperature = DEFAULT_TEMPERATURE

control_sequence = np.full(0, -1, dtype=np.int32)
rewards_sequence = np.full(0, -1, dtype=np.float32)

mix = np.full(0, -1, dtype=np.int)
comb = np.full(0, -1, dtype=np.int)
initiated = False


def setup():
""" Initializes QBIASR """
global control_sequence, rewards_sequence, comb, mix, initiated

# size_sequence = size of eli queue: n < log(threshold) / log(gamma*lambda)
threshold = 0.01
size_sequence = int(math.log(threshold) / math.log(exp.GAMMA * exp.LAMBDA))
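    # Illustrative (example values only): with exp.GAMMA = 0.9 and exp.LAMBDA = 0.9,
    # size_sequence = int(log(0.01) / log(0.81)) = 21 before the clamping below.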

# size_sequence limits: [4, n_states/4]
lower_limit = 4 # Mandatory
upper_limit = int(task.n_states / 4)
if size_sequence > upper_limit:
size_sequence = upper_limit
if size_sequence < lower_limit:
size_sequence = lower_limit
control_sequence = np.full(size_sequence, -1, dtype=np.int32)
rewards_sequence = np.full(size_sequence, -1, dtype=np.float32)

# Create mix[s], index[s], subrow[s]
n_inputs = task.n_inputs
n_states = task.n_states

comb = np.array(list(combinations(range(n_inputs), n_inputs - 1)),
dtype=np.int16)
    # len(comb) = C(n_inputs, n_inputs - 1) = n_inputs
mix = np.full([n_states, n_inputs, n_states], -1, dtype=np.int)
index = np.full(([n_states, n_inputs, n_states]), -1, dtype=np.int)

for s in range(n_states):
ss = agent.unwrap_state(s)

for i in range(ss.size):
j = ss[i]
n = agent.cont_VAR[i, j]
for k in range(n):
index[s, i, k] = agent.VAR[i, j, k]

for idx, item in enumerate(comb):
matches = reduce(np.intersect1d, (index[s, item]))
mix[s, idx, 0:len(matches)] = matches
initiated = True


def custom_softmax(input_array, temp):
""" Softmax Boltzmann action selection given a vector and temperature """
selected_action = -1
# 1: Get the probabilities
_input_array_size = len(input_array)
_Pa = np.zeros(_input_array_size)

for i in range(_input_array_size):
_Pa[i] = math.exp(input_array[i] / temp)
_Pa = np.divide(_Pa, sum(_Pa))

# 2: Select the action
ran = random.random()
accum = 0.0
for i in range(_input_array_size):
accum = accum + _Pa[i]
if ran < accum:
selected_action = i
break
assert (selected_action > -1)
return selected_action


def select_biased_action(s):
""" Select an action 'a' given state 's' by QBIASR """
assert initiated, " QBIASR not initiated! setup() must be called previously"

# n_combinations = math.factorial(N_INPUTS)/(math.factorial(level)*
# math.factorial(N_INPUTS-level))
n_actions = task.n_actions
q = lp.q
q_limit = lp.q_limit
bias_s = 0
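    # bias_s: for each combination of all-but-one state inputs, average the Q-rows
    # of the states (mix[s, c]) that share those input values with s, then average
    # over the combinations (a soft generalization across neighboring states).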
for c in range(len(comb)):
s_array = mix[s, c]
s_array = s_array[s_array >= 0]
subrow = np.zeros((len(s_array), n_actions))
for idx, item in enumerate(s_array):
subrow[idx] = q[item]
# for k in range(len(s_array)):
# subrow[k] = q[s_array[k]]
aux = np.average(subrow, 0)
bias_s += aux / len(comb)

low_reward_loop_evasion(s)
q_s_bias = q[s] + bias_s # q_s_bias = np.sum([q[s], bias_s], axis=0)

# 2016_05_26: Temporal qs_bias row is normalized for softmax regression.
# Standard q_limit: 100 (e.g: Rmax=10, GAMMA=0.9)
q_s_bias *= 100.0 / q_limit
selected_action = custom_softmax(tuple(q_s_bias), temperature)
return selected_action


def low_reward_loop_evasion(s):
""" Increase the temperature if the agent is stuck in a sequence of states
with negative average reward """
global temperature
global control_sequence
global rewards_sequence

size_sequence = control_sequence.size

# early steps of learning:
if lp.step < size_sequence:
temperature = DEFAULT_TEMPERATURE
return

control_sequence = lp.sasr_step[lp.step - size_sequence:lp.step, 0]
# different state reached:
if s not in control_sequence:
temperature = DEFAULT_TEMPERATURE
return

# not enough repeated states:
unique_sequence = np.unique(control_sequence)
loop_rate = control_sequence.size / unique_sequence.size
if loop_rate <= 2:
temperature = DEFAULT_TEMPERATURE
return

# average reward positive:
rewards_sequence = lp.sasr_step[lp.step - size_sequence:lp.step, 3]
if np.average(rewards_sequence) > 0:
temperature = DEFAULT_TEMPERATURE
return

# low reward loop detected. Evasion:
temperature += 0.25 * loop_rate
if temperature > 50:
temperature = 50
# print(" Local maximum detected at: ",str(s_unique))
# print(" Temperature changed to: %0.2f" %temperature)
return
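
# Typical call order (sketch; assumes the task, agent, exp and lp modules have been
# initialized by the RL-ROBOT learning loop before this module is used):
#   setup()                        # once, to build the state-generalization tables
#   a = select_biased_action(s)    # each learning step, to choose the next action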
(The remaining 48 changed files in this commit are not shown.)
