Commit 22157fc

minor exceptions and comments

angelmtenor committed Dec 1, 2016
1 parent 4961858 commit 22157fc
Showing 6 changed files with 40 additions and 64 deletions.
11 changes: 5 additions & 6 deletions README.md

@@ -2,14 +2,15 @@
 This repository provides a Reinforcement Learning framework in Python from the Machine Perception and Intelligent Robotics research group [(MAPIR)](http://mapir.isa.uma.es).
 
 ### Requirements
-* Python 3
-* numpy
-* matplotlib
+* Python >= 3.4
+* numpy >= 1.11
+* matplotlib >= 1.5
 * tkinter `sudo apt-get install python-tk`
 
 Tested on Ubuntu 14.04 and 16.04 (64 bits).
 
 ### V-REP settings:
-(Tested on V-REP_PRO_EDU_V3_3_2 64_bits Linux)
+(Tested version: V-REP PRO EDU V3.3.2)
 
 1. Use default values of `remoteApiConnections.txt`
 ~~~
@@ -29,6 +30,4 @@ Recommended simulation settings for scenes in RL-ROS (already set in the provide
 **Execute V-REP**
 (`./vrep.sh on linux`). `File -> Open Scene -> (open any scene for RL-ROS)`
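Note: the default `remoteApiConnections.txt` leaves V-REP's continuous remote API server listening on port 19997, which is the address and port `rl_vrep.py` connects to (see its diff below). A minimal standalone connectivity check, sketched under the assumption that the `vrep.py` bindings and the `remoteApi` shared library from the V-REP installation are importable:

~~~python
import vrep  # legacy V-REP remote API bindings shipped with V-REP

vrep.simxFinish(-1)  # close any stale connections first
# args: ip, port, waitUntilConnected, doNotReconnectOnceDisconnected,
#       timeOutInMs, commThreadCycleInMs
client_id = vrep.simxStart('127.0.0.1', 19997, True, True, 3000, 5)
if client_id == -1:
    print('No remote API server on 127.0.0.1:19997. Is V-REP running?')
else:
    print('Connected, client id:', client_id)
    vrep.simxFinish(client_id)  # release the connection when done
~~~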
8 changes: 4 additions & 4 deletions example.py

@@ -10,18 +10,18 @@
 import rlrobot
 
 exp.ENVIRONMENT_TYPE = "VREP"
-exp.TASK_ID = "nav_1k"
+exp.TASK_ID = "wander_1k"
 # exp.FILE_MODEL = exp.TASK_ID + "_model"
 # exp.N_EPISODES_MODEL = 1
 
 exp.ALGORITHM = "TOSL"
 exp.ACTION_STRATEGY = "QBIASR"
 
 exp.N_REPETITIONS = 1
-exp.N_EPISODES = 10
-exp.N_STEPS = 1 * 60 * 60
+exp.N_EPISODES = 1
+exp.N_STEPS = 60 * 60
 
-exp.SUFFIX = "Video"
+exp.SUFFIX = ""
 exp.DISPLAY_STEP = 100
 
 rlrobot.run()
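Assuming each step lasts `STEP_TIME = 1` s as set in the task file below, `exp.N_STEPS = 60 * 60` amounts to 3600 steps, i.e. roughly one simulated hour per episode; the old `1 * 60 * 60` computed the same value, so only the redundant factor was dropped.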
10 changes: 7 additions & 3 deletions rl_vrep.py

@@ -63,12 +63,16 @@ def show_msg(message):

 def connect():
     """ Connect to the simulator"""
+    ip = '127.0.0.1'
+    port = 19997
     vrep.simxFinish(-1)  # just in case, close all opened connections
     global clientID
-    clientID = vrep.simxStart('127.0.0.1', 19997, True, True, 3000, 5)
+    clientID = vrep.simxStart(ip, port, True, True, 3000, 5)
     # Connect to V-REP
-    assert (clientID != -1), ("\n\n V-REP remote API server connection failed. "
-                              "Is V-REP running?")
+    if clientID == -1:
+        import sys
+        sys.exit('\nV-REP remote API server connection failed (' + ip +
+                 ':' + str(port) + '). Is V-REP running?')
     print('Connected to Robot')
     show_msg('Python: Hello')
     time.sleep(0.5)
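Replacing the `assert` with an explicit `sys.exit` makes the connection check robust to interpreter flags: assertions are stripped when Python runs with optimizations enabled, as this small demo (not from the repo) shows:

~~~python
# save as demo.py, then compare:
#   python demo.py     -> AssertionError: this check disappears under -O
#   python -O demo.py  -> prints 'reached': the assert was stripped
assert False, "this check disappears under -O"
print("reached")
~~~

It also spares end users a traceback: `sys.exit(message)` prints the message and exits instead.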
5 changes: 4 additions & 1 deletion rlrobot.py

@@ -44,7 +44,10 @@ def run():
     exp.check()  # Check experiment parameters
 
     # copy the selected taskfile to speed up the execution:
-    copyfile("tasks/" + exp.TASK_ID + ".py", "task.py")
+    try:
+        copyfile("tasks/" + exp.TASK_ID + ".py", "task.py")
+    except IOError:
+        sys.exit("Task " + exp.TASK_ID + " not found. Please check exp.TASK_ID")
     import task
     import robot
     import lp
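The `try/except` guards the copy-then-import pattern: copying the selected file to the fixed name `task.py` lets the rest of the framework do a plain `import task`. A hypothetical alternative that skips the file copy, sketched here for comparison only (it is not what the repo does, and it assumes `tasks/` is an importable package):

~~~python
import importlib
import sys

def load_task(task_id):
    """Import tasks/<task_id>.py directly instead of copying it to task.py."""
    try:
        return importlib.import_module("tasks." + task_id)
    except ImportError:
        sys.exit("Task " + task_id + " not found. Please check exp.TASK_ID")
~~~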
70 changes: 20 additions & 50 deletions task.py

@@ -6,43 +6,40 @@
 # +-----------------------------------------------+
 """ Task template v1.1 """
 
-import math
-
 import numpy as np
 
 import agent
 import robot
 
 # Task Parameters:
-NAME = "navigation_2D_1024s_9a"
-DESCRIPTION = "Navigation to a Goal. 16 pos / 8 orient / 8 lasers states"
+NAME = "Wander_1024s_9a"
+DESCRIPTION = "Wander avoiding obstacles. 5 lasers. 4 ranges each. 9 actions"
 ROBOT = "Pioneer 3dx with 8-point laser"
-ENVIRONMENT = "VREP_SIM: square 4x4m with obstacles and a goal"
+ENVIRONMENT = "VREP_SIM: square 6x6m with obstacles"
 ENVIRONMENT_DETAIL = "SpeedX3. Threaded Render. Sens:Buffer, Act:Streaming"
 AGENT_ELEMENTS = ["MOBILE_BASE", "DISTANCE_SENSOR"]
-ENV_ELEMENTS = ["GOAL_OBJECT"]
-# AGENT_ELEMENTS: "MOBILE_BASE","DISTANCE_SENSOR", "ARM"
+ENV_ELEMENTS = []
+# AGENT_ELEMENTS: "MOBILE_BASE","DISTANCE_SENSOR","GOAL","ARM"
 # ENV_ELEMENTS: "GOAL_OBJECT"
 
 # Physical Parameters:
 STEP_TIME = 1  # s
 MOTOR_SPEED = 1  # rad/s (pioneer 3dx: 1 rad/s: ~ 0.1m/s)
-RANGE_OBSTACLES = 0.20  # m
+RANGE_OBSTACLES = 0.80  # m
 RANGE_DISPLACEMENT = 0.07  # m
-RANGE_DAMAGE = 0.08  # m
+RANGE_DAMAGE = 0.05  # m
 
 # STATES. Input variables:
 # Dictionary -> "variable name": (discrete intervals). Example:
 # "X": (0, 5) 2 ranges: s(X)=0 if X<=5, =1 if X>5.
 # "X": (0, 2, 4) 3 ranges: s(X)=0 if X<=2, =1 if X<4, =2 if X>4
 # For multiple linear intervals we suggest np.linspace
 INPUT_VARIABLES = {
-    "mobile_x": np.linspace(3, 7, 4),  # V-REP scenario: (3 to 7)m in X
-    "mobile_y": np.linspace(3, 7, 4),  # V-REP scenario: (3 to 7)m in Y
-    "mobile_theta": np.linspace(-math.pi, math.pi, 8),
-    "laser_front": np.linspace(0, RANGE_OBSTACLES, 2),
-    "laser_front_left": np.linspace(0, RANGE_OBSTACLES, 2),
-    "laser_front_right": np.linspace(0, RANGE_OBSTACLES, 2)
+    "laser_front": np.linspace(0, RANGE_OBSTACLES, 4),
+    "laser_front_left": np.linspace(0, RANGE_OBSTACLES, 4),
+    "laser_left": np.linspace(0, RANGE_OBSTACLES, 4),
+    "laser_right": np.linspace(0, RANGE_OBSTACLES, 4),
+    "laser_front_right": np.linspace(0, RANGE_OBSTACLES, 4)
 }
 OUTPUT_VARIABLES = {
     "left_wheel": np.linspace(-MOTOR_SPEED, MOTOR_SPEED, 3),
@@ -61,60 +58,33 @@ def execute_action(actuator):
     # 2 actions with only one wheel backwards changed to no motion (duplicated)
     if v_left < 0 and v_right < 0:
         v_left = v_right = MOTOR_SPEED * 2
-    elif (v_left == 0 and v_right <= 0) or (v_left < 0 and v_right == 0):
+    elif (v_left == 0 and v_right < 0) or (v_left < 0 and v_right == 0):
         v_left = v_right = 0
     robot.move_wheels(v_left, v_right)  # left wheel, right_wheel speeds (rad/s)
 
 
 # TASK DEFINITION: REWARDS ----------------------------------------------------
-REWARDS = np.array([-10.0, -2.0, -1.0, -0.1, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0])
-THRES_STEPS_AT_GOAL = 5  # auxiliary: if the robot remains at the goal more than
-# this value, the step time will be reduced to accelerate the experiments
-steps_at_goal = 0
+REWARDS = np.array([-10.0, -2.0, -0.02, 10.0])
 
 
 def get_reward():  # abstract s,a,sp pointless here
     """ Return the reward from s,a,sp or the environment (recommended)"""
     # Sensors values already updated in robot.py when the state was observed
-    global steps_at_goal
-
     distance_fl = robot.sensor["laser_front_left"]
     distance_fr = robot.sensor["laser_front_right"]
     distance_f = robot.sensor["laser_front"]
-    distance_robot_goal = robot.distance_mobilebase_goal
-    displacement_robot_goal = robot.mobilebase_goal_displacement2d
-    # if negative: MobileBase is getting closer
+    displacement = robot.mobilebase_displacement2d
 
     n_collisions = (int(distance_fl < RANGE_DAMAGE) +
                     int(distance_fr < RANGE_DAMAGE) +
                     int(distance_f < RANGE_DAMAGE))
 
-    r = REWARDS[3]
-
-    if distance_robot_goal < 0.5:  # robot reaches the goal
-        r = max(REWARDS)
-
-    elif n_collisions > 1:
-        r = min(REWARDS)  # big penalty
-
+    r = REWARDS[2]
+    if n_collisions > 1:  # big penalty
+        r = min(REWARDS)
     elif n_collisions == 1:  # small penalty
         r = REWARDS[1]
-
-    elif displacement_robot_goal > RANGE_DISPLACEMENT:  # moving away
-        r = REWARDS[2]
-
-    elif displacement_robot_goal < -RANGE_DISPLACEMENT:  # approaching
-        r = round(REWARDS[4] + 4 / distance_robot_goal)
-        r = r if r < 5 else 5
-
-    if r >= max(REWARDS):
-        steps_at_goal += 1
-        if steps_at_goal > THRES_STEPS_AT_GOAL:
-            # LE.step_time = LE.INITIAL_STEP_TIME/8.0
-            agent.goal_reached = True
-            robot.stop_motion()
-    else:
-        steps_at_goal = 0
+    elif displacement > RANGE_DISPLACEMENT:
+        r = max(REWARDS)
 
     return r

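The interval convention documented above `INPUT_VARIABLES` (first element is the lower bound, the remaining elements are right-closed thresholds) can be exercised with `np.digitize`. A sketch with a hypothetical `discretize` helper (the repo's own binning lives elsewhere in the framework and may differ):

~~~python
import numpy as np

RANGE_OBSTACLES = 0.80  # m, as set in task.py above

def discretize(value, intervals):
    """State index under the documented convention: s=0 if value <= intervals[1],
    s=1 for the next range, and so on; intervals[0] is only the lower bound."""
    return int(np.digitize(value, intervals[1:], right=True))

bins = np.linspace(0, RANGE_OBSTACLES, 4)  # [0., 0.2667, 0.5333, 0.8]
print(discretize(0.10, bins))  # 0: obstacle very close
print(discretize(0.40, bins))  # 1
print(discretize(0.70, bins))  # 2
print(discretize(0.90, bins))  # 3: clear beyond RANGE_OBSTACLES
~~~

Four ranges on each of the five lasers give 4^5 = 1024 joint states, matching the `1024s` in NAME.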
Binary file modified vrep_scenes/C_table_cube.ttt
