Skip to content
This repository has been archived by the owner on Apr 6, 2018. It is now read-only.

Commit

Permalink
Move SafeActionSpace to wrappers.experimental module (#101)
Browse files Browse the repository at this point in the history
* Experimental support for observation cropping

1. Create a new experimental package
2. Add ObservationCropping
3. Move action_space.SafeActionSpace to experimental.action_space.SafeActionSpace
   just to give a demonstration of how this new experimental package
   might be used.

Fixes #55

Signed-off-by: Jesper Derehag <[email protected]>

* Clarify warning

* Update docs

* Maintain backwards compatibility

* Fix warnings

* Import correctly

* Fix imports

* Fix references to non-experimental SafeActionSpace

* Fix deprecation warning

* Touch import ordering

* Optimize imports

* Fix imports

* Update changelog
  • Loading branch information
nottombrown authored Jan 8, 2017
1 parent cb048b3 commit b9815e8
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 54 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ What's next?

Changelog
---------

- 2017-01-08: The wrappers.SafeActionSpace has been moved to wrappers.experimental.SafeActionSpace. The old location will remain with a deprecation warning until 2017-02-08.
- 2016-12-27: BACKWARDS INCOMPATIBILITY: The gym monitor is now a
wrapper. Rather than starting monitoring as
`env.monitor.start(directory)`, envs are now wrapped as follows:
Expand Down
2 changes: 1 addition & 1 deletion example/diagnostic-agent/diagnostic-agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def __call__(self, observation, reward, done):
# translator. Everything else probably wants a SafeActionSpace
# wrapper to shield them from random-agent clicking around
# everywhere.
env = wrappers.SafeActionSpace(env)
env = wrappers.experimental.SafeActionSpace(env)
else:
# Only gym-core are seedable
env.seed([0])
Expand Down
2 changes: 1 addition & 1 deletion example/random-agent/random-agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def main():
# Restrict the valid random actions. (Try removing this and see
# what happens when the agent is given full control of the
# keyboard/mouse.)
env = wrappers.SafeActionSpace(env)
env = wrappers.experimental.SafeActionSpace(env)
observation_n = env.reset()

while True:
Expand Down
11 changes: 6 additions & 5 deletions universe/wrappers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
import gym
import universe.wrappers.experimental
from universe import envs, spaces
from universe.wrappers import gym_core_sync

from universe.wrappers.action_space import SafeActionSpace
from universe.wrappers.gym_core import GymCoreAction, GymCoreObservation, CropAtari
from universe.wrappers.blocking_reset import BlockingReset
from universe.wrappers.diagnostics import Diagnostics
from universe.wrappers.gym_core import GymCoreAction, GymCoreObservation, CropAtari
from universe.wrappers.joint import Joint
from universe.wrappers.logger import Logger
from universe.wrappers.monitoring import Monitor
from universe.wrappers.multiprocessing_env import WrappedMultiprocessingEnv, EpisodeID
from universe.wrappers.recording import Recording
from universe.wrappers.render import Render
from universe.wrappers.throttle import Throttle
from universe.wrappers.time_limit import TimeLimit
from universe.wrappers.timer import Timer
from universe.wrappers.vectorize import Vectorize, Unvectorize, WeakUnvectorize
from universe.wrappers.vision import Vision
from universe.wrappers.recording import Recording
from universe.wrappers.monitoring import Monitor
from universe.wrappers.time_limit import TimeLimit


def wrap(env):
    """Apply the standard wrapper stack to *env*: Throttle, then Render, then Timer."""
    throttled = Throttle(env)
    rendered = Render(throttled)
    return Timer(rendered)
Expand Down
103 changes: 57 additions & 46 deletions universe/wrappers/action_space.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import logging

import gym
from universe import envs, error, spaces, vectorized
from universe import error, spaces
from universe import vectorized

logger = logging.getLogger(__name__)


def atari_vnc(up=False, down=False, left=False, right=False, z=False):
return [spaces.KeyEvent.by_name('up', down=up),
Expand All @@ -24,55 +30,12 @@ def platform_vnc(up=False, left=False, right=False, space=False):
spaces.KeyEvent.by_name('right', down=right),
spaces.KeyEvent.by_name('space', down=space)]

class SafeActionSpace(vectorized.Wrapper):
"""
Recall that every universe environment receives a list of VNC events as action.
There exist many environments for which the set of relevant action is much smaller
and is known. For example, Atari environments have a modest number of keys,
so this wrapper, when applied to an Atari environment will reduce its action space.
Doing so is very convenient for research, since today's RL algorithms rely on random
exploration, which is hurt by small action spaces. As our algorithms get better
and we switch to using the raw VNC commands, this wrapper will become less important.
"""

def __init__(self, env):
super(SafeActionSpace, self).__init__(env)

if self.spec.tags.get('runtime') == 'gym-core':
self.action_space = gym_core_action_space(self.spec._kwargs['gym_core_id'])
elif self.spec is None:
pass
elif self.spec.id == 'internet.SlitherIO-v0' or self.spec.id == 'internet.SlitherIOErmiyaEskandaryBot-v0' or self.spec.id == 'internet.SlitherIOEasy-v0':
self.action_space = spaces.Hardcoded([
slither_vnc(left=True),
slither_vnc(right=True),
slither_vnc(space=True),
slither_vnc(left=True, space=True),
slither_vnc(right=True, space=True),
])
elif self.spec.id in ['flashgames.DuskDrive-v0']:
# TODO: be more systematic
self.action_space = spaces.Hardcoded([
racing_vnc(up=True),
racing_vnc(left=True),
racing_vnc(right=True),
])
elif self.spec.id in ['flashgames.RedBeard-v0']:
self.action_space = spaces.Hardcoded([
platform_vnc(up=True),
platform_vnc(left=True),
platform_vnc(right=True),
platform_vnc(space=True),
])

def gym_core_action_space(gym_core_id):
spec = gym.spec(gym_core_id)

if spec.id == 'CartPole-v0':
return spaces.Hardcoded([
[spaces.KeyEvent.by_name('left', down=True)],
[spaces.KeyEvent.by_name('left', down=False)],
])
return spaces.Hardcoded([[spaces.KeyEvent.by_name('left', down=True)],
[spaces.KeyEvent.by_name('left', down=False)]])
elif spec._entry_point.startswith('gym.envs.atari:'):
actions = []
env = spec.make()
Expand All @@ -87,3 +50,51 @@ def gym_core_action_space(gym_core_id):
return spaces.Hardcoded(actions)
else:
raise error.Error('Unsupported env type: {}'.format(spec.id))



class SafeActionSpace(vectorized.Wrapper):
    """
    Restrict a universe environment's action space to a small, known-useful set.

    Recall that every universe environment receives a list of VNC events as action.
    There exist many environments for which the set of relevant actions is much smaller
    and is known. For example, Atari environments have a modest number of keys,
    so this wrapper, when applied to an Atari environment will reduce its action space.
    Doing so is very convenient for research, since today's RL algorithms rely on random
    exploration, which is hurt by small action spaces. As our algorithms get better
    and we switch to using the raw VNC commands, this wrapper will become less important.
    NOTE: This class will soon be moved to `wrappers.experimental`. However the logic must currently remain in
    wrappers.SafeActionSpace in order to maintain backwards compatibility.
    """
    def __init__(self, env):
        super(SafeActionSpace, self).__init__(env)
        self._deprecation_warning()

        # Check for a missing spec FIRST: the original order dereferenced
        # self.spec.tags before the None guard, raising AttributeError for
        # spec-less envs instead of falling through to the no-op branch.
        if self.spec is None:
            pass
        elif self.spec.tags.get('runtime') == 'gym-core':
            # gym-core envs derive their safe action set from the wrapped gym id.
            self.action_space = gym_core_action_space(self.spec._kwargs['gym_core_id'])
        elif self.spec.id in ['internet.SlitherIO-v0',
                              'internet.SlitherIOErmiyaEskandaryBot-v0',
                              'internet.SlitherIOEasy-v0']:
            self.action_space = spaces.Hardcoded([slither_vnc(left=True),
                                                  slither_vnc(right=True),
                                                  slither_vnc(space=True),
                                                  slither_vnc(left=True, space=True),
                                                  slither_vnc(right=True, space=True)])
        elif self.spec.id in ['flashgames.DuskDrive-v0']:
            # TODO: be more systematic
            self.action_space = spaces.Hardcoded([racing_vnc(up=True),
                                                  racing_vnc(left=True),
                                                  racing_vnc(right=True)])
        elif self.spec.id in ['flashgames.RedBeard-v0']:
            self.action_space = spaces.Hardcoded([platform_vnc(up=True),
                                                  platform_vnc(left=True),
                                                  platform_vnc(right=True),
                                                  platform_vnc(space=True)])

    def _deprecation_warning(self):
        # logger.warn is deprecated in the stdlib logging module; use warning().
        logger.warning(('DEPRECATION WARNING: wrappers.SafeActionSpace has been moved to '
                        'wrappers.experimental.action_space.SafeActionSpace as of 2017-01-07. '
                        'Using legacy wrappers.SafeActionSpace will soon be removed'))
2 changes: 2 additions & 0 deletions universe/wrappers/experimental/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from universe.wrappers.experimental.action_space import SafeActionSpace
from universe.wrappers.experimental.observation import CropObservations
24 changes: 24 additions & 0 deletions universe/wrappers/experimental/action_space.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import logging

from universe.wrappers.action_space import SafeActionSpace as _SafeActionSpace

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

class SafeActionSpace(_SafeActionSpace):
    """
    Restrict a universe environment's action space to a small, known-useful set.

    Every universe environment receives a list of VNC events as its action, but
    for many environments the set of relevant actions is much smaller and known
    ahead of time (e.g. Atari environments use only a handful of keys). Applying
    this wrapper reduces the action space accordingly, which helps today's
    RL algorithms that depend on random exploration; it will matter less as
    algorithms improve and operate on raw VNC commands directly.
    NOTE: This will be the new location for SafeActionSpace, however the logic must currently remain in
    wrappers.SafeActionSpace in order to maintain backwards compatibility.
    """

    def _deprecation_warning(self):
        # Importing from this (new) location is correct, so emit no warning.
        return None
43 changes: 43 additions & 0 deletions universe/wrappers/experimental/observation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import logging

from universe import vectorized, runtime_spec

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def CropObservations(env):
    """
    Crop the visual observations of *env* so they contain only the game screen.

    Removes everything outside the game that belongs to universe itself
    (browser borders and so on). Known flashgames and VNC-atari environments
    are wrapped in a cropping observation wrapper; any other environment
    (including local atari) is returned unchanged.
    """
    tags = env.spec.tags
    if tags.get('flashgames', False):
        # Look up the game's screen geometry from the flashgames registry.
        spec = runtime_spec('flashgames').server_registry[env.spec.id]
        return _CropObservations(env, x=18, y=84, height=spec["height"], width=spec["width"])
    if tags.get('atari', False) and tags.get('vnc', False):
        return _CropObservations(env, height=194, width=160)
    # Unknown environment (or local atari): do nothing.
    return env

class _CropObservations(vectorized.ObservationWrapper):
    """Vectorized observation wrapper that crops every frame to a fixed window."""

    def __init__(self, env, height, width, x=0, y=0):
        """Crop frames to a height*width window whose top-left corner is (x, y)."""
        super(_CropObservations, self).__init__(env)
        # Top-left corner of the crop window.
        self.x = x
        self.y = y
        # Size of the crop window.
        self.height = height
        self.width = width

        # modify observation_space? (if so, how to know depth and channels before we have seen the first frame?)
        # self.observation_space = Box(0, 255, shape=(height, width, 3))

    def _observation(self, observation_n):
        cropped = []
        for frame in observation_n:
            cropped.append(self._crop_frame(frame))
        return cropped

    def _crop_frame(self, frame):
        if frame is None:
            return frame
        rows = slice(self.y, self.y + self.height)
        cols = slice(self.x, self.x + self.width)
        if isinstance(frame, dict):
            # Dict observations carry the pixels under the 'vision' key.
            frame['vision'] = frame['vision'][rows, cols]
            return frame
        return frame[rows, cols]

0 comments on commit b9815e8

Please sign in to comment.