Skip to content
This repository has been archived by the owner on Apr 6, 2018. It is now read-only.

Commit

Permalink
Move SafeActionSpace to wrappers.experimental module (#101)
Browse files Browse the repository at this point in the history
* Experimental support for observation cropping

1. Create a new experimental package
2. Add ObservationCropping
3. Move action_space.SafeActionSpace to experimental.action_space.SafeActionSpace
   just to give a demonstration of how this new experimental package
   might be used.

Fixes #55

Signed-off-by: Jesper Derehag <[email protected]>

* Clarify warning

* Update docs

* Maintain backwards compatibility

* Fix warnings

* Import correctly

* Fix imports

* Fix references to non-experimental SafeActionSpace

* Fix deprecation warning

* Touch import ordering

* Optimize imports

* Fix imports

* Update changelog
  • Loading branch information
nottombrown authored Jan 8, 2017
1 parent cb048b3 commit b9815e8
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 54 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ What's next?

Changelog
---------

- 2017-01-08: The wrappers.SafeActionSpace has been moved to wrappers.experimental.SafeActionSpace. The old location will remain with a deprecation warning until 2017-02-08.
- 2016-12-27: BACKWARDS INCOMPATIBILITY: The gym monitor is now a
wrapper. Rather than starting monitoring as
`env.monitor.start(directory)`, envs are now wrapped as follows:
Expand Down
2 changes: 1 addition & 1 deletion example/diagnostic-agent/diagnostic-agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def __call__(self, observation, reward, done):
# translator. Everything else probably wants a SafeActionSpace
# wrapper to shield them from random-agent clicking around
# everywhere.
env = wrappers.SafeActionSpace(env)
env = wrappers.experimental.SafeActionSpace(env)
else:
# Only gym-core are seedable
env.seed([0])
Expand Down
2 changes: 1 addition & 1 deletion example/random-agent/random-agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def main():
# Restrict the valid random actions. (Try removing this and see
# what happens when the agent is given full control of the
# keyboard/mouse.)
env = wrappers.SafeActionSpace(env)
env = wrappers.experimental.SafeActionSpace(env)
observation_n = env.reset()

while True:
Expand Down
11 changes: 6 additions & 5 deletions universe/wrappers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
import gym
import universe.wrappers.experimental
from universe import envs, spaces
from universe.wrappers import gym_core_sync

from universe.wrappers.action_space import SafeActionSpace
from universe.wrappers.gym_core import GymCoreAction, GymCoreObservation, CropAtari
from universe.wrappers.blocking_reset import BlockingReset
from universe.wrappers.diagnostics import Diagnostics
from universe.wrappers.gym_core import GymCoreAction, GymCoreObservation, CropAtari
from universe.wrappers.joint import Joint
from universe.wrappers.logger import Logger
from universe.wrappers.monitoring import Monitor
from universe.wrappers.multiprocessing_env import WrappedMultiprocessingEnv, EpisodeID
from universe.wrappers.recording import Recording
from universe.wrappers.render import Render
from universe.wrappers.throttle import Throttle
from universe.wrappers.time_limit import TimeLimit
from universe.wrappers.timer import Timer
from universe.wrappers.vectorize import Vectorize, Unvectorize, WeakUnvectorize
from universe.wrappers.vision import Vision
from universe.wrappers.recording import Recording
from universe.wrappers.monitoring import Monitor
from universe.wrappers.time_limit import TimeLimit


def wrap(env):
    """Apply the standard wrapper stack to *env*: Throttle, then Render, then Timer."""
    throttled = Throttle(env)
    rendered = Render(throttled)
    return Timer(rendered)
Expand Down
103 changes: 57 additions & 46 deletions universe/wrappers/action_space.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import logging

import gym
from universe import envs, error, spaces, vectorized
from universe import error, spaces
from universe import vectorized

logger = logging.getLogger(__name__)


def atari_vnc(up=False, down=False, left=False, right=False, z=False):
return [spaces.KeyEvent.by_name('up', down=up),
Expand All @@ -24,55 +30,12 @@ def platform_vnc(up=False, left=False, right=False, space=False):
spaces.KeyEvent.by_name('right', down=right),
spaces.KeyEvent.by_name('space', down=space)]

class SafeActionSpace(vectorized.Wrapper):
"""
Recall that every universe environment receives a list of VNC events as action.
There exist many environments for which the set of relevant action is much smaller
and is known. For example, Atari environments have a modest number of keys,
so this wrapper, when applied to an Atari environment will reduce its action space.
Doing so is very convenient for research, since today's RL algorithms rely on random
exploration, which is hurt by small action spaces. As our algorithms get better
and we switch to using the raw VNC commands, this wrapper will become less important.
"""

def __init__(self, env):
super(SafeActionSpace, self).__init__(env)

if self.spec.tags.get('runtime') == 'gym-core':
self.action_space = gym_core_action_space(self.spec._kwargs['gym_core_id'])
elif self.spec is None:
pass
elif self.spec.id == 'internet.SlitherIO-v0' or self.spec.id == 'internet.SlitherIOErmiyaEskandaryBot-v0' or self.spec.id == 'internet.SlitherIOEasy-v0':
self.action_space = spaces.Hardcoded([
slither_vnc(left=True),
slither_vnc(right=True),
slither_vnc(space=True),
slither_vnc(left=True, space=True),
slither_vnc(right=True, space=True),
])
elif self.spec.id in ['flashgames.DuskDrive-v0']:
# TODO: be more systematic
self.action_space = spaces.Hardcoded([
racing_vnc(up=True),
racing_vnc(left=True),
racing_vnc(right=True),
])
elif self.spec.id in ['flashgames.RedBeard-v0']:
self.action_space = spaces.Hardcoded([
platform_vnc(up=True),
platform_vnc(left=True),
platform_vnc(right=True),
platform_vnc(space=True),
])

def gym_core_action_space(gym_core_id):
spec = gym.spec(gym_core_id)

if spec.id == 'CartPole-v0':
return spaces.Hardcoded([
[spaces.KeyEvent.by_name('left', down=True)],
[spaces.KeyEvent.by_name('left', down=False)],
])
return spaces.Hardcoded([[spaces.KeyEvent.by_name('left', down=True)],
[spaces.KeyEvent.by_name('left', down=False)]])
elif spec._entry_point.startswith('gym.envs.atari:'):
actions = []
env = spec.make()
Expand All @@ -87,3 +50,51 @@ def gym_core_action_space(gym_core_id):
return spaces.Hardcoded(actions)
else:
raise error.Error('Unsupported env type: {}'.format(spec.id))



class SafeActionSpace(vectorized.Wrapper):
    """
    Restrict a universe environment's action space to a small, known-useful set.

    Recall that every universe environment receives a list of VNC events as action.
    There exist many environments for which the set of relevant actions is much smaller
    and is known. For example, Atari environments have a modest number of keys,
    so this wrapper, when applied to an Atari environment will reduce its action space.
    Doing so is very convenient for research, since today's RL algorithms rely on random
    exploration, which is hurt by small action spaces. As our algorithms get better
    and we switch to using the raw VNC commands, this wrapper will become less important.
    NOTE: This class will soon be moved to `wrappers.experimental`. However the logic must currently remain in
    wrappers.SafeActionSpace in order to maintain backwards compatibility.
    """
    def __init__(self, env):
        super(SafeActionSpace, self).__init__(env)
        self._deprecation_warning()

        # Check for a missing spec FIRST: the original order dereferenced
        # self.spec.tags before the None guard, raising AttributeError for
        # spec-less envs instead of falling through to the no-op branch.
        if self.spec is None:
            pass
        elif self.spec.tags.get('runtime') == 'gym-core':
            # gym-core envs derive their safe action set from the wrapped gym id.
            self.action_space = gym_core_action_space(self.spec._kwargs['gym_core_id'])
        elif self.spec.id in ['internet.SlitherIO-v0',
                              'internet.SlitherIOErmiyaEskandaryBot-v0',
                              'internet.SlitherIOEasy-v0']:
            self.action_space = spaces.Hardcoded([slither_vnc(left=True),
                                                  slither_vnc(right=True),
                                                  slither_vnc(space=True),
                                                  slither_vnc(left=True, space=True),
                                                  slither_vnc(right=True, space=True)])
        elif self.spec.id in ['flashgames.DuskDrive-v0']:
            # TODO: be more systematic
            self.action_space = spaces.Hardcoded([racing_vnc(up=True),
                                                  racing_vnc(left=True),
                                                  racing_vnc(right=True)])
        elif self.spec.id in ['flashgames.RedBeard-v0']:
            self.action_space = spaces.Hardcoded([platform_vnc(up=True),
                                                  platform_vnc(left=True),
                                                  platform_vnc(right=True),
                                                  platform_vnc(space=True)])

    def _deprecation_warning(self):
        # logger.warn is deprecated in the stdlib logging module; use warning().
        logger.warning(('DEPRECATION WARNING: wrappers.SafeActionSpace has been moved to '
                        'wrappers.experimental.action_space.SafeActionSpace as of 2017-01-07. '
                        'Using legacy wrappers.SafeActionSpace will soon be removed'))
2 changes: 2 additions & 0 deletions universe/wrappers/experimental/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from universe.wrappers.experimental.action_space import SafeActionSpace
from universe.wrappers.experimental.observation import CropObservations
24 changes: 24 additions & 0 deletions universe/wrappers/experimental/action_space.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import logging

from universe.wrappers.action_space import SafeActionSpace as _SafeActionSpace

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

class SafeActionSpace(_SafeActionSpace):
    """
    Restrict a universe environment's action space to a small, known-useful set.

    Every universe environment receives a list of VNC events as its action, but
    for many environments the set of relevant actions is much smaller and known
    ahead of time (e.g. Atari environments use only a handful of keys). Applying
    this wrapper reduces the action space accordingly, which helps today's
    RL algorithms that depend on random exploration; it will matter less as
    algorithms improve and operate on raw VNC commands directly.
    NOTE: This will be the new location for SafeActionSpace, however the logic must currently remain in
    wrappers.SafeActionSpace in order to maintain backwards compatibility.
    """

    def _deprecation_warning(self):
        # Importing from this (new) location is correct, so emit no warning.
        return None
43 changes: 43 additions & 0 deletions universe/wrappers/experimental/observation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import logging

from universe import vectorized, runtime_spec

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def CropObservations(env):
    """
    Crop the visual observations of *env* so they contain only the game screen.

    Removes everything outside the game that belongs to universe itself
    (browser borders and so on). Known flashgames and VNC-atari environments
    are wrapped in a cropping observation wrapper; any other environment
    (including local atari) is returned unchanged.
    """
    tags = env.spec.tags
    if tags.get('flashgames', False):
        # Look up the game's screen geometry from the flashgames registry.
        spec = runtime_spec('flashgames').server_registry[env.spec.id]
        return _CropObservations(env, x=18, y=84, height=spec["height"], width=spec["width"])
    if tags.get('atari', False) and tags.get('vnc', False):
        return _CropObservations(env, height=194, width=160)
    # Unknown environment (or local atari): do nothing.
    return env

class _CropObservations(vectorized.ObservationWrapper):
    """Vectorized observation wrapper that crops every frame to a fixed window."""

    def __init__(self, env, height, width, x=0, y=0):
        """Crop frames to a height*width window whose top-left corner is (x, y)."""
        super(_CropObservations, self).__init__(env)
        # Top-left corner of the crop window.
        self.x = x
        self.y = y
        # Size of the crop window.
        self.height = height
        self.width = width

        # modify observation_space? (if so, how to know depth and channels before we have seen the first frame?)
        # self.observation_space = Box(0, 255, shape=(height, width, 3))

    def _observation(self, observation_n):
        cropped = []
        for frame in observation_n:
            cropped.append(self._crop_frame(frame))
        return cropped

    def _crop_frame(self, frame):
        if frame is None:
            return frame
        rows = slice(self.y, self.y + self.height)
        cols = slice(self.x, self.x + self.width)
        if isinstance(frame, dict):
            # Dict observations carry the pixels under the 'vision' key.
            frame['vision'] = frame['vision'][rows, cols]
            return frame
        return frame[rows, cols]

0 comments on commit b9815e8

Please sign in to comment.