From f6489c38b3b0f493c0c873ff8f338d06fe7d74fc Mon Sep 17 00:00:00 2001
From: Mark Towers
Date: Wed, 5 Oct 2022 17:53:45 +0100
Subject: [PATCH] Updated gymnasium to be equivalent to gym v26.2 (#36)

---
Review note: the render() warnings added below recommend
gym.make("<id>", render_mode=...) instead of gym("<id>", ...), because the
gymnasium module itself is not callable. Hunk sizes are unchanged.

 .github/workflows/build.yml                |  3 +
 .github/workflows/pre-commit.yml           |  4 ++
 gymnasium/envs/box2d/bipedal_walker.py     |  8 +++
 gymnasium/envs/box2d/car_racing.py         | 10 ++-
 gymnasium/envs/box2d/lunar_lander.py       |  8 +++
 gymnasium/envs/classic_control/acrobot.py  | 11 ++-
 gymnasium/envs/classic_control/cartpole.py |  8 +++
 .../continuous_mountain_car.py             |  8 +++
 .../envs/classic_control/mountain_car.py   |  8 +++
 gymnasium/envs/classic_control/pendulum.py |  8 +++
 gymnasium/envs/toy_text/blackjack.py       |  8 +++
 gymnasium/envs/toy_text/cliffwalking.py    |  9 +++
 gymnasium/envs/toy_text/frozen_lake.py     |  9 +++
 gymnasium/envs/toy_text/taxi.py            | 10 ++-
 gymnasium/spaces/graph.py                  |  4 +-
 gymnasium/vector/async_vector_env.py       |  7 +-
 gymnasium/vector/sync_vector_env.py        |  3 +-
 gymnasium/version.py                       |  2 +-
 gymnasium/wrappers/atari_preprocessing.py  |  6 +-
 setup.py                                   |  2 +-
 tests/vector/test_vector_env.py            | 67 ++++++++++++++++++-
 21 files changed, 190 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 7a60fa8d3..6f01bf9d6 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,6 +1,9 @@
 name: build
 on: [pull_request, push]
 
+permissions:
+  contents: read # to fetch code (actions/checkout)
+
 jobs:
   build:
     runs-on: ubuntu-latest
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index b62f595eb..74350e6d6 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -5,6 +5,10 @@ on:
   pull_request:
   push:
     branches: [master]
+
+permissions:
+  contents: read # to fetch code (actions/checkout)
+
 jobs:
   pre-commit:
     runs-on: ubuntu-latest
diff --git a/gymnasium/envs/box2d/bipedal_walker.py b/gymnasium/envs/box2d/bipedal_walker.py
index c791d7776..1d27ae751 100644
--- a/gymnasium/envs/box2d/bipedal_walker.py
+++ b/gymnasium/envs/box2d/bipedal_walker.py
@@ -608,6 +608,14 @@ def step(self, action: np.ndarray):
         return np.array(state, dtype=np.float32), reward, terminated, False, {}
 
     def render(self):
+        if self.render_mode is None:
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+
         try:
             import pygame
             from pygame import gfxdraw
diff --git a/gymnasium/envs/box2d/car_racing.py b/gymnasium/envs/box2d/car_racing.py
index 77589700a..8cb9b5210 100644
--- a/gymnasium/envs/box2d/car_racing.py
+++ b/gymnasium/envs/box2d/car_racing.py
@@ -569,7 +569,15 @@ def step(self, action: Union[np.ndarray, int]):
         return self.state, step_reward, terminated, truncated, {}
 
     def render(self):
-        return self._render(self.render_mode)
+        if self.render_mode is None:
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+        else:
+            return self._render(self.render_mode)
 
     def _render(self, mode: str):
         assert mode in self.metadata["render_modes"]
diff --git a/gymnasium/envs/box2d/lunar_lander.py b/gymnasium/envs/box2d/lunar_lander.py
index 8879c1e73..85f2d1172 100644
--- a/gymnasium/envs/box2d/lunar_lander.py
+++ b/gymnasium/envs/box2d/lunar_lander.py
@@ -602,6 +602,14 @@ def step(self, action):
         return np.array(state, dtype=np.float32), reward, terminated, False, {}
 
     def render(self):
+        if self.render_mode is None:
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+
         try:
             import pygame
             from pygame import gfxdraw
diff --git a/gymnasium/envs/classic_control/acrobot.py b/gymnasium/envs/classic_control/acrobot.py
index 4e27ef83c..147eb6c5f 100644
--- a/gymnasium/envs/classic_control/acrobot.py
+++ b/gymnasium/envs/classic_control/acrobot.py
@@ -4,7 +4,9 @@
 import numpy as np
 from numpy import cos, pi, sin
 
+import gymnasium as gym
 from gymnasium import Env, spaces
+from gymnasium.envs.classic_control import utils
 from gymnasium.error import DependencyNotInstalled
 
 __copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
@@ -20,7 +22,6 @@
 
 # SOURCE:
 # https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py
-from gymnasium.envs.classic_control import utils
 
 
 class AcrobotEnv(Env):
@@ -280,6 +281,14 @@ def _dsdt(self, s_augmented):
         return dtheta1, dtheta2, ddtheta1, ddtheta2, 0.0
 
     def render(self):
+        if self.render_mode is None:
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+
         try:
             import pygame
             from pygame import gfxdraw
diff --git a/gymnasium/envs/classic_control/cartpole.py b/gymnasium/envs/classic_control/cartpole.py
index 9e0c54024..68f34677c 100644
--- a/gymnasium/envs/classic_control/cartpole.py
+++ b/gymnasium/envs/classic_control/cartpole.py
@@ -209,6 +209,14 @@ def reset(
         return np.array(self.state, dtype=np.float32), {}
 
     def render(self):
+        if self.render_mode is None:
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+
         try:
             import pygame
             from pygame import gfxdraw
diff --git a/gymnasium/envs/classic_control/continuous_mountain_car.py b/gymnasium/envs/classic_control/continuous_mountain_car.py
index e86175ec3..97357538b 100644
--- a/gymnasium/envs/classic_control/continuous_mountain_car.py
+++ b/gymnasium/envs/classic_control/continuous_mountain_car.py
@@ -193,6 +193,14 @@ def _height(self, xs):
         return np.sin(3 * xs) * 0.45 + 0.55
 
     def render(self):
+        if self.render_mode is None:
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+
         try:
             import pygame
             from pygame import gfxdraw
diff --git a/gymnasium/envs/classic_control/mountain_car.py b/gymnasium/envs/classic_control/mountain_car.py
index da23fb1f8..b725e6d88 100644
--- a/gymnasium/envs/classic_control/mountain_car.py
+++ b/gymnasium/envs/classic_control/mountain_car.py
@@ -170,6 +170,14 @@ def _height(self, xs):
         return np.sin(3 * xs) * 0.45 + 0.55
 
     def render(self):
+        if self.render_mode is None:
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+
         try:
             import pygame
             from pygame import gfxdraw
diff --git a/gymnasium/envs/classic_control/pendulum.py b/gymnasium/envs/classic_control/pendulum.py
index e07a1cbe9..aa18222ed 100644
--- a/gymnasium/envs/classic_control/pendulum.py
+++ b/gymnasium/envs/classic_control/pendulum.py
@@ -167,6 +167,14 @@ def _get_obs(self):
         return np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32)
 
     def render(self):
+        if self.render_mode is None:
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+
         try:
             import pygame
             from pygame import gfxdraw
diff --git a/gymnasium/envs/toy_text/blackjack.py b/gymnasium/envs/toy_text/blackjack.py
index 8bcdc3b53..2d89119e3 100644
--- a/gymnasium/envs/toy_text/blackjack.py
+++ b/gymnasium/envs/toy_text/blackjack.py
@@ -191,6 +191,14 @@ def reset(
         return self._get_obs(), {}
 
     def render(self):
+        if self.render_mode is None:
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+
         try:
             import pygame
         except ImportError:
diff --git a/gymnasium/envs/toy_text/cliffwalking.py b/gymnasium/envs/toy_text/cliffwalking.py
index f8d70f192..0476ccc8f 100644
--- a/gymnasium/envs/toy_text/cliffwalking.py
+++ b/gymnasium/envs/toy_text/cliffwalking.py
@@ -5,6 +5,7 @@
 
 import numpy as np
 
+import gymnasium as gym
 from gymnasium import Env, spaces
 from gymnasium.envs.toy_text.utils import categorical_sample
 from gymnasium.error import DependencyNotInstalled
@@ -163,6 +164,14 @@ def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
         return int(self.s), {"prob": 1}
 
     def render(self):
+        if self.render_mode is None:
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+
         if self.render_mode == "ansi":
             return self._render_text()
         else:
diff --git a/gymnasium/envs/toy_text/frozen_lake.py b/gymnasium/envs/toy_text/frozen_lake.py
index afbb9d9e6..60e3a7000 100644
--- a/gymnasium/envs/toy_text/frozen_lake.py
+++ b/gymnasium/envs/toy_text/frozen_lake.py
@@ -5,6 +5,7 @@
 
 import numpy as np
 
+import gymnasium as gym
 from gymnasium import Env, spaces, utils
 from gymnasium.envs.toy_text.utils import categorical_sample
 from gymnasium.error import DependencyNotInstalled
@@ -268,6 +269,14 @@ def reset(
         return int(self.s), {"prob": 1}
 
     def render(self):
+        if self.render_mode is None:
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+
         if self.render_mode == "ansi":
             return self._render_text()
         else:  # self.render_mode in {"human", "rgb_array"}:
diff --git a/gymnasium/envs/toy_text/taxi.py b/gymnasium/envs/toy_text/taxi.py
index 977d825e9..66697d059 100644
--- a/gymnasium/envs/toy_text/taxi.py
+++ b/gymnasium/envs/toy_text/taxi.py
@@ -5,6 +5,7 @@
 
 import numpy as np
 
+import gymnasium as gym
 from gymnasium import Env, spaces, utils
 from gymnasium.envs.toy_text.utils import categorical_sample
 from gymnasium.error import DependencyNotInstalled
@@ -279,7 +280,14 @@ def reset(
         return int(self.s), {"prob": 1.0, "action_mask": self.action_mask(self.s)}
 
     def render(self):
-        if self.render_mode == "ansi":
+        if self.render_mode is None:
+            gym.logger.warn(
+                "You are calling render method without specifying any render mode. "
+                "You can specify the render_mode at initialization, "
+                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
+            )
+            return
+        elif self.render_mode == "ansi":
             return self._render_text()
         else:  # self.render_mode in {"human", "rgb_array"}:
             return self._render_gui(self.render_mode)
diff --git a/gymnasium/spaces/graph.py b/gymnasium/spaces/graph.py
index d8a124dcc..3e8a80b57 100644
--- a/gymnasium/spaces/graph.py
+++ b/gymnasium/spaces/graph.py
@@ -14,8 +14,8 @@ class GraphInstance(NamedTuple):
     """A Graph space instance.
 
     * nodes (np.ndarray): an (n x ...) sized array representing the features for n nodes, (...) must adhere to the shape of the node space.
-    * edges (Optional[np.ndarray]): an (m x ...) sized array representing the features for m nodes, (...) must adhere to the shape of the edge space.
-    * edge_links (Optional[np.ndarray]): an (m x 2) sized array of ints representing the two nodes that each edge connects.
+    * edges (Optional[np.ndarray]): an (m x ...) sized array representing the features for m edges, (...) must adhere to the shape of the edge space.
+    * edge_links (Optional[np.ndarray]): an (m x 2) sized array of ints representing the indices of the two nodes that each edge connects.
     """
 
     nodes: np.ndarray
diff --git a/gymnasium/vector/async_vector_env.py b/gymnasium/vector/async_vector_env.py
index 04b1851ad..d6cab15ef 100644
--- a/gymnasium/vector/async_vector_env.py
+++ b/gymnasium/vector/async_vector_env.py
@@ -566,9 +566,10 @@ def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
                     info,
                 ) = env.step(data)
                 if terminated or truncated:
-                    old_observation = observation
+                    old_observation, old_info = observation, info
                     observation, info = env.reset()
                     info["final_observation"] = old_observation
+                    info["final_info"] = old_info
                 pipe.send(((observation, reward, terminated, truncated, info), True))
             elif command == "seed":
                 env.seed(data)
@@ -636,10 +637,10 @@ def _worker_shared_memory(index, env_fn, pipe, parent_pipe, shared_memory, error
                     info,
                 ) = env.step(data)
                 if terminated or truncated:
-                    old_observation = observation
+                    old_observation, old_info = observation, info
                     observation, info = env.reset()
                     info["final_observation"] = old_observation
-
+                    info["final_info"] = old_info
                 write_to_shared_memory(
                     observation_space, index, observation, shared_memory
                 )
diff --git a/gymnasium/vector/sync_vector_env.py b/gymnasium/vector/sync_vector_env.py
index 5377b11c3..20b564b80 100644
--- a/gymnasium/vector/sync_vector_env.py
+++ b/gymnasium/vector/sync_vector_env.py
@@ -150,9 +150,10 @@ def step_wait(self):
             ) = env.step(action)
 
             if self._terminateds[i] or self._truncateds[i]:
-                old_observation = observation
+                old_observation, old_info = observation, info
                 observation, info = env.reset()
                 info["final_observation"] = old_observation
+                info["final_info"] = old_info
             observations.append(observation)
             infos = self._add_info(infos, info, i)
         self.observations = concatenate(
diff --git a/gymnasium/version.py b/gymnasium/version.py
index 10ea7670f..6d91b3944 100644
--- a/gymnasium/version.py
+++ b/gymnasium/version.py
@@ -1 +1 @@
-VERSION = "0.26.1"
+VERSION = "0.26.2"
diff --git a/gymnasium/wrappers/atari_preprocessing.py b/gymnasium/wrappers/atari_preprocessing.py
index 5a9a54def..779b3a25b 100644
--- a/gymnasium/wrappers/atari_preprocessing.py
+++ b/gymnasium/wrappers/atari_preprocessing.py
@@ -98,7 +98,6 @@ def __init__(
                 np.empty(env.observation_space.shape, dtype=np.uint8),
             ]
 
-        self.ale = env.unwrapped.ale
         self.lives = 0
         self.game_over = False
 
@@ -112,6 +111,11 @@ def __init__(
             low=_low, high=_high, shape=_shape, dtype=_obs_dtype
         )
 
+    @property
+    def ale(self):
+        """Make ale as a class property to avoid serialization error."""
+        return self.env.unwrapped.ale
+
     def step(self, action):
         """Applies the preprocessing for an :meth:`env.step`."""
         total_reward, terminated, truncated, info = 0.0, False, False, {}
diff --git a/setup.py b/setup.py
index 6433d1081..779275eb9 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
     "box2d": ["box2d-py==2.3.5", "pygame==2.1.0", "swig==4.*"],
     "classic_control": ["pygame==2.1.0"],
     "mujoco_py": ["mujoco_py<2.2,>=2.1"],
-    "mujoco": ["mujoco==2.2.0", "imageio>=2.14.1"],
+    "mujoco": ["mujoco==2.2", "imageio>=2.14.1"],
     "toy_text": ["pygame==2.1.0"],
     "other": ["lz4>=3.1.0", "opencv-python>=3.0", "matplotlib>=3.0", "moviepy>=1.0.0"],
 }
diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index ac3ef2d2c..801ccb98e 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -1,10 +1,13 @@
+from functools import partial
+
 import numpy as np
 import pytest
 
-from gymnasium.spaces import Tuple
+from gymnasium.spaces import Discrete, Tuple
 from gymnasium.vector.async_vector_env import AsyncVectorEnv
 from gymnasium.vector.sync_vector_env import SyncVectorEnv
 from gymnasium.vector.vector_env import VectorEnv
+from tests.testing_env import GenericTestEnv
 from tests.vector.utils import CustomSpace, make_env
 
 
@@ -58,3 +61,65 @@ def test_custom_space_vector_env():
 
     assert isinstance(env.single_action_space, CustomSpace)
     assert isinstance(env.action_space, Tuple)
+
+
+@pytest.mark.parametrize(
+    "vectoriser",
+    (
+        SyncVectorEnv,
+        partial(AsyncVectorEnv, shared_memory=True),
+        partial(AsyncVectorEnv, shared_memory=False),
+    ),
+    ids=["Sync", "Async with shared memory", "Async without shared memory"],
+)
+def test_final_obs_info(vectoriser):
+    """Tests that the vector environments correctly return the final observation and info."""
+
+    def reset_fn(self, seed=None, options=None):
+        return 0, {"reset": True}
+
+    def thunk():
+        return GenericTestEnv(
+            action_space=Discrete(4),
+            observation_space=Discrete(4),
+            reset_fn=reset_fn,
+            step_fn=lambda self, action: (
+                action if action < 3 else 0,
+                0,
+                action >= 3,
+                False,
+                {"action": action},
+            ),
+        )
+
+    env = vectoriser([thunk])
+    obs, info = env.reset()
+    assert obs == np.array([0]) and info == {
+        "reset": np.array([True]),
+        "_reset": np.array([True]),
+    }
+
+    obs, _, termination, _, info = env.step([1])
+    assert (
+        obs == np.array([1])
+        and termination == np.array([False])
+        and info == {"action": np.array([1]), "_action": np.array([True])}
+    )
+
+    obs, _, termination, _, info = env.step([2])
+    assert (
+        obs == np.array([2])
+        and termination == np.array([False])
+        and info == {"action": np.array([2]), "_action": np.array([True])}
+    )
+
+    obs, _, termination, _, info = env.step([3])
+    assert (
+        obs == np.array([0])
+        and termination == np.array([True])
+        and info["reset"] == np.array([True])
+    )
+    assert "final_observation" in info and "final_info" in info
+    assert info["final_observation"] == np.array([0]) and info["final_info"] == {
+        "action": 3
+    }