Commit a2d8e70

Merge branch 'Farama-Foundation:main' into main
Kallinteris-Andreas authored May 31, 2024
2 parents 13f67fe + 04fb345 commit a2d8e70
Showing 9 changed files with 117 additions and 78 deletions.
11 changes: 11 additions & 0 deletions gymnasium/core.py
@@ -264,6 +264,10 @@ def __exit__(self, *args: Any):
        # propagate exception
        return False

    def has_wrapper_attr(self, name: str) -> bool:
        """Checks if the attribute `name` exists in the environment."""
        return hasattr(self, name)

    def get_wrapper_attr(self, name: str) -> Any:
        """Gets the attribute `name` from the environment."""
        return getattr(self, name)
@@ -392,6 +396,13 @@ def wrapper_spec(cls, **kwargs: Any) -> WrapperSpec:
            kwargs=kwargs,
        )

    def has_wrapper_attr(self, name: str) -> bool:
        """Checks if the given attribute is within the wrapper or its environment."""
        if hasattr(self, name):
            return True
        else:
            return self.env.has_wrapper_attr(name)

    def get_wrapper_attr(self, name: str) -> Any:
        """Gets an attribute from the wrapper and lower environments if `name` doesn't exist in this object.
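For context, a minimal sketch of how the new `has_wrapper_attr` pairs with the existing `get_wrapper_attr` on a wrapped environment (assumes this branch of Gymnasium; `gravity` is an attribute of the base CartPole environment, as the tests below also rely on):

```python
import gymnasium as gym

env = gym.make("CartPole-v1")  # base env wrapped in e.g. TimeLimit

# `gravity` lives on the base CartPoleEnv, not on the outer wrapper, so a
# plain attribute access on the wrapper fails while the recursive lookup
# walks down the wrapper stack and finds it.
assert env.has_wrapper_attr("gravity")
assert env.get_wrapper_attr("gravity") is not None

# an attribute that exists nowhere in the stack
assert not env.has_wrapper_attr("unknown_attr")
```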
2 changes: 0 additions & 2 deletions gymnasium/envs/box2d/lunar_lander.py
@@ -95,8 +95,6 @@ class LunarLander(gym.Env, EzPickle):
    ```shell
    python gymnasium/envs/box2d/lunar_lander.py
    ```
    <!-- To play yourself, run: -->
    <!-- python examples/agents/keyboard_agent.py LunarLander-v3 -->

    ## Action Space
    There are four discrete actions available:
14 changes: 5 additions & 9 deletions gymnasium/utils/play.py
@@ -70,15 +70,13 @@ def _get_relevant_keys(
        self, keys_to_action: dict[tuple[int], int] | None = None
    ) -> set:
        if keys_to_action is None:
            if hasattr(self.env, "get_keys_to_action"):
                keys_to_action = self.env.get_keys_to_action()
            elif hasattr(self.env.unwrapped, "get_keys_to_action"):
                keys_to_action = self.env.unwrapped.get_keys_to_action()
            if self.env.has_wrapper_attr("get_keys_to_action"):
                keys_to_action = self.env.get_wrapper_attr("get_keys_to_action")()
            else:
                assert self.env.spec is not None
                raise MissingKeysToAction(
                    f"{self.env.spec.id} does not have explicit key to action mapping, "
                    "please specify one manually"
                    "please specify one manually, `play(env, keys_to_action=...)`"
                )
        assert isinstance(keys_to_action, dict)
        relevant_keys = set(sum((list(k) for k in keys_to_action.keys()), []))
@@ -244,10 +242,8 @@ def play(
    env.reset(seed=seed)

    if keys_to_action is None:
        if hasattr(env, "get_keys_to_action"):
            keys_to_action = env.get_keys_to_action()
        elif hasattr(env.unwrapped, "get_keys_to_action"):
            keys_to_action = env.unwrapped.get_keys_to_action()
        if env.has_wrapper_attr("get_keys_to_action"):
            keys_to_action = env.get_wrapper_attr("get_keys_to_action")()
        else:
            assert env.spec is not None
            raise MissingKeysToAction(
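The new error message points users at the manual mapping. A hedged sketch of that call (CartPole ships no `get_keys_to_action`, and the "a"/"d" bindings here are an arbitrary choice for illustration, not built-ins):

```python
import gymnasium as gym
from gymnasium.utils.play import play

# Keys map to actions; tuples of keys can express chords. This opens a
# pygame window and blocks until the window is closed.
play(
    gym.make("CartPole-v1", render_mode="rgb_array"),
    keys_to_action={"a": 0, "d": 1},  # push cart left / push cart right
    noop=0,  # action taken while no mapped key is pressed
)
```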
27 changes: 2 additions & 25 deletions gymnasium/wrappers/common.py
@@ -165,28 +165,9 @@ def spec(self) -> EnvSpec | None:
class Autoreset(
    gym.Wrapper[ObsType, ActType, ObsType, ActType], gym.utils.RecordConstructorArgs
):
    """The wrapped environment is automatically reset when an terminated or truncated state is reached.
    """The wrapped environment is automatically reset when a terminated or truncated state is reached.

    When calling step causes :meth:`Env.step` to return `terminated=True` or `truncated=True`, :meth:`Env.reset` is called,
    and the return format of :meth:`self.step` is as follows: ``(new_obs, final_reward, final_terminated, final_truncated, info)``
    with new step API and ``(new_obs, final_reward, final_done, info)`` with the old step API.

    No vector version of the wrapper exists.

    - ``obs`` is the first observation after calling :meth:`self.env.reset`
    - ``final_reward`` is the reward after calling :meth:`self.env.step`, prior to calling :meth:`self.env.reset`.
    - ``final_terminated`` is the terminated value before calling :meth:`self.env.reset`.
    - ``final_truncated`` is the truncated value before calling :meth:`self.env.reset`. Both `final_terminated` and `final_truncated` cannot be False.
    - ``info`` is a dict containing all the keys from the info dict returned by the call to :meth:`self.env.reset`,
      with an additional key "final_observation" containing the observation returned by the last call to :meth:`self.env.step`
      and "final_info" containing the info dict returned by the last call to :meth:`self.env.step`.

    Warning:
        When using this wrapper to collect rollouts, note that when :meth:`Env.step` returns `terminated` or `truncated`, a
        new observation from after calling :meth:`Env.reset` is returned by :meth:`Env.step` alongside the
        final reward, terminated and truncated state from the previous episode.
        If you need the final state from the previous episode, you need to retrieve it via the
        "final_observation" key in the info dict.
        Make sure you know what you're doing if you use this wrapper!

    This follows the vector autoreset api where on the step after an episode terminates or truncated then the environment is reset.

    Change logs:
     * v0.24.0 - Initially added as `AutoResetWrapper`
@@ -473,10 +454,6 @@ class RecordEpisodeStatistics(
    For a vectorized environments the output will be in the form of::

        >>> infos = {
        ...     "final_observation": "<array of length num-envs>",
        ...     "_final_observation": "<boolean array of length num-envs>",
        ...     "final_info": "<array of length num-envs>",
        ...     "_final_info": "<boolean array of length num-envs>",
        ...     "episode": {
        ...         "r": "<array of cumulative reward>",
        ...         "l": "<array of episode length>",
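A rough sketch of an episode loop under the vector-style autoreset semantics the new docstring describes (assuming this branch's behavior: the reset happens on the step *after* termination, not inside the terminating step):

```python
import gymnasium as gym
from gymnasium.wrappers import Autoreset

env = Autoreset(gym.make("CartPole-v1"))
obs, info = env.reset(seed=0)

for _ in range(200):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    if terminated or truncated:
        # `obs` is still the final observation of the finished episode;
        # the *next* env.step() call performs the reset and returns the
        # first observation of the new episode.
        pass
```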
9 changes: 7 additions & 2 deletions gymnasium/wrappers/transform_observation.py
@@ -517,9 +517,14 @@ def __init__(
        self.max_obs = max_obs

        # Imagine the x-axis between the old Box and the y-axis being the new Box
        # float128 is not available everywhere
        try:
            high_low_diff_dtype = np.float128
        except AttributeError:
            high_low_diff_dtype = np.float64
        high_low_diff = np.array(
            env.observation_space.high, dtype=np.float128
        ) - np.array(env.observation_space.low, dtype=np.float128)
            env.observation_space.high, dtype=high_low_diff_dtype
        ) - np.array(env.observation_space.low, dtype=high_low_diff_dtype)
        gradient = np.array(
            (max_obs - min_obs) / high_low_diff, dtype=env.observation_space.dtype
        )
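The motivation for the fallback, isolated as a small sketch: `np.float128` only exists on platforms whose C `long double` is extended precision, so it is typically absent on Windows builds of NumPy, and the wide dtype is what keeps `high - low` from overflowing for Box bounds near the float64 extremes (the float64 fallback accepts the residual overflow risk):

```python
import numpy as np

try:
    wide = np.float128
except AttributeError:  # e.g. Windows builds of NumPy
    wide = np.float64

# In float128 this span is finite; computed directly in float64 it overflows.
span = np.array([np.finfo(np.float64).max], dtype=wide) - np.array(
    [np.finfo(np.float64).min], dtype=wide
)
```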
59 changes: 28 additions & 31 deletions gymnasium/wrappers/vector/rendering.py
@@ -106,38 +106,39 @@ def _render_frame(self):
        width_ratio = subenv_size[0] / self.screen_size[0]
        height_ratio = subenv_size[1] / self.screen_size[1]

        rows, cols = 1, 1
        while rows * cols < self.num_envs:
            row_ratio = rows * height_ratio
            col_ratio = cols * width_ratio
        num_rows, num_cols = 1, 1
        while num_rows * num_cols < self.num_envs:
            row_ratio = num_rows * height_ratio
            col_ratio = num_cols * width_ratio

            if row_ratio == col_ratio:
                rows, cols = rows + 1, cols + 1
                num_rows, num_cols = num_rows + 1, num_cols + 1
            elif row_ratio > col_ratio:
                cols += 1
                num_cols += 1
            else:
                rows += 1

        self.rows = rows
        self.cols = cols
                num_rows += 1

        scaling_factor = min(
            self.screen_size[0] / (cols * subenv_size[0]),
            self.screen_size[1] / (rows * subenv_size[1]),
            self.screen_size[0] / (num_cols * subenv_size[0]),
            self.screen_size[1] / (num_rows * subenv_size[1]),
        )
        assert (
            num_cols * subenv_size[0] * scaling_factor == self.screen_size[0]
        ) or (num_rows * subenv_size[1] * scaling_factor == self.screen_size[1])

        self.num_rows = num_rows
        self.num_cols = num_cols
        self.scaled_subenv_size = (
            int(subenv_size[0] * scaling_factor),
            int(subenv_size[1] * scaling_factor),
        )

        assert (cols * subenv_size[0] * scaling_factor == self.screen_size[0]) or (
            rows * subenv_size[1] * scaling_factor == self.screen_size[1]
        )

        assert self.num_rows * self.num_cols >= self.num_envs
        assert self.scaled_subenv_size[0] * self.num_cols <= self.screen_size[0]
        assert self.scaled_subenv_size[1] * self.num_rows <= self.screen_size[1]

        # print(f'{self.num_envs=}, {self.num_rows=}, {self.num_cols=}, {self.screen_size=}, {self.scaled_subenv_size=}')

        try:
            import cv2
        except ImportError as e:
@@ -146,21 +147,17 @@ def _render_frame(self):
            ) from e

        merged_rgb_array = np.zeros(self.screen_size + (3,), dtype=np.uint8)
        i = 0
        for x in np.arange(
            0, self.screen_size[0], self.scaled_subenv_size[0], dtype=np.int32
        ):
            for y in np.arange(
                0, self.screen_size[1], self.scaled_subenv_size[1], dtype=np.int32
            ):
                scaled_render = cv2.resize(
                    subenv_renders[i], self.scaled_subenv_size[::-1]
                )
                merged_rgb_array[
                    x : x + self.scaled_subenv_size[0],
                    y : y + self.scaled_subenv_size[1],
                ] = scaled_render
                i += 1
        cols, rows = np.meshgrid(np.arange(self.num_cols), np.arange(self.num_rows))

        for i, col, row in zip(range(self.num_envs), cols.flatten(), rows.flatten()):
            scaled_render = cv2.resize(subenv_renders[i], self.scaled_subenv_size[::-1])
            x = col * self.scaled_subenv_size[0]
            y = row * self.scaled_subenv_size[1]

            merged_rgb_array[
                x : x + self.scaled_subenv_size[0],
                y : y + self.scaled_subenv_size[1],
            ] = scaled_render

        if self.window is None:
            pygame.init()
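A standalone sketch of the renamed grid-search loop (a hypothetical helper extracted from `_render_frame` above, same logic): grow a 1x1 grid one row or column at a time, extending whichever direction is proportionally less full, until every sub-environment has a tile.

```python
def grid_shape(num_envs, subenv_size, screen_size):
    """Hypothetical extraction of the layout loop in _render_frame."""
    width_ratio = subenv_size[0] / screen_size[0]
    height_ratio = subenv_size[1] / screen_size[1]

    num_rows, num_cols = 1, 1
    while num_rows * num_cols < num_envs:
        row_ratio = num_rows * height_ratio
        col_ratio = num_cols * width_ratio
        if row_ratio == col_ratio:
            num_rows, num_cols = num_rows + 1, num_cols + 1
        elif row_ratio > col_ratio:
            num_cols += 1
        else:
            num_rows += 1
    return num_rows, num_cols

# nine 400x300 frames on a 600x600 screen pack into a 3x3 grid
assert grid_shape(9, (400, 300), (600, 600)) == (3, 3)
```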
7 changes: 6 additions & 1 deletion tests/test_core.py
@@ -168,22 +168,25 @@ def test_reward_observation_action_wrapper():

def test_get_set_wrapper_attr():
    env = gym.make("CartPole-v1")
    assert env is not env.unwrapped

    # Test get_wrapper_attr
    with pytest.raises(AttributeError):
        env.gravity
    assert env.unwrapped.gravity is not None
    assert env.has_wrapper_attr("gravity")
    assert env.get_wrapper_attr("gravity") is not None

    with pytest.raises(AttributeError):
        env.unknown_attr
    assert env.has_wrapper_attr("unknown_attr") is False
    with pytest.raises(AttributeError):
        env.get_wrapper_attr("unknown_attr")

    # Test set_wrapper_attr
    env.set_wrapper_attr("gravity", 10.0)
    with pytest.raises(AttributeError):
        env.gravity
        env.gravity  # checks the top level wrapper hasn't been updated
    assert env.unwrapped.gravity == 10.0
    assert env.get_wrapper_attr("gravity") == 10.0

Expand All @@ -195,10 +198,12 @@ def test_get_set_wrapper_attr():
# Test with OrderEnforcing (intermediate wrapper)
assert not isinstance(env, OrderEnforcing)

# show that the base and top level objects don't contain the attribute
with pytest.raises(AttributeError):
env._disable_render_order_enforcing
with pytest.raises(AttributeError):
env.unwrapped._disable_render_order_enforcing
assert env.has_wrapper_attr("_disable_render_order_enforcing")
assert env.get_wrapper_attr("_disable_render_order_enforcing") is False

env.set_wrapper_attr("_disable_render_order_enforcing", True)
23 changes: 15 additions & 8 deletions tests/wrappers/vector/test_dict_info_to_list.py
@@ -60,9 +60,13 @@ def test_update_info():
        "_e": np.array([True]),
    }
    _, list_info = env.reset(options=vector_infos)

    # The return dtype of np.array([0]) is platform dependent
    np_array_int_default_dtype = np.array([0]).dtype.type

    expected_list_info = [
        {
            "a": np.int64(0),
            "a": np_array_int_default_dtype(0),
            "b": np.float64(0.0),
            "c": None,
            "d": np.zeros((2,)),
@@ -90,21 +94,21 @@ def test_update_info():
    _, list_info = env.reset(options=vector_infos)
    expected_list_info = [
        {
            "a": np.int64(0),
            "a": np_array_int_default_dtype(0),
            "b": np.float64(0.0),
            "c": None,
            "d": np.zeros((2,)),
            "e": Discrete(1),
        },
        {
            "a": np.int64(1),
            "a": np_array_int_default_dtype(1),
            "b": np.float64(1.0),
            "c": None,
            "d": np.zeros((2,)),
            "e": Discrete(2),
        },
        {
            "a": np.int64(2),
            "a": np_array_int_default_dtype(2),
            "b": np.float64(2.0),
            "c": None,
            "d": np.zeros((2,)),
@@ -134,7 +138,7 @@ def test_update_info():
    }
    _, list_info = env.reset(options=vector_infos)
    expected_list_info = [
        {"a": np.int64(1), "b": np.float64(1.0)},
        {"a": np_array_int_default_dtype(1), "b": np.float64(1.0)},
        {"c": None, "d": np.zeros((2,))},
        {"e": Discrete(3)},
    ]
Expand All @@ -156,8 +160,11 @@ def test_update_info():
}
_, list_info = env.reset(options=vector_infos)
expected_list_info = [
{"episode": {"a": np.int64(1), "b": np.float64(1.0)}},
{"episode": {"a": np.int64(2), "b": np.float64(2.0)}, "a": np.int64(1)},
{"a": np.int64(2)},
{"episode": {"a": np_array_int_default_dtype(1), "b": np.float64(1.0)}},
{
"episode": {"a": np_array_int_default_dtype(2), "b": np.float64(2.0)},
"a": np_array_int_default_dtype(1),
},
{"a": np_array_int_default_dtype(2)},
]
assert data_equivalence(list_info, expected_list_info)
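The indirection exists because NumPy's default integer dtype follows the platform's C long: historically `int32` on Windows, `int64` on most Linux/macOS builds. A quick check of the pattern the test relies on:

```python
import numpy as np

# On a typical Linux build this prints int64; older Windows builds give int32.
print(np.array([0]).dtype)

# dtype.type is the scalar constructor (e.g. np.int64), as used in the test above.
assert np.array([0]).dtype.type(1) == 1
```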
43 changes: 43 additions & 0 deletions tests/wrappers/vector/test_human_rendering.py
@@ -0,0 +1,43 @@
"""Test suite of HumanRendering wrapper."""
import re

import pytest

import gymnasium as gym
from gymnasium.wrappers.vector import HumanRendering


@pytest.mark.parametrize("env_id", ["CartPole-v1", "Ant-v4"])
@pytest.mark.parametrize("num_envs", [1, 3, 9])
@pytest.mark.parametrize("screen_size", [None, (400, 300), (300, 600), (600, 600)])
def test_num_envs_screen_size(env_id, num_envs, screen_size):
envs = gym.make_vec(env_id, num_envs=num_envs, render_mode="rgb_array")
envs = HumanRendering(envs, screen_size=screen_size)

assert envs.render_mode == "human"

envs.reset()
for _ in range(25):
envs.step(envs.action_space.sample())
envs.close()


def test_render_modes():
envs = HumanRendering(
gym.make_vec("CartPole-v1", num_envs=3, render_mode="rgb_array_list")
)
assert envs.render_mode == "human"

envs.reset()
for _ in range(25):
envs.step(envs.action_space.sample())
envs.close()

# HumanRenderer on human renderer should not work
with pytest.raises(
AssertionError,
match=re.escape(
"Expected env.render_mode to be one of ['rgb_array', 'rgb_array_list', 'depth_array', 'depth_array_list'] but got 'human'"
),
):
HumanRendering(envs)
