Commit a2d8e70

Merge branch 'Farama-Foundation:main' into main
Kallinteris-Andreas authored May 31, 2024
2 parents 13f67fe + 04fb345 commit a2d8e70
Showing 9 changed files with 117 additions and 78 deletions.
11 changes: 11 additions & 0 deletions gymnasium/core.py
@@ -264,6 +264,10 @@ def __exit__(self, *args: Any):
        # propagate exception
        return False

    def has_wrapper_attr(self, name: str) -> bool:
        """Checks if the attribute `name` exists in the environment."""
        return hasattr(self, name)

    def get_wrapper_attr(self, name: str) -> Any:
        """Gets the attribute `name` from the environment."""
        return getattr(self, name)
@@ -392,6 +396,13 @@ def wrapper_spec(cls, **kwargs: Any) -> WrapperSpec:
            kwargs=kwargs,
        )

    def has_wrapper_attr(self, name: str) -> bool:
        """Checks if the given attribute is within the wrapper or its environment."""
        if hasattr(self, name):
            return True
        else:
            return self.env.has_wrapper_attr(name)

    def get_wrapper_attr(self, name: str) -> Any:
        """Gets an attribute from the wrapper and lower environments if `name` doesn't exist in this object.
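For context, a minimal sketch of how the new `has_wrapper_attr` pairs with the existing `get_wrapper_attr` on a wrapped environment (assumes this branch of Gymnasium; `gravity` is an attribute of the base CartPole environment, as the tests below also rely on):

```python
import gymnasium as gym

env = gym.make("CartPole-v1")  # base env wrapped in e.g. TimeLimit

# `gravity` lives on the base CartPoleEnv, not on the outer wrapper, so a
# plain attribute access on the wrapper fails while the recursive lookup
# walks down the wrapper stack and finds it.
assert env.has_wrapper_attr("gravity")
assert env.get_wrapper_attr("gravity") is not None

# an attribute that exists nowhere in the stack
assert not env.has_wrapper_attr("unknown_attr")
```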
2 changes: 0 additions & 2 deletions gymnasium/envs/box2d/lunar_lander.py
@@ -95,8 +95,6 @@ class LunarLander(gym.Env, EzPickle):
    ```shell
    python gymnasium/envs/box2d/lunar_lander.py
    ```
    <!-- To play yourself, run: -->
    <!-- python examples/agents/keyboard_agent.py LunarLander-v3 -->

    ## Action Space
    There are four discrete actions available:
14 changes: 5 additions & 9 deletions gymnasium/utils/play.py
@@ -70,15 +70,13 @@ def _get_relevant_keys(
        self, keys_to_action: dict[tuple[int], int] | None = None
    ) -> set:
        if keys_to_action is None:
            if hasattr(self.env, "get_keys_to_action"):
                keys_to_action = self.env.get_keys_to_action()
            elif hasattr(self.env.unwrapped, "get_keys_to_action"):
                keys_to_action = self.env.unwrapped.get_keys_to_action()
            if self.env.has_wrapper_attr("get_keys_to_action"):
                keys_to_action = self.env.get_wrapper_attr("get_keys_to_action")()
            else:
                assert self.env.spec is not None
                raise MissingKeysToAction(
                    f"{self.env.spec.id} does not have explicit key to action mapping, "
                    "please specify one manually"
                    "please specify one manually, `play(env, keys_to_action=...)`"
                )
        assert isinstance(keys_to_action, dict)
        relevant_keys = set(sum((list(k) for k in keys_to_action.keys()), []))
@@ -244,10 +242,8 @@ def play(
    env.reset(seed=seed)

    if keys_to_action is None:
        if hasattr(env, "get_keys_to_action"):
            keys_to_action = env.get_keys_to_action()
        elif hasattr(env.unwrapped, "get_keys_to_action"):
            keys_to_action = env.unwrapped.get_keys_to_action()
        if env.has_wrapper_attr("get_keys_to_action"):
            keys_to_action = env.get_wrapper_attr("get_keys_to_action")()
        else:
            assert env.spec is not None
            raise MissingKeysToAction(
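The new error message points users at the manual mapping. A hedged sketch of that call (CartPole ships no `get_keys_to_action`, and the "a"/"d" bindings here are an arbitrary choice for illustration, not built-ins):

```python
import gymnasium as gym
from gymnasium.utils.play import play

# Keys map to actions; tuples of keys can express chords. This opens a
# pygame window and blocks until the window is closed.
play(
    gym.make("CartPole-v1", render_mode="rgb_array"),
    keys_to_action={"a": 0, "d": 1},  # push cart left / push cart right
    noop=0,  # action taken while no mapped key is pressed
)
```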
27 changes: 2 additions & 25 deletions gymnasium/wrappers/common.py
@@ -165,28 +165,9 @@ def spec(self) -> EnvSpec | None:
class Autoreset(
    gym.Wrapper[ObsType, ActType, ObsType, ActType], gym.utils.RecordConstructorArgs
):
    """The wrapped environment is automatically reset when an terminated or truncated state is reached.
    """The wrapped environment is automatically reset when a terminated or truncated state is reached.

    When calling step causes :meth:`Env.step` to return `terminated=True` or `truncated=True`, :meth:`Env.reset` is called,
    and the return format of :meth:`self.step` is as follows: ``(new_obs, final_reward, final_terminated, final_truncated, info)``
    with new step API and ``(new_obs, final_reward, final_done, info)`` with the old step API.

    No vector version of the wrapper exists.

    - ``obs`` is the first observation after calling :meth:`self.env.reset`
    - ``final_reward`` is the reward after calling :meth:`self.env.step`, prior to calling :meth:`self.env.reset`.
    - ``final_terminated`` is the terminated value before calling :meth:`self.env.reset`.
    - ``final_truncated`` is the truncated value before calling :meth:`self.env.reset`. Both `final_terminated` and `final_truncated` cannot be False.
    - ``info`` is a dict containing all the keys from the info dict returned by the call to :meth:`self.env.reset`,
      with an additional key "final_observation" containing the observation returned by the last call to :meth:`self.env.step`
      and "final_info" containing the info dict returned by the last call to :meth:`self.env.step`.

    Warning:
        When using this wrapper to collect rollouts, note that when :meth:`Env.step` returns `terminated` or `truncated`, a
        new observation from after calling :meth:`Env.reset` is returned by :meth:`Env.step` alongside the
        final reward, terminated and truncated state from the previous episode.
        If you need the final state from the previous episode, you need to retrieve it via the
        "final_observation" key in the info dict.
        Make sure you know what you're doing if you use this wrapper!

    This follows the vector autoreset api where on the step after an episode terminates or truncated then the environment is reset.

    Change logs:
     * v0.24.0 - Initially added as `AutoResetWrapper`
@@ -473,10 +454,6 @@ class RecordEpisodeStatistics(
    For a vectorized environments the output will be in the form of::

        >>> infos = {
        ...     "final_observation": "<array of length num-envs>",
        ...     "_final_observation": "<boolean array of length num-envs>",
        ...     "final_info": "<array of length num-envs>",
        ...     "_final_info": "<boolean array of length num-envs>",
        ...     "episode": {
        ...         "r": "<array of cumulative reward>",
        ...         "l": "<array of episode length>",
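A rough sketch of an episode loop under the vector-style autoreset semantics the new docstring describes (assuming this branch's behavior: the reset happens on the step *after* termination, not inside the terminating step):

```python
import gymnasium as gym
from gymnasium.wrappers import Autoreset

env = Autoreset(gym.make("CartPole-v1"))
obs, info = env.reset(seed=0)

for _ in range(200):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    if terminated or truncated:
        # `obs` is still the final observation of the finished episode;
        # the *next* env.step() call performs the reset and returns the
        # first observation of the new episode.
        pass
```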
9 changes: 7 additions & 2 deletions gymnasium/wrappers/transform_observation.py
@@ -517,9 +517,14 @@ def __init__(
        self.max_obs = max_obs

        # Imagine the x-axis between the old Box and the y-axis being the new Box
        # float128 is not available everywhere
        try:
            high_low_diff_dtype = np.float128
        except AttributeError:
            high_low_diff_dtype = np.float64
        high_low_diff = np.array(
            env.observation_space.high, dtype=np.float128
        ) - np.array(env.observation_space.low, dtype=np.float128)
            env.observation_space.high, dtype=high_low_diff_dtype
        ) - np.array(env.observation_space.low, dtype=high_low_diff_dtype)
        gradient = np.array(
            (max_obs - min_obs) / high_low_diff, dtype=env.observation_space.dtype
        )
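The motivation for the fallback, isolated as a small sketch: `np.float128` only exists on platforms whose C `long double` is extended precision, so it is typically absent on Windows builds of NumPy, and the wide dtype is what keeps `high - low` from overflowing for Box bounds near the float64 extremes (the float64 fallback accepts the residual overflow risk):

```python
import numpy as np

try:
    wide = np.float128
except AttributeError:  # e.g. Windows builds of NumPy
    wide = np.float64

# In float128 this span is finite; computed directly in float64 it overflows.
span = np.array([np.finfo(np.float64).max], dtype=wide) - np.array(
    [np.finfo(np.float64).min], dtype=wide
)
```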
59 changes: 28 additions & 31 deletions gymnasium/wrappers/vector/rendering.py
@@ -106,38 +106,39 @@ def _render_frame(self):
        width_ratio = subenv_size[0] / self.screen_size[0]
        height_ratio = subenv_size[1] / self.screen_size[1]

        rows, cols = 1, 1
        while rows * cols < self.num_envs:
            row_ratio = rows * height_ratio
            col_ratio = cols * width_ratio
        num_rows, num_cols = 1, 1
        while num_rows * num_cols < self.num_envs:
            row_ratio = num_rows * height_ratio
            col_ratio = num_cols * width_ratio

            if row_ratio == col_ratio:
                rows, cols = rows + 1, cols + 1
                num_rows, num_cols = num_rows + 1, num_cols + 1
            elif row_ratio > col_ratio:
                cols += 1
                num_cols += 1
            else:
                rows += 1

        self.rows = rows
        self.cols = cols
                num_rows += 1

        scaling_factor = min(
            self.screen_size[0] / (cols * subenv_size[0]),
            self.screen_size[1] / (rows * subenv_size[1]),
            self.screen_size[0] / (num_cols * subenv_size[0]),
            self.screen_size[1] / (num_rows * subenv_size[1]),
        )
        assert (
            num_cols * subenv_size[0] * scaling_factor == self.screen_size[0]
        ) or (num_rows * subenv_size[1] * scaling_factor == self.screen_size[1])

        self.num_rows = num_rows
        self.num_cols = num_cols
        self.scaled_subenv_size = (
            int(subenv_size[0] * scaling_factor),
            int(subenv_size[1] * scaling_factor),
        )

        assert (cols * subenv_size[0] * scaling_factor == self.screen_size[0]) or (
            rows * subenv_size[1] * scaling_factor == self.screen_size[1]
        )

        assert self.num_rows * self.num_cols >= self.num_envs
        assert self.scaled_subenv_size[0] * self.num_cols <= self.screen_size[0]
        assert self.scaled_subenv_size[1] * self.num_rows <= self.screen_size[1]

        # print(f'{self.num_envs=}, {self.num_rows=}, {self.num_cols=}, {self.screen_size=}, {self.scaled_subenv_size=}')

        try:
            import cv2
        except ImportError as e:
@@ -146,21 +147,17 @@ def _render_frame(self):
            ) from e

        merged_rgb_array = np.zeros(self.screen_size + (3,), dtype=np.uint8)
        i = 0
        for x in np.arange(
            0, self.screen_size[0], self.scaled_subenv_size[0], dtype=np.int32
        ):
            for y in np.arange(
                0, self.screen_size[1], self.scaled_subenv_size[1], dtype=np.int32
            ):
                scaled_render = cv2.resize(
                    subenv_renders[i], self.scaled_subenv_size[::-1]
                )
                merged_rgb_array[
                    x : x + self.scaled_subenv_size[0],
                    y : y + self.scaled_subenv_size[1],
                ] = scaled_render
                i += 1
        cols, rows = np.meshgrid(np.arange(self.num_cols), np.arange(self.num_rows))

        for i, col, row in zip(range(self.num_envs), cols.flatten(), rows.flatten()):
            scaled_render = cv2.resize(subenv_renders[i], self.scaled_subenv_size[::-1])
            x = col * self.scaled_subenv_size[0]
            y = row * self.scaled_subenv_size[1]

            merged_rgb_array[
                x : x + self.scaled_subenv_size[0],
                y : y + self.scaled_subenv_size[1],
            ] = scaled_render

        if self.window is None:
            pygame.init()
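A standalone sketch of the renamed grid-search loop (a hypothetical helper extracted from `_render_frame` above, same logic): grow a 1x1 grid one row or column at a time, extending whichever direction is proportionally less full, until every sub-environment has a tile.

```python
def grid_shape(num_envs, subenv_size, screen_size):
    """Hypothetical extraction of the layout loop in _render_frame."""
    width_ratio = subenv_size[0] / screen_size[0]
    height_ratio = subenv_size[1] / screen_size[1]

    num_rows, num_cols = 1, 1
    while num_rows * num_cols < num_envs:
        row_ratio = num_rows * height_ratio
        col_ratio = num_cols * width_ratio
        if row_ratio == col_ratio:
            num_rows, num_cols = num_rows + 1, num_cols + 1
        elif row_ratio > col_ratio:
            num_cols += 1
        else:
            num_rows += 1
    return num_rows, num_cols

# nine 400x300 frames on a 600x600 screen pack into a 3x3 grid
assert grid_shape(9, (400, 300), (600, 600)) == (3, 3)
```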
7 changes: 6 additions & 1 deletion tests/test_core.py
@@ -168,22 +168,25 @@ def test_reward_observation_action_wrapper():

def test_get_set_wrapper_attr():
    env = gym.make("CartPole-v1")
    assert env is not env.unwrapped

    # Test get_wrapper_attr
    with pytest.raises(AttributeError):
        env.gravity
    assert env.unwrapped.gravity is not None
    assert env.has_wrapper_attr("gravity")
    assert env.get_wrapper_attr("gravity") is not None

    with pytest.raises(AttributeError):
        env.unknown_attr
    assert env.has_wrapper_attr("unknown_attr") is False
    with pytest.raises(AttributeError):
        env.get_wrapper_attr("unknown_attr")

    # Test set_wrapper_attr
    env.set_wrapper_attr("gravity", 10.0)
    with pytest.raises(AttributeError):
        env.gravity
        env.gravity  # checks the top level wrapper hasn't been updated
    assert env.unwrapped.gravity == 10.0
    assert env.get_wrapper_attr("gravity") == 10.0

Expand All @@ -195,10 +198,12 @@ def test_get_set_wrapper_attr():
# Test with OrderEnforcing (intermediate wrapper)
assert not isinstance(env, OrderEnforcing)

# show that the base and top level objects don't contain the attribute
with pytest.raises(AttributeError):
env._disable_render_order_enforcing
with pytest.raises(AttributeError):
env.unwrapped._disable_render_order_enforcing
assert env.has_wrapper_attr("_disable_render_order_enforcing")
assert env.get_wrapper_attr("_disable_render_order_enforcing") is False

env.set_wrapper_attr("_disable_render_order_enforcing", True)
23 changes: 15 additions & 8 deletions tests/wrappers/vector/test_dict_info_to_list.py
@@ -60,9 +60,13 @@ def test_update_info():
        "_e": np.array([True]),
    }
    _, list_info = env.reset(options=vector_infos)

    # The return dtype of np.array([0]) is platform dependent
    np_array_int_default_dtype = np.array([0]).dtype.type

    expected_list_info = [
        {
            "a": np.int64(0),
            "a": np_array_int_default_dtype(0),
            "b": np.float64(0.0),
            "c": None,
            "d": np.zeros((2,)),
@@ -90,21 +94,21 @@ def test_update_info():
    _, list_info = env.reset(options=vector_infos)
    expected_list_info = [
        {
            "a": np.int64(0),
            "a": np_array_int_default_dtype(0),
            "b": np.float64(0.0),
            "c": None,
            "d": np.zeros((2,)),
            "e": Discrete(1),
        },
        {
            "a": np.int64(1),
            "a": np_array_int_default_dtype(1),
            "b": np.float64(1.0),
            "c": None,
            "d": np.zeros((2,)),
            "e": Discrete(2),
        },
        {
            "a": np.int64(2),
            "a": np_array_int_default_dtype(2),
            "b": np.float64(2.0),
            "c": None,
            "d": np.zeros((2,)),
@@ -134,7 +138,7 @@ def test_update_info():
    }
    _, list_info = env.reset(options=vector_infos)
    expected_list_info = [
        {"a": np.int64(1), "b": np.float64(1.0)},
        {"a": np_array_int_default_dtype(1), "b": np.float64(1.0)},
        {"c": None, "d": np.zeros((2,))},
        {"e": Discrete(3)},
    ]
Expand All @@ -156,8 +160,11 @@ def test_update_info():
}
_, list_info = env.reset(options=vector_infos)
expected_list_info = [
{"episode": {"a": np.int64(1), "b": np.float64(1.0)}},
{"episode": {"a": np.int64(2), "b": np.float64(2.0)}, "a": np.int64(1)},
{"a": np.int64(2)},
{"episode": {"a": np_array_int_default_dtype(1), "b": np.float64(1.0)}},
{
"episode": {"a": np_array_int_default_dtype(2), "b": np.float64(2.0)},
"a": np_array_int_default_dtype(1),
},
{"a": np_array_int_default_dtype(2)},
]
assert data_equivalence(list_info, expected_list_info)
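The indirection exists because NumPy's default integer dtype follows the platform's C long: historically `int32` on Windows, `int64` on most Linux/macOS builds. A quick check of the pattern the test relies on:

```python
import numpy as np

# On a typical Linux build this prints int64; older Windows builds give int32.
print(np.array([0]).dtype)

# dtype.type is the scalar constructor (e.g. np.int64), as used in the test above.
assert np.array([0]).dtype.type(1) == 1
```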
43 changes: 43 additions & 0 deletions tests/wrappers/vector/test_human_rendering.py
@@ -0,0 +1,43 @@
"""Test suite of HumanRendering wrapper."""
import re

import pytest

import gymnasium as gym
from gymnasium.wrappers.vector import HumanRendering


@pytest.mark.parametrize("env_id", ["CartPole-v1", "Ant-v4"])
@pytest.mark.parametrize("num_envs", [1, 3, 9])
@pytest.mark.parametrize("screen_size", [None, (400, 300), (300, 600), (600, 600)])
def test_num_envs_screen_size(env_id, num_envs, screen_size):
envs = gym.make_vec(env_id, num_envs=num_envs, render_mode="rgb_array")
envs = HumanRendering(envs, screen_size=screen_size)

assert envs.render_mode == "human"

envs.reset()
for _ in range(25):
envs.step(envs.action_space.sample())
envs.close()


def test_render_modes():
envs = HumanRendering(
gym.make_vec("CartPole-v1", num_envs=3, render_mode="rgb_array_list")
)
assert envs.render_mode == "human"

envs.reset()
for _ in range(25):
envs.step(envs.action_space.sample())
envs.close()

# HumanRenderer on human renderer should not work
with pytest.raises(
AssertionError,
match=re.escape(
"Expected env.render_mode to be one of ['rgb_array', 'rgb_array_list', 'depth_array', 'depth_array_list'] but got 'human'"
),
):
HumanRendering(envs)
