Skip to content

Commit

Permalink
Merge branch 'Farama-Foundation:main' into py313
Browse files Browse the repository at this point in the history
  • Loading branch information
Kallinteris-Andreas authored Dec 4, 2024
2 parents 37240a9 + f949331 commit c20bb25
Show file tree
Hide file tree
Showing 26 changed files with 869 additions and 74 deletions.
2 changes: 1 addition & 1 deletion docs/introduction/record_agent.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ print(f'Episode lengths: {env.length_queue}')
In the script above, for the :class:`RecordVideo` wrapper, we specify three different variables: ``video_folder`` to specify the folder where the videos should be saved (change this for your problem), ``name_prefix`` for the prefix of the video files themselves, and finally an ``episode_trigger`` such that every episode is recorded. This means that for every episode of the environment, a video will be recorded and saved in the style "cartpole-agent/eval-episode-x.mp4".
For the :class:`RecordEpisodicStatistics`, we only need to specify the buffer lengths, this is the max length of the internal ``time_queue``, ``return_queue`` and ``length_queue``. Rather than collect the data for each episode individually, we can use the data queues to print the information at the end of the evaluation.
For the :class:`RecordEpisodeStatistics`, we only need to specify the buffer lengths, this is the max length of the internal ``time_queue``, ``return_queue`` and ``length_queue``. Rather than collect the data for each episode individually, we can use the data queues to print the information at the end of the evaluation.
To speed up the evaluation of environments, it is possible to implement this with vector environments in order to evaluate ``N`` episodes at the same time in parallel rather than in series.
```
Expand Down
6 changes: 3 additions & 3 deletions docs/introduction/train_agent.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,17 +160,17 @@ fig, axs = plt.subplots(1, 3, figsize=(20, 8))

# np.convolve will compute the rolling mean for 100 episodes

axs[0].plot(np.convolve(env.return_queue, np.ones(100)))
axs[0].plot(np.convolve(env.return_queue, np.ones(100)/100))
axs[0].set_title("Episode Rewards")
axs[0].set_xlabel("Episode")
axs[0].set_ylabel("Reward")

axs[1].plot(np.convolve(env.length_queue, np.ones(100)))
axs[1].plot(np.convolve(env.length_queue, np.ones(100)/100))
axs[1].set_title("Episode Lengths")
axs[1].set_xlabel("Episode")
axs[1].set_ylabel("Length")

axs[2].plot(np.convolve(agent.training_error, np.ones(100)))
axs[2].plot(np.convolve(agent.training_error, np.ones(100)/100))
axs[2].set_title("Training Error")
axs[2].set_xlabel("Episode")
axs[2].set_ylabel("Temporal Difference")
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/training_agents/blackjack_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def decay_epsilon(self):
#


env = gym.wrappers.RecordEpisodeStatistics(env, buffer_length=n_episodes)
env = gym.wrappers.RecordEpisodeStatistics(env, deque_size=n_episodes)
for episode in tqdm(range(n_episodes)):
obs, info = env.reset()
done = False
Expand Down
4 changes: 3 additions & 1 deletion gymnasium/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,9 @@ def __init__(self, env: Env[ObsType, ActType]):
env: The environment to wrap
"""
self.env = env
assert isinstance(env, Env)
assert isinstance(
env, Env
), f"Expected env to be a `gymnasium.Env` but got {type(env)}"

self._action_space: spaces.Space[WrapperActType] | None = None
self._observation_space: spaces.Space[WrapperObsType] | None = None
Expand Down
3 changes: 2 additions & 1 deletion gymnasium/envs/classic_control/cartpole.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from gymnasium import logger, spaces
from gymnasium.envs.classic_control import utils
from gymnasium.error import DependencyNotInstalled
from gymnasium.vector import VectorEnv
from gymnasium.vector import AutoresetMode, VectorEnv
from gymnasium.vector.utils import batch_space


Expand Down Expand Up @@ -355,6 +355,7 @@ class CartPoleVectorEnv(VectorEnv):
metadata = {
"render_modes": ["rgb_array"],
"render_fps": 50,
"autoreset_mode": AutoresetMode.NEXT_STEP,
}

def __init__(
Expand Down
3 changes: 2 additions & 1 deletion gymnasium/envs/functional_jax_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from gymnasium.envs.registration import EnvSpec
from gymnasium.experimental.functional import ActType, FuncEnv, StateType
from gymnasium.utils import seeding
from gymnasium.vector import AutoresetMode
from gymnasium.vector.utils import batch_space


Expand Down Expand Up @@ -115,7 +116,7 @@ def __init__(
"""Initialize the environment from a FuncEnv."""
super().__init__()
if metadata is None:
metadata = {}
metadata = {"autoreset_mode": AutoresetMode.NEXT_STEP}
self.func_env = func_env
self.num_envs = num_envs

Expand Down
8 changes: 4 additions & 4 deletions gymnasium/envs/mujoco/mujoco_rendering.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,11 +258,13 @@ def render(

# Process rendered images according to render_mode
if render_mode in ["depth_array", "rgbd_tuple"]:
depth_img = depth_arr.reshape(self.viewport.height, self.viewport.width)
depth_img = depth_arr.reshape((self.viewport.height, self.viewport.width))
# original image is upside-down, so flip it
depth_img = depth_img[::-1, :]
if render_mode in ["rgb_array", "rgbd_tuple"]:
rgb_img = rgb_arr.reshape(self.viewport.height, self.viewport.width, 3)
rgb_img = rgb_arr.reshape((self.viewport.height, self.viewport.width, 3))
# original image is upside-down, so flip it
rgb_img = rgb_img[::-1, :]

if segmentation:
seg_img = (
Expand All @@ -281,8 +283,6 @@ def render(
seg_ids[geom.segid + 1, 0] = geom.objtype
seg_ids[geom.segid + 1, 1] = geom.objid
rgb_img = seg_ids[seg_img]
# original image is upside-down, so flip it
rgb_img = rgb_img[::-1, :, :]

# Return processed images based on render_mode
if render_mode == "rgb_array":
Expand Down
8 changes: 7 additions & 1 deletion gymnasium/envs/phys2d/cartpole.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from gymnasium.error import DependencyNotInstalled
from gymnasium.experimental.functional import ActType, FuncEnv, StateType
from gymnasium.utils import EzPickle
from gymnasium.vector import AutoresetMode


RenderStateType = Tuple["pygame.Surface", "pygame.time.Clock"] # type: ignore # noqa: F821
Expand Down Expand Up @@ -272,7 +273,12 @@ def __init__(self, render_mode: str | None = None, **kwargs: Any):
class CartPoleJaxVectorEnv(FunctionalJaxVectorEnv, EzPickle):
"""Jax-based implementation of the vectorized CartPole environment."""

metadata = {"render_modes": ["rgb_array"], "render_fps": 50, "jax": True}
metadata = {
"render_modes": ["rgb_array"],
"render_fps": 50,
"jax": True,
"autoreset_mode": AutoresetMode.NEXT_STEP,
}

def __init__(
self,
Expand Down
8 changes: 7 additions & 1 deletion gymnasium/envs/phys2d/pendulum.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from gymnasium.error import DependencyNotInstalled
from gymnasium.experimental.functional import ActType, FuncEnv, StateType
from gymnasium.utils import EzPickle
from gymnasium.vector import AutoresetMode


RenderStateType = Tuple["pygame.Surface", "pygame.time.Clock", Optional[float]] # type: ignore # noqa: F821
Expand Down Expand Up @@ -225,7 +226,12 @@ def get_default_params(self, **kwargs) -> PendulumParams:
class PendulumJaxEnv(FunctionalJaxEnv, EzPickle):
"""Jax-based pendulum environment using the functional version as base."""

metadata = {"render_modes": ["rgb_array"], "render_fps": 30, "jax": True}
metadata = {
"render_modes": ["rgb_array"],
"render_fps": 30,
"jax": True,
"autoreset_mode": AutoresetMode.NEXT_STEP,
}

def __init__(self, render_mode: str | None = None, **kwargs: Any):
"""Constructor where the kwargs are passed to the base environment to modify the parameters."""
Expand Down
11 changes: 11 additions & 0 deletions gymnasium/envs/registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

import gymnasium as gym
from gymnasium import Env, Wrapper, error, logger
from gymnasium.logger import warn
from gymnasium.vector import AutoresetMode


if sys.version_info < (3, 10):
Expand Down Expand Up @@ -976,6 +978,15 @@ def create_single_env() -> Env:
copied_id_spec.kwargs["wrappers"] = wrappers
env.unwrapped.spec = copied_id_spec

if "autoreset_mode" not in env.metadata:
warn(
f"The VectorEnv ({env}) is missing AutoresetMode metadata, metadata={env.metadata}"
)
elif not isinstance(env.metadata["autoreset_mode"], AutoresetMode):
warn(
f"The VectorEnv ({env}) metadata['autoreset_mode'] is not an instance of AutoresetMode, {type(env.metadata['autoreset_mode'])}."
)

return env


Expand Down
2 changes: 2 additions & 0 deletions gymnasium/envs/tabular/blackjack.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from gymnasium.error import DependencyNotInstalled
from gymnasium.experimental.functional import ActType, FuncEnv, StateType
from gymnasium.utils import EzPickle, seeding
from gymnasium.vector import AutoresetMode
from gymnasium.wrappers import HumanRendering


Expand Down Expand Up @@ -239,6 +240,7 @@ class BlackjackFunctional(
# NOTE: the autoreset entry must use the exact key "autoreset_mode" (the same
# key the other tabular functional environments declare); a misspelled key is
# silently ignored by any code that looks up metadata["autoreset_mode"].
metadata = {
    "render_modes": ["rgb_array"],
    "render_fps": 4,
    "autoreset_mode": AutoresetMode.NEXT_STEP,
}

def transition(
Expand Down
2 changes: 2 additions & 0 deletions gymnasium/envs/tabular/cliffwalking.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from gymnasium.error import DependencyNotInstalled
from gymnasium.experimental.functional import ActType, FuncEnv, StateType
from gymnasium.utils import EzPickle
from gymnasium.vector import AutoresetMode
from gymnasium.wrappers import HumanRendering


Expand Down Expand Up @@ -136,6 +137,7 @@ class CliffWalkingFunctional(
metadata = {
"render_modes": ["rgb_array"],
"render_fps": 4,
"autoreset_mode": AutoresetMode.NEXT_STEP,
}

def transition(
Expand Down
2 changes: 2 additions & 0 deletions gymnasium/vector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from gymnasium.vector.async_vector_env import AsyncVectorEnv
from gymnasium.vector.sync_vector_env import SyncVectorEnv
from gymnasium.vector.vector_env import (
AutoresetMode,
VectorActionWrapper,
VectorEnv,
VectorObservationWrapper,
Expand All @@ -21,4 +22,5 @@
"SyncVectorEnv",
"AsyncVectorEnv",
"utils",
"AutoresetMode",
]
87 changes: 78 additions & 9 deletions gymnasium/vector/async_vector_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
read_from_shared_memory,
write_to_shared_memory,
)
from gymnasium.vector.vector_env import ArrayType, VectorEnv
from gymnasium.vector.vector_env import ArrayType, AutoresetMode, VectorEnv


__all__ = ["AsyncVectorEnv", "AsyncState"]
Expand Down Expand Up @@ -101,6 +101,7 @@ def __init__(
| None
) = None,
observation_mode: str | Space = "same",
autoreset_mode: str | AutoresetMode = AutoresetMode.NEXT_STEP,
):
"""Vectorized environment that runs multiple environments in parallel.
Expand All @@ -120,6 +121,7 @@ def __init__(
'different' defines that there can be multiple observation spaces with different parameters though requires the same shape and dtype,
warning, may raise unexpected errors. Passing a ``Tuple[Space, Space]`` object allows defining a custom ``single_observation_space`` and
``observation_space``, warning, may raise unexpected errors.
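autoreset_mode: The autoreset mode to use, given either as an ``AutoresetMode`` member or as a value accepted by ``AutoresetMode(...)``. TODO: link to the autoreset mode documentation.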
Warnings:
worker is an advanced mode option. It provides a high degree of flexibility and a high chance
Expand All @@ -135,7 +137,15 @@ def __init__(
self.env_fns = env_fns
self.shared_memory = shared_memory
self.copy = copy
self.context = context
self.daemon = daemon
self.worker = worker
self.observation_mode = observation_mode
self.autoreset_mode = (
autoreset_mode
if isinstance(autoreset_mode, AutoresetMode)
else AutoresetMode(autoreset_mode)
)

self.num_envs = len(env_fns)

Expand All @@ -145,6 +155,7 @@ def __init__(

# Because we support `make_vec(spec)`, we can't include a `spec = dummy_env.spec`, as this doesn't guarantee that we can actually recreate the vector env.
self.metadata = dummy_env.metadata
self.metadata["autoreset_mode"] = self.autoreset_mode
self.render_mode = dummy_env.render_mode

self.single_action_space = dummy_env.action_space
Expand Down Expand Up @@ -211,6 +222,7 @@ def __init__(
parent_pipe,
_obs_buffer,
self.error_queue,
self.autoreset_mode,
),
)

Expand Down Expand Up @@ -287,9 +299,32 @@ def reset_async(
str(self._state.value),
)

for pipe, env_seed in zip(self.parent_pipes, seed):
env_kwargs = {"seed": env_seed, "options": options}
pipe.send(("reset", env_kwargs))
if options is not None and "reset_mask" in options:
reset_mask = options.pop("reset_mask")
assert isinstance(
reset_mask, np.ndarray
), f"`options['reset_mask': mask]` must be a numpy array, got {type(reset_mask)}"
assert reset_mask.shape == (
self.num_envs,
), f"`options['reset_mask': mask]` must have shape `({self.num_envs},)`, got {reset_mask.shape}"
assert (
reset_mask.dtype == np.bool_
), f"`options['reset_mask': mask]` must have `dtype=np.bool_`, got {reset_mask.dtype}"
assert np.any(
reset_mask
), f"`options['reset_mask': mask]` must contain a boolean array, got reset_mask={reset_mask}"

for pipe, env_seed, env_reset in zip(self.parent_pipes, seed, reset_mask):
if env_reset:
env_kwargs = {"seed": env_seed, "options": options}
pipe.send(("reset", env_kwargs))
else:
pipe.send(("reset-noop", None))
else:
for pipe, env_seed in zip(self.parent_pipes, seed):
env_kwargs = {"seed": env_seed, "options": options}
pipe.send(("reset", env_kwargs))

self._state = AsyncState.WAITING_RESET

def reset_wait(
Expand Down Expand Up @@ -688,11 +723,13 @@ def _async_worker(
parent_pipe: Connection,
shared_memory: multiprocessing.Array | dict[str, Any] | tuple[Any, ...],
error_queue: Queue,
autoreset_mode: AutoresetMode,
):
env = env_fn()
observation_space = env.observation_space
action_space = env.action_space
autoreset = False
observation = None

parent_pipe.close()

Expand All @@ -709,19 +746,51 @@ def _async_worker(
observation = None
autoreset = False
pipe.send(((observation, info), True))
elif command == "reset-noop":
pipe.send(((observation, {}), True))
elif command == "step":
if autoreset:
observation, info = env.reset()
reward, terminated, truncated = 0, False, False
else:
if autoreset_mode == AutoresetMode.NEXT_STEP:
if autoreset:
observation, info = env.reset()
reward, terminated, truncated = 0, False, False
else:
(
observation,
reward,
terminated,
truncated,
info,
) = env.step(data)
autoreset = terminated or truncated
elif autoreset_mode == AutoresetMode.SAME_STEP:
(
observation,
reward,
terminated,
truncated,
info,
) = env.step(data)
autoreset = terminated or truncated

if terminated or truncated:
reset_observation, reset_info = env.reset()

info = {
"final_info": info,
"final_obs": observation,
**reset_info,
}
observation = reset_observation
elif autoreset_mode == AutoresetMode.DISABLED:
assert autoreset is False
(
observation,
reward,
terminated,
truncated,
info,
) = env.step(data)
else:
raise ValueError(f"Unexpected autoreset_mode: {autoreset_mode}")

if shared_memory:
write_to_shared_memory(
Expand Down
Loading

0 comments on commit c20bb25

Please sign in to comment.