diff --git a/gymnasium/envs/box2d/lunar_lander.py b/gymnasium/envs/box2d/lunar_lander.py index 4242105a9..4aeff6ac9 100644 --- a/gymnasium/envs/box2d/lunar_lander.py +++ b/gymnasium/envs/box2d/lunar_lander.py @@ -95,8 +95,6 @@ class LunarLander(gym.Env, EzPickle): ```shell python gymnasium/envs/box2d/lunar_lander.py ``` - - ## Action Space There are four discrete actions available: diff --git a/gymnasium/wrappers/common.py b/gymnasium/wrappers/common.py index 1489e25a2..dd70ab5d7 100644 --- a/gymnasium/wrappers/common.py +++ b/gymnasium/wrappers/common.py @@ -165,28 +165,9 @@ def spec(self) -> EnvSpec | None: class Autoreset( gym.Wrapper[ObsType, ActType, ObsType, ActType], gym.utils.RecordConstructorArgs ): - """The wrapped environment is automatically reset when an terminated or truncated state is reached. + """The wrapped environment is automatically reset when a terminated or truncated state is reached. - When calling step causes :meth:`Env.step` to return `terminated=True` or `truncated=True`, :meth:`Env.reset` is called, - and the return format of :meth:`self.step` is as follows: ``(new_obs, final_reward, final_terminated, final_truncated, info)`` - with new step API and ``(new_obs, final_reward, final_done, info)`` with the old step API. - No vector version of the wrapper exists. - - - ``obs`` is the first observation after calling :meth:`self.env.reset` - - ``final_reward`` is the reward after calling :meth:`self.env.step`, prior to calling :meth:`self.env.reset`. - - ``final_terminated`` is the terminated value before calling :meth:`self.env.reset`. - - ``final_truncated`` is the truncated value before calling :meth:`self.env.reset`. Both `final_terminated` and `final_truncated` cannot be False. 
- - ``info`` is a dict containing all the keys from the info dict returned by the call to :meth:`self.env.reset`, - with an additional key "final_observation" containing the observation returned by the last call to :meth:`self.env.step` - and "final_info" containing the info dict returned by the last call to :meth:`self.env.step`. - - Warning: - When using this wrapper to collect rollouts, note that when :meth:`Env.step` returns `terminated` or `truncated`, a - new observation from after calling :meth:`Env.reset` is returned by :meth:`Env.step` alongside the - final reward, terminated and truncated state from the previous episode. - If you need the final state from the previous episode, you need to retrieve it via the - "final_observation" key in the info dict. - Make sure you know what you're doing if you use this wrapper! + This follows the vector autoreset API, where the environment is reset on the step after an episode terminates or is truncated. Change logs: * v0.24.0 - Initially added as `AutoResetWrapper` @@ -473,10 +454,6 @@ class RecordEpisodeStatistics( For a vectorized environments the output will be in the form of:: >>> infos = { - ... "final_observation": "", - ... "_final_observation": "", - ... "final_info": "", - ... "_final_info": "", ... "episode": { ... "r": "", ... "l": "",