Randomize LunarLander wind generation at reset to gain statistical independence between episodes (#959)
TobiasKallehauge authored Mar 9, 2024
1 parent d684778 commit fd4ae52
Showing 8 changed files with 70 additions and 36 deletions.
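
In short, with `enable_wind=True` the wind and turbulence offsets (`wind_idx`, `torque_idx`) are now redrawn from the environment's seeded RNG on every `reset()` instead of once in `__init__`, so consecutive episodes see statistically independent wind unless the same seed is reused. A minimal sketch of the resulting behaviour (an editorial illustration, not part of this commit; it assumes a Gymnasium build that includes this change and an installed Box2D backend):

```python
import gymnasium as gym

# Unwrap to reach the raw LunarLander instance; wind_idx / torque_idx are
# (re)drawn from the seeded RNG during reset() when wind is enabled.
env = gym.make("LunarLander-v3", enable_wind=True).unwrapped

env.reset(seed=42)
first = (env.wind_idx, env.torque_idx)

env.reset(seed=42)   # same seed -> RNG re-seeded -> same wind pattern
assert (env.wind_idx, env.torque_idx) == first

env.reset()          # unseeded reset -> a fresh, independent wind pattern
print((env.wind_idx, env.torque_idx), "vs", first)
```

Previously the offsets were drawn once from the global `np.random` in `__init__`, so wind across consecutive episodes of the same environment instance was not statistically independent.
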
2 changes: 1 addition & 1 deletion docs/index.md
@@ -23,7 +23,7 @@ An API standard for reinforcement learning with a diverse collection of referenc
import gymnasium as gym
# Initialise the environment
env = gym.make("LunarLander-v2", render_mode="human")
env = gym.make("LunarLander-v3", render_mode="human")
# Reset the environment to generate the first observation
observation, info = env.reset(seed=42)
2 changes: 1 addition & 1 deletion docs/introduction/basic_usage.md
@@ -54,7 +54,7 @@ For gymnasium, the "agent-environment-loop" is implemented below for a single ep
```python
import gymnasium as gym

env = gym.make("LunarLander-v2", render_mode="human")
env = gym.make("LunarLander-v3", render_mode="human")
observation, info = env.reset()

episode_over = False
4 changes: 2 additions & 2 deletions docs/introduction/migration_guide.md
@@ -15,7 +15,7 @@ Gymnasium is a fork of `OpenAI Gym v0.26 <https://github.com/openai/gym/releases

```python
import gym
env = gym.make("LunarLander-v2", options={})
env = gym.make("LunarLander-v3", options={})
env.seed(123)
observation = env.reset()

@@ -33,7 +33,7 @@ env.close()

```python
import gym
env = gym.make("LunarLander-v2", render_mode="human")
env = gym.make("LunarLander-v3", render_mode="human")
observation, info = env.reset(seed=123, options={})

done = False
28 changes: 14 additions & 14 deletions docs/tutorials/gymnasium_basics/vector_envs_tutorial.py
@@ -267,7 +267,7 @@ def update_parameters(
# The simplest way to create vector environments is by calling `gym.vector.make`, which creates multiple instances of the same environment:
#

envs = gym.vector.make("LunarLander-v2", num_envs=3, max_episode_steps=600)
envs = gym.vector.make("LunarLander-v3", num_envs=3, max_episode_steps=600)


# %%
@@ -277,29 +277,29 @@ def update_parameters(
# If we want to randomize the environment for training to get more robust agents (that can deal with different parameterizations of an environment
# and therefore might have a higher degree of generalization), we can set the desired parameters manually or use a pseudo-random number generator to generate them.
#
-# Manually setting up 3 parallel 'LunarLander-v2' envs with different parameters:
+# Manually setting up 3 parallel 'LunarLander-v3' envs with different parameters:


envs = gym.vector.AsyncVectorEnv(
[
lambda: gym.make(
"LunarLander-v2",
"LunarLander-v3",
gravity=-10.0,
enable_wind=True,
wind_power=15.0,
turbulence_power=1.5,
max_episode_steps=600,
),
lambda: gym.make(
"LunarLander-v2",
"LunarLander-v3",
gravity=-9.8,
enable_wind=True,
wind_power=10.0,
turbulence_power=1.3,
max_episode_steps=600,
),
lambda: gym.make(
"LunarLander-v2", gravity=-7.0, enable_wind=False, max_episode_steps=600
"LunarLander-v3", gravity=-7.0, enable_wind=False, max_episode_steps=600
),
]
)
@@ -309,14 +309,14 @@ def update_parameters(
#
# ------------------------------
#
-# Randomly generating the parameters for 3 parallel 'LunarLander-v2' envs, using `np.clip` to stay in the recommended parameter space:
+# Randomly generating the parameters for 3 parallel 'LunarLander-v3' envs, using `np.clip` to stay in the recommended parameter space:
#


envs = gym.vector.AsyncVectorEnv(
[
lambda: gym.make(
"LunarLander-v2",
"LunarLander-v3",
gravity=np.clip(
np.random.normal(loc=-10.0, scale=1.0), a_min=-11.99, a_max=-0.01
),
@@ -374,7 +374,7 @@ def update_parameters(
envs = gym.vector.AsyncVectorEnv(
[
lambda: gym.make(
"LunarLander-v2",
"LunarLander-v3",
gravity=np.clip(
np.random.normal(loc=-10.0, scale=1.0), a_min=-11.99, a_max=-0.01
),
@@ -392,7 +392,7 @@ def update_parameters(
)

else:
envs = gym.vector.make("LunarLander-v2", num_envs=n_envs, max_episode_steps=600)
envs = gym.vector.make("LunarLander-v3", num_envs=n_envs, max_episode_steps=600)


obs_shape = envs.single_observation_space.shape[0]
@@ -499,7 +499,7 @@ def update_parameters(
rolling_length = 20
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(12, 5))
fig.suptitle(
f"Training plots for {agent.__class__.__name__} in the LunarLander-v2 environment \n \
f"Training plots for {agent.__class__.__name__} in the LunarLander-v3 environment \n \
(n_envs={n_envs}, n_steps_per_update={n_steps_per_update}, randomize_domain={randomize_domain})"
)

@@ -606,7 +606,7 @@ def update_parameters(
# because the gradients of the environments are good enough after a relatively low number of environments
# (especially if the environment is not very complex). In this case, increasing the number of environments
# does not increase the learning speed, and actually increases the runtime, possibly due to the additional time
-# needed to calculate the gradients. For LunarLander-v2, the best performing configuration used an AsyncVectorEnv
+# needed to calculate the gradients. For LunarLander-v3, the best performing configuration used an AsyncVectorEnv
# with 10 parallel environments, but environments with a higher complexity may require more
# parallel environments to achieve optimal performance.
#
@@ -662,7 +662,7 @@ def update_parameters(
# create a new sample environment to get new random parameters
if randomize_domain:
env = gym.make(
"LunarLander-v2",
"LunarLander-v3",
render_mode="human",
gravity=np.clip(
np.random.normal(loc=-10.0, scale=2.0), a_min=-11.99, a_max=-0.01
@@ -677,7 +677,7 @@ def update_parameters(
max_episode_steps=500,
)
else:
env = gym.make("LunarLander-v2", render_mode="human", max_episode_steps=500)
env = gym.make("LunarLander-v3", render_mode="human", max_episode_steps=500)

# get an initial state
state, info = env.reset()
@@ -705,7 +705,7 @@ def update_parameters(

# from gymnasium.utils.play import play
#
-# play(gym.make('LunarLander-v2', render_mode='rgb_array'),
+# play(gym.make('LunarLander-v3', render_mode='rgb_array'),
# keys_to_action={'w': 2, 'a': 1, 'd': 3}, noop=0)


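A side note on the tutorial diff above (again an editorial illustration, not part of the commit): once the wind offsets are redrawn at reset, vectorized `LunarLander-v3` workers only need a single seeded `reset()` to obtain reproducible yet distinct wind patterns per worker. A rough sketch, assuming Gymnasium's standard vector API and a build containing this change:

```python
import gymnasium as gym

# Three parallel LunarLander-v3 workers with wind enabled.
envs = gym.vector.SyncVectorEnv(
    [
        lambda: gym.make("LunarLander-v3", enable_wind=True, max_episode_steps=600)
        for _ in range(3)
    ]
)

# An integer seed is spread over the sub-environments (seed, seed + 1, ...),
# so each worker draws its own, reproducible wind/turbulence offsets at reset.
observations, infos = envs.reset(seed=123)
envs.close()
```
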
4 changes: 2 additions & 2 deletions gymnasium/envs/__init__.py
@@ -81,14 +81,14 @@
# ----------------------------------------

register(
id="LunarLander-v2",
id="LunarLander-v3",
entry_point="gymnasium.envs.box2d.lunar_lander:LunarLander",
max_episode_steps=1000,
reward_threshold=200,
)

register(
id="LunarLanderContinuous-v2",
id="LunarLanderContinuous-v3",
entry_point="gymnasium.envs.box2d.lunar_lander:LunarLander",
kwargs={"continuous": True},
max_episode_steps=1000,
17 changes: 10 additions & 7 deletions gymnasium/envs/box2d/lunar_lander.py
@@ -97,7 +97,7 @@ class LunarLander(gym.Env, EzPickle):
python gymnasium/envs/box2d/lunar_lander.py
```
<!-- To play yourself, run: -->
-<!-- python examples/agents/keyboard_agent.py LunarLander-v2 -->
+<!-- python examples/agents/keyboard_agent.py LunarLander-v3 -->
## Action Space
There are four discrete actions available:
@@ -150,10 +150,10 @@ class LunarLander(gym.Env, EzPickle):
```python
>>> import gymnasium as gym
>>> env = gym.make("LunarLander-v2", continuous=False, gravity=-10.0,
>>> env = gym.make("LunarLander-v3", continuous=False, gravity=-10.0,
... enable_wind=False, wind_power=15.0, turbulence_power=1.5)
>>> env
-<TimeLimit<OrderEnforcing<PassiveEnvChecker<LunarLander<LunarLander-v2>>>>>
+<TimeLimit<OrderEnforcing<PassiveEnvChecker<LunarLander<LunarLander-v3>>>>>
```
@@ -179,6 +179,7 @@ class LunarLander(gym.Env, EzPickle):
The recommended value for `turbulence_power` is between 0.0 and 2.0.
## Version History
+- v3: Reset wind and turbulence offset (`C`) whenever the environment is reset to ensure statistical independence between consecutive episodes (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium/issues/954)).
- v2: Count energy spent and in v0.24, added turbulence with wind power and turbulence_power parameters
- v1: Legs contact with ground added in state vector; contact with ground gives +10 reward points,
and -10 if they lose contact; reward renormalized to 200; harder initial random push.
@@ -254,8 +255,6 @@ def __init__(
self.turbulence_power = turbulence_power

self.enable_wind = enable_wind
-self.wind_idx = np.random.randint(-9999, 9999)
-self.torque_idx = np.random.randint(-9999, 9999)

self.screen: pygame.Surface = None
self.clock = None
@@ -403,6 +402,10 @@ def reset(
True,
)

+if self.enable_wind: # Initialize wind pattern based on index
+self.wind_idx = self.np_random.integers(-9999, 9999)
+self.torque_idx = self.np_random.integers(-9999, 9999)

# Create Lander Legs
self.legs = []
for i in [-1, +1]:
@@ -872,10 +875,10 @@ def __init__(self):
"Error initializing LunarLanderContinuous Environment.\n"
"Currently, we do not support initializing this mode of environment by calling the class directly.\n"
"To use this environment, instead create it by specifying the continuous keyword in gym.make, i.e.\n"
'gym.make("LunarLander-v2", continuous=True)'
'gym.make("LunarLander-v3", continuous=True)'
)


if __name__ == "__main__":
env = gym.make("LunarLander-v2", render_mode="rgb_array")
env = gym.make("LunarLander-v3", render_mode="rgb_array")
demo_heuristic_lander(env, render=True)
16 changes: 8 additions & 8 deletions gymnasium/wrappers/rendering.py
@@ -35,7 +35,7 @@ class RenderCollection(
Example:
Return the list of frames for the number of steps ``render`` wasn't called.
>>> import gymnasium as gym
>>> env = gym.make("LunarLander-v2", render_mode="rgb_array")
>>> env = gym.make("LunarLander-v3", render_mode="rgb_array")
>>> env = RenderCollection(env)
>>> _ = env.reset(seed=123)
>>> for _ in range(5):
@@ -51,7 +51,7 @@ class RenderCollection(
Return the list of frames for the number of steps the episode was running.
>>> import gymnasium as gym
>>> env = gym.make("LunarLander-v2", render_mode="rgb_array")
>>> env = gym.make("LunarLander-v3", render_mode="rgb_array")
>>> env = RenderCollection(env, pop_frames=False)
>>> _ = env.reset(seed=123)
>>> for _ in range(5):
@@ -67,7 +67,7 @@ class RenderCollection(
Collect all frames for all episodes, without clearing them when render is called
>>> import gymnasium as gym
>>> env = gym.make("LunarLander-v2", render_mode="rgb_array")
>>> env = gym.make("LunarLander-v3", render_mode="rgb_array")
>>> env = RenderCollection(env, pop_frames=False, reset_clean=False)
>>> _ = env.reset(seed=123)
>>> for _ in range(5):
@@ -177,7 +177,7 @@ class RecordVideo(
Examples - Run the environment for 50 episodes, and save the video every 10 episodes starting from the 0th:
>>> import os
>>> import gymnasium as gym
>>> env = gym.make("LunarLander-v2", render_mode="rgb_array")
>>> env = gym.make("LunarLander-v3", render_mode="rgb_array")
>>> trigger = lambda t: t % 10 == 0
>>> env = RecordVideo(env, video_folder="./save_videos1", episode_trigger=trigger, disable_logger=True)
>>> for i in range(50):
@@ -193,7 +193,7 @@ class RecordVideo(
Examples - Run the environment for 5 episodes, start a recording every 200th step, making sure each video is 100 frames long:
>>> import os
>>> import gymnasium as gym
>>> env = gym.make("LunarLander-v2", render_mode="rgb_array")
>>> env = gym.make("LunarLander-v3", render_mode="rgb_array")
>>> trigger = lambda t: t % 200 == 0
>>> env = RecordVideo(env, video_folder="./save_videos2", step_trigger=trigger, video_length=100, disable_logger=True)
>>> for i in range(5):
@@ -210,7 +210,7 @@ class RecordVideo(
Examples - Run 3 episodes, record everything, but in chunks of 1000 frames:
>>> import os
>>> import gymnasium as gym
>>> env = gym.make("LunarLander-v2", render_mode="rgb_array")
>>> env = gym.make("LunarLander-v3", render_mode="rgb_array")
>>> env = RecordVideo(env, video_folder="./save_videos3", video_length=1000, disable_logger=True)
>>> for i in range(3):
... termination, truncation = False, False
@@ -432,7 +432,7 @@ class HumanRendering(
Example:
>>> import gymnasium as gym
>>> from gymnasium.wrappers import HumanRendering
>>> env = gym.make("LunarLander-v2", render_mode="rgb_array")
>>> env = gym.make("LunarLander-v3", render_mode="rgb_array")
>>> wrapped = HumanRendering(env)
>>> obs, _ = wrapped.reset() # This will start rendering to the screen
@@ -446,7 +446,7 @@ class HumanRendering(
Warning: If the base environment uses ``render_mode="rgb_array_list"``, its (i.e. the *base environment's*) render method
will always return an empty list:
>>> env = gym.make("LunarLander-v2", render_mode="rgb_array_list")
>>> env = gym.make("LunarLander-v3", render_mode="rgb_array_list")
>>> wrapped = HumanRendering(env)
>>> obs, _ = wrapped.reset()
>>> env.render() # env.render() will always return an empty list!
33 changes: 32 additions & 1 deletion tests/envs/test_env_implementation.py
@@ -12,11 +12,42 @@

def test_lunar_lander_heuristics():
"""Tests the LunarLander environment by checking if the heuristic lander works."""
lunar_lander = gym.make("LunarLander-v2", disable_env_checker=True)
lunar_lander = gym.make("LunarLander-v3", disable_env_checker=True)
total_reward = demo_heuristic_lander(lunar_lander, seed=1)
assert total_reward > 100


@pytest.mark.parametrize("seed", [0, 10, 20, 30, 40])
def test_lunar_lander_random_wind_seed(seed: int):
"""Test that the wind_idx and torque are correctly drawn when setting a seed"""

lunar_lander = gym.make(
"LunarLander-v3", disable_env_checker=True, enable_wind=True
).unwrapped
lunar_lander.reset(seed=seed)

# Test that same seed gives same wind
w1, t1 = lunar_lander.wind_idx, lunar_lander.torque_idx
lunar_lander.reset(seed=seed)
w2, t2 = lunar_lander.wind_idx, lunar_lander.torque_idx
assert (
w1 == w2 and t1 == t2
), "Setting same seed caused different initial wind or torque index"

# Test that different seeds give different wind
# There is a small chance that different seeds cause the same numbers, so test
# 10 times (with different seeds) to make this chance incredibly tiny.
for i in range(1, 11):
lunar_lander.reset(seed=seed + i)
w3, t3 = lunar_lander.wind_idx, lunar_lander.torque_idx
if w2 != w3 and t1 != t3: # Found different initial values
break
else: # no break
raise AssertionError(
"Setting different seed caused same initial wind or torque index"
)


def test_carracing_domain_randomize():
"""Tests the CarRacing Environment domain randomization.
