From fd4ae5204558aa2871276ce4353056ac329534cb Mon Sep 17 00:00:00 2001 From: TobiasKallehauge Date: Sat, 9 Mar 2024 10:42:08 +0100 Subject: [PATCH] Randomize LunarLander wind generation at reset to gain statistical independence between episodes (#959) --- docs/index.md | 2 +- docs/introduction/basic_usage.md | 2 +- docs/introduction/migration_guide.md | 4 +-- .../gymnasium_basics/vector_envs_tutorial.py | 28 ++++++++-------- gymnasium/envs/__init__.py | 4 +-- gymnasium/envs/box2d/lunar_lander.py | 17 ++++++---- gymnasium/wrappers/rendering.py | 16 ++++----- tests/envs/test_env_implementation.py | 33 ++++++++++++++++++- 8 files changed, 70 insertions(+), 36 deletions(-) diff --git a/docs/index.md b/docs/index.md index 208535862..1a340a49e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -23,7 +23,7 @@ An API standard for reinforcement learning with a diverse collection of referenc import gymnasium as gym # Initialise the environment -env = gym.make("LunarLander-v2", render_mode="human") +env = gym.make("LunarLander-v3", render_mode="human") # Reset the environment to generate the first observation observation, info = env.reset(seed=42) diff --git a/docs/introduction/basic_usage.md b/docs/introduction/basic_usage.md index dec6861d2..87bb2e608 100644 --- a/docs/introduction/basic_usage.md +++ b/docs/introduction/basic_usage.md @@ -54,7 +54,7 @@ For gymnasium, the "agent-environment-loop" is implemented below for a single ep ```python import gymnasium as gym -env = gym.make("LunarLander-v2", render_mode="human") +env = gym.make("LunarLander-v3", render_mode="human") observation, info = env.reset() episode_over = False diff --git a/docs/introduction/migration_guide.md b/docs/introduction/migration_guide.md index 895104bc5..267fd172a 100644 --- a/docs/introduction/migration_guide.md +++ b/docs/introduction/migration_guide.md @@ -15,7 +15,7 @@ Gymnasium is a fork of `OpenAI Gym v0.26 - + ## Action Space There are four discrete actions available: @@ -150,10 +150,10 @@ 
class LunarLander(gym.Env, EzPickle): ```python >>> import gymnasium as gym - >>> env = gym.make("LunarLander-v2", continuous=False, gravity=-10.0, + >>> env = gym.make("LunarLander-v3", continuous=False, gravity=-10.0, ... enable_wind=False, wind_power=15.0, turbulence_power=1.5) >>> env - <TimeLimit<OrderEnforcing<PassiveEnvChecker<LunarLander<LunarLander-v2>>>>> + <TimeLimit<OrderEnforcing<PassiveEnvChecker<LunarLander<LunarLander-v3>>>>> ``` @@ -179,6 +179,7 @@ class LunarLander(gym.Env, EzPickle): The recommended value for `turbulence_power` is between 0.0 and 2.0. ## Version History + - v3: Reset wind and turbulence offset (`C`) whenever the environment is reset to ensure statistical independence between consecutive episodes (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium/issues/954)). - v2: Count energy spent and in v0.24, added turbulence with wind power and turbulence_power parameters - v1: Legs contact with ground added in state vector; contact with ground give +10 reward points, and -10 if then lose contact; reward renormalized to 200; harder initial random push. @@ -254,8 +255,6 @@ def __init__( self.turbulence_power = turbulence_power self.enable_wind = enable_wind - self.wind_idx = np.random.randint(-9999, 9999) - self.torque_idx = np.random.randint(-9999, 9999) self.screen: pygame.Surface = None self.clock = None @@ -403,6 +402,10 @@ def reset( True, ) + if self.enable_wind: # Initialize wind pattern based on index + self.wind_idx = self.np_random.integers(-9999, 9999) + self.torque_idx = self.np_random.integers(-9999, 9999) + # Create Lander Legs self.legs = [] for i in [-1, +1]: @@ -872,10 +875,10 @@ def __init__(self): "Error initializing LunarLanderContinuous Environment.\n" "Currently, we do not support initializing this mode of environment by calling the class directly.\n" "To use this environment, instead create it by specifying the continuous keyword in gym.make, i.e.\n" - 'gym.make("LunarLander-v2", continuous=True)' + 'gym.make("LunarLander-v3", continuous=True)' ) if __name__ == "__main__": - env = gym.make("LunarLander-v2", render_mode="rgb_array") + env
= gym.make("LunarLander-v3", render_mode="rgb_array") demo_heuristic_lander(env, render=True) diff --git a/gymnasium/wrappers/rendering.py b/gymnasium/wrappers/rendering.py index 6b1afade9..591ce7861 100644 --- a/gymnasium/wrappers/rendering.py +++ b/gymnasium/wrappers/rendering.py @@ -35,7 +35,7 @@ class RenderCollection( Example: Return the list of frames for the number of steps ``render`` wasn't called. >>> import gymnasium as gym - >>> env = gym.make("LunarLander-v2", render_mode="rgb_array") + >>> env = gym.make("LunarLander-v3", render_mode="rgb_array") >>> env = RenderCollection(env) >>> _ = env.reset(seed=123) >>> for _ in range(5): @@ -51,7 +51,7 @@ class RenderCollection( Return the list of frames for the number of steps the episode was running. >>> import gymnasium as gym - >>> env = gym.make("LunarLander-v2", render_mode="rgb_array") + >>> env = gym.make("LunarLander-v3", render_mode="rgb_array") >>> env = RenderCollection(env, pop_frames=False) >>> _ = env.reset(seed=123) >>> for _ in range(5): @@ -67,7 +67,7 @@ class RenderCollection( Collect all frames for all episodes, without clearing them when render is called >>> import gymnasium as gym - >>> env = gym.make("LunarLander-v2", render_mode="rgb_array") + >>> env = gym.make("LunarLander-v3", render_mode="rgb_array") >>> env = RenderCollection(env, pop_frames=False, reset_clean=False) >>> _ = env.reset(seed=123) >>> for _ in range(5): @@ -177,7 +177,7 @@ class RecordVideo( Examples - Run the environment for 50 episodes, and save the video every 10 episodes starting from the 0th: >>> import os >>> import gymnasium as gym - >>> env = gym.make("LunarLander-v2", render_mode="rgb_array") + >>> env = gym.make("LunarLander-v3", render_mode="rgb_array") >>> trigger = lambda t: t % 10 == 0 >>> env = RecordVideo(env, video_folder="./save_videos1", episode_trigger=trigger, disable_logger=True) >>> for i in range(50): @@ -193,7 +193,7 @@ class RecordVideo( Examples - Run the environment for 5 episodes, start a 
recording every 200th step, making sure each video is 100 frames long: >>> import os >>> import gymnasium as gym - >>> env = gym.make("LunarLander-v2", render_mode="rgb_array") + >>> env = gym.make("LunarLander-v3", render_mode="rgb_array") >>> trigger = lambda t: t % 200 == 0 >>> env = RecordVideo(env, video_folder="./save_videos2", step_trigger=trigger, video_length=100, disable_logger=True) >>> for i in range(5): @@ -210,7 +210,7 @@ class RecordVideo( Examples - Run 3 episodes, record everything, but in chunks of 1000 frames: >>> import os >>> import gymnasium as gym - >>> env = gym.make("LunarLander-v2", render_mode="rgb_array") + >>> env = gym.make("LunarLander-v3", render_mode="rgb_array") >>> env = RecordVideo(env, video_folder="./save_videos3", video_length=1000, disable_logger=True) >>> for i in range(3): ... termination, truncation = False, False @@ -432,7 +432,7 @@ class HumanRendering( Example: >>> import gymnasium as gym >>> from gymnasium.wrappers import HumanRendering - >>> env = gym.make("LunarLander-v2", render_mode="rgb_array") + >>> env = gym.make("LunarLander-v3", render_mode="rgb_array") >>> wrapped = HumanRendering(env) >>> obs, _ = wrapped.reset() # This will start rendering to the screen @@ -446,7 +446,7 @@ class HumanRendering( Warning: If the base environment uses ``render_mode="rgb_array_list"``, its (i.e. the *base environment's*) render method will always return an empty list: - >>> env = gym.make("LunarLander-v2", render_mode="rgb_array_list") + >>> env = gym.make("LunarLander-v3", render_mode="rgb_array_list") >>> wrapped = HumanRendering(env) >>> obs, _ = wrapped.reset() >>> env.render() # env.render() will always return an empty list! 
diff --git a/tests/envs/test_env_implementation.py b/tests/envs/test_env_implementation.py index 22677b328..e75982b86 100644 --- a/tests/envs/test_env_implementation.py +++ b/tests/envs/test_env_implementation.py @@ -12,11 +12,42 @@ def test_lunar_lander_heuristics(): """Tests the LunarLander environment by checking if the heuristic lander works.""" - lunar_lander = gym.make("LunarLander-v2", disable_env_checker=True) + lunar_lander = gym.make("LunarLander-v3", disable_env_checker=True) total_reward = demo_heuristic_lander(lunar_lander, seed=1) assert total_reward > 100 +@pytest.mark.parametrize("seed", [0, 10, 20, 30, 40]) +def test_lunar_lander_random_wind_seed(seed: int): + """Test that the wind_idx and torque are correctly drawn when setting a seed""" + + lunar_lander = gym.make( + "LunarLander-v3", disable_env_checker=True, enable_wind=True + ).unwrapped + lunar_lander.reset(seed=seed) + + # Test that same seed gives same wind + w1, t1 = lunar_lander.wind_idx, lunar_lander.torque_idx + lunar_lander.reset(seed=seed) + w2, t2 = lunar_lander.wind_idx, lunar_lander.torque_idx + assert ( + w1 == w2 and t1 == t2 + ), "Setting same seed caused different initial wind or torque index" + + # Test that different seed gives different wind + # There is a small chance that different seeds cause the same number, so test + # 10 times (with different seeds) to make this chance incredibly tiny. + for i in range(1, 11): + lunar_lander.reset(seed=seed + i) + w3, t3 = lunar_lander.wind_idx, lunar_lander.torque_idx + if w2 != w3 and t1 != t3: # Found different initial values + break + else: # no break + raise AssertionError( + "Setting different seed caused same initial wind or torque index" + ) + + def test_carracing_domain_randomize(): """Tests the CarRacing Environment domain randomization.