Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change end-of-episode in CarRacing to termination as opposed to truncation #813

Merged
merged 5 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/introduction/basic_usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ In order to wrap an environment, you must first initialize a base environment. T
```python
>>> import gymnasium as gym
>>> from gymnasium.wrappers import FlattenObservation
>>> env = gym.make("CarRacing-v2")
>>> env = gym.make("CarRacing-v3")
>>> env.observation_space.shape
(96, 96, 3)
>>> wrapped_env = FlattenObservation(env)
Expand Down Expand Up @@ -156,7 +156,7 @@ If you have a wrapped environment, and you want to get the unwrapped environment

```python
>>> wrapped_env
<FlattenObservation<TimeLimit<OrderEnforcing<PassiveEnvChecker<CarRacing<CarRacing-v2>>>>>>
<FlattenObservation<TimeLimit<OrderEnforcing<PassiveEnvChecker<CarRacing<CarRacing-v3>>>>>>
>>> wrapped_env.unwrapped
<gymnasium.envs.box2d.car_racing.CarRacing object at 0x7f04efcb8850>
```
Expand Down
2 changes: 1 addition & 1 deletion gymnasium/envs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@
)

register(
id="CarRacing-v2",
id="CarRacing-v3",
entry_point="gymnasium.envs.box2d.car_racing:CarRacing",
max_episode_steps=1000,
reward_threshold=900,
Expand Down
18 changes: 10 additions & 8 deletions gymnasium/envs/box2d/car_racing.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,9 @@ class CarRacing(gym.Env, EzPickle):

```python
>>> import gymnasium as gym
>>> env = gym.make("CarRacing-v2", render_mode="rgb_array", lap_complete_percent=0.95, domain_randomize=False, continuous=False)
>>> env = gym.make("CarRacing-v3", render_mode="rgb_array", lap_complete_percent=0.95, domain_randomize=False, continuous=False)
>>> env
<TimeLimit<OrderEnforcing<PassiveEnvChecker<CarRacing<CarRacing-v2>>>>>
<TimeLimit<OrderEnforcing<PassiveEnvChecker<CarRacing<CarRacing-v3>>>>>

```

Expand All @@ -176,7 +176,7 @@ class CarRacing(gym.Env, EzPickle):

```python
>>> import gymnasium as gym
>>> env = gym.make("CarRacing-v2", domain_randomize=True)
>>> env = gym.make("CarRacing-v3", domain_randomize=True)

# normal reset, this changes the colour scheme by default
>>> obs, _ = env.reset()
Expand All @@ -190,6 +190,7 @@ class CarRacing(gym.Env, EzPickle):
```

## Version History
- v3: Change truncation to termination when finishing the lap (1.0.0)
- v1: Change track completion logic and add domain randomization (0.24.0)
- v0: Original version

Expand Down Expand Up @@ -564,6 +565,7 @@ def step(self, action: Union[np.ndarray, int]):
step_reward = 0
terminated = False
truncated = False
info = {}
if action is not None: # First step without action, called from reset()
self.reward -= 0.1
# We actually don't want to count fuel spent, we want car to be faster.
Expand All @@ -572,18 +574,18 @@ def step(self, action: Union[np.ndarray, int]):
step_reward = self.reward - self.prev_reward
self.prev_reward = self.reward
if self.tile_visited_count == len(self.track) or self.new_lap:
# Truncation due to finishing lap
# This should not be treated as a failure
# but like a timeout
truncated = True
# Termination due to finishing lap
terminated = True
info["lap_finished"] = True
x, y = self.car.hull.position
if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
terminated = True
info["lap_finished"] = False
step_reward = -100

if self.render_mode == "human":
self.render()
return self.state, step_reward, terminated, truncated, {}
return self.state, step_reward, terminated, truncated, info

def render(self):
if self.render_mode is None:
Expand Down
2 changes: 1 addition & 1 deletion gymnasium/utils/play.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def play(
>>> import gymnasium as gym
>>> import numpy as np
>>> from gymnasium.utils.play import play
>>> play(gym.make("CarRacing-v2", render_mode="rgb_array"), # doctest: +SKIP
>>> play(gym.make("CarRacing-v3", render_mode="rgb_array"), # doctest: +SKIP
... keys_to_action={
... "w": np.array([0, 0.7, 0], dtype=np.float32),
... "a": np.array([-1, 0, 0], dtype=np.float32),
Expand Down
2 changes: 1 addition & 1 deletion gymnasium/wrappers/stateful_observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ class FrameStackObservation(
Example:
>>> import gymnasium as gym
>>> from gymnasium.wrappers import FrameStackObservation
>>> env = gym.make("CarRacing-v2")
>>> env = gym.make("CarRacing-v3")
>>> env = FrameStackObservation(env, stack_size=4)
>>> env.observation_space
Box(0, 255, (4, 96, 96, 3), uint8)
Expand Down
8 changes: 4 additions & 4 deletions gymnasium/wrappers/transform_observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ class FlattenObservation(
Example:
>>> import gymnasium as gym
>>> from gymnasium.wrappers import FlattenObservation
>>> env = gym.make("CarRacing-v2")
>>> env = gym.make("CarRacing-v3")
>>> env.observation_space.shape
(96, 96, 3)
>>> env = FlattenObservation(env)
Expand Down Expand Up @@ -267,7 +267,7 @@ class GrayscaleObservation(
Example:
>>> import gymnasium as gym
>>> from gymnasium.wrappers import GrayscaleObservation
>>> env = gym.make("CarRacing-v2")
>>> env = gym.make("CarRacing-v3")
>>> env.observation_space.shape
(96, 96, 3)
>>> grayscale_env = GrayscaleObservation(env)
Expand Down Expand Up @@ -345,7 +345,7 @@ class ResizeObservation(
Example:
>>> import gymnasium as gym
>>> from gymnasium.wrappers import ResizeObservation
>>> env = gym.make("CarRacing-v2")
>>> env = gym.make("CarRacing-v3")
>>> env.observation_space.shape
(96, 96, 3)
>>> resized_env = ResizeObservation(env, (32, 32))
Expand Down Expand Up @@ -416,7 +416,7 @@ class ReshapeObservation(
Example:
>>> import gymnasium as gym
>>> from gymnasium.wrappers import ReshapeObservation
>>> env = gym.make("CarRacing-v2")
>>> env = gym.make("CarRacing-v3")
>>> env.observation_space.shape
(96, 96, 3)
>>> reshape_env = ReshapeObservation(env, (24, 4, 96, 1, 3))
Expand Down
8 changes: 4 additions & 4 deletions gymnasium/wrappers/vector/vectorize_observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ class FlattenObservation(VectorizeTransformObservation):

Example:
>>> import gymnasium as gym
>>> envs = gym.make_vec("CarRacing-v2", num_envs=3, vectorization_mode="sync")
>>> envs = gym.make_vec("CarRacing-v3", num_envs=3, vectorization_mode="sync")
>>> obs, info = envs.reset(seed=123)
>>> obs.shape
(3, 96, 96, 3)
Expand All @@ -238,7 +238,7 @@ class GrayscaleObservation(VectorizeTransformObservation):

Example:
>>> import gymnasium as gym
>>> envs = gym.make_vec("CarRacing-v2", num_envs=3, vectorization_mode="sync")
>>> envs = gym.make_vec("CarRacing-v3", num_envs=3, vectorization_mode="sync")
>>> obs, info = envs.reset(seed=123)
>>> obs.shape
(3, 96, 96, 3)
Expand Down Expand Up @@ -266,7 +266,7 @@ class ResizeObservation(VectorizeTransformObservation):

Example:
>>> import gymnasium as gym
>>> envs = gym.make_vec("CarRacing-v2", num_envs=3, vectorization_mode="sync")
>>> envs = gym.make_vec("CarRacing-v3", num_envs=3, vectorization_mode="sync")
>>> obs, info = envs.reset(seed=123)
>>> obs.shape
(3, 96, 96, 3)
Expand All @@ -292,7 +292,7 @@ class ReshapeObservation(VectorizeTransformObservation):

Example:
>>> import gymnasium as gym
>>> envs = gym.make_vec("CarRacing-v2", num_envs=3, vectorization_mode="sync")
>>> envs = gym.make_vec("CarRacing-v3", num_envs=3, vectorization_mode="sync")
>>> obs, info = envs.reset(seed=123)
>>> obs.shape
(3, 96, 96, 3)
Expand Down
2 changes: 1 addition & 1 deletion tests/envs/registration/test_make.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def test_make_human_rendering(register_rendering_testing_envs):
TypeError,
match=re.escape("got an unexpected keyword argument 'render'"),
):
gym.make("CarRacing-v2", render="human")
gym.make("CarRacing-v3", render="human")

# This test checks that a user can create an environment without the metadata including the render mode
with pytest.warns(
Expand Down
2 changes: 1 addition & 1 deletion tests/envs/test_env_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_carracing_domain_randomize():
CarRacing DomainRandomize should have different colours at every reset.
However, it should have same colours when `options={"randomize": False}` is given to reset.
"""
env: CarRacing = gym.make("CarRacing-v2", domain_randomize=True).unwrapped
env: CarRacing = gym.make("CarRacing-v3", domain_randomize=True).unwrapped

road_color = env.road_color
bg_color = env.bg_color
Expand Down
2 changes: 1 addition & 1 deletion tests/wrappers/test_normalize_observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_update_running_mean_property():

def test_normalize_obs_with_vector():
def thunk():
env = gym.make("CarRacing-v2")
env = gym.make("CarRacing-v3")
env = gym.wrappers.GrayscaleObservation(env)
env = gym.wrappers.NormalizeObservation(env)
return env
Expand Down
4 changes: 2 additions & 2 deletions tests/wrappers/test_resize_observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_resize_observation_wrapper(env):

@pytest.mark.parametrize("shape", ((10, 10), (20, 20), (60, 60), (100, 100)))
def test_resize_shapes(shape: tuple[int, int]):
env = ResizeObservation(gym.make("CarRacing-v2"), shape)
env = ResizeObservation(gym.make("CarRacing-v3"), shape)
assert env.observation_space == Box(
low=0, high=255, shape=shape + (3,), dtype=np.uint8
)
Expand All @@ -59,7 +59,7 @@ def test_resize_shapes(shape: tuple[int, int]):


def test_invalid_input():
env = gym.make("CarRacing-v2")
env = gym.make("CarRacing-v3")

with pytest.raises(AssertionError):
ResizeObservation(env, ())
Expand Down
8 changes: 4 additions & 4 deletions tests/wrappers/vector/test_vector_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ def custom_environments():
(
("CustomDictEnv-v0", "FilterObservation", {"filter_keys": ["a"]}),
("CartPole-v1", "FlattenObservation", {}),
("CarRacing-v2", "GrayscaleObservation", {}),
("CarRacing-v2", "ResizeObservation", {"shape": (35, 45)}),
("CarRacing-v2", "ReshapeObservation", {"shape": (96, 48, 6)}),
("CarRacing-v3", "GrayscaleObservation", {}),
("CarRacing-v3", "ResizeObservation", {"shape": (35, 45)}),
("CarRacing-v3", "ReshapeObservation", {"shape": (96, 48, 6)}),
(
"CartPole-v1",
"RescaleObservation",
Expand All @@ -53,7 +53,7 @@ def custom_environments():
"max_obs": np.array([1, np.inf, 1, np.inf]),
},
),
("CarRacing-v2", "DtypeObservation", {"dtype": np.int32}),
("CarRacing-v3", "DtypeObservation", {"dtype": np.int32}),
# ("CartPole-v1", "RenderObservation", {}), # not implemented
# ("CartPole-v1", "TimeAwareObservation", {}), # not implemented
# ("CartPole-v1", "FrameStackObservation", {}), # not implemented
Expand Down
Loading