Add compatibility with numpy 2.0 #1094

Merged · 16 commits · Jun 28, 2024

12 changes: 9 additions & 3 deletions .github/workflows/build.yml
@@ -10,15 +10,21 @@ jobs:
    strategy:
      matrix:
        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+       numpy-version: ['>=1.21,<2.0', '>=2.0']
+       exclude:
+         - python-version: '3.8'  # numpy>=2.0 requires Python>=3.9
+           numpy-version: '>=2.0'
    steps:
      - uses: actions/checkout@v4
      - run: |
-         docker build -f bin/all-py.Dockerfile \
-           --build-arg PYTHON_VERSION=${{ matrix.python-version }} \
-           --tag gymnasium-all-docker .
+         docker build -f bin/all-py.Dockerfile \
+           --build-arg PYTHON_VERSION="${{ matrix.python-version }}" \
+           --build-arg NUMPY_VERSION="${{ matrix.numpy-version }}" \
+           --tag gymnasium-all-docker .
      - name: Run tests
        run: docker run gymnasium-all-docker pytest tests/*
+     - name: Run doctests
+       if: ${{ matrix.numpy-version == '>=2.0' }}
+       run: docker run gymnasium-all-docker pytest --doctest-modules gymnasium/

build-necessary:
4 changes: 4 additions & 0 deletions bin/all-py.Dockerfile
@@ -1,5 +1,6 @@
# A Dockerfile that sets up a full Gymnasium install with test dependencies
ARG PYTHON_VERSION
+ARG NUMPY_VERSION=">=1.21,<2.0"
FROM python:$PYTHON_VERSION

SHELL ["/bin/bash", "-o", "pipefail", "-c"]

@@ -27,6 +28,9 @@ RUN git clone https://github.com/openai/mujoco-py.git\
COPY . /usr/local/gymnasium/
WORKDIR /usr/local/gymnasium/

+# Specify the numpy version to cover both 1.x and 2.x
+RUN pip install --upgrade "numpy$NUMPY_VERSION"
+
# Test with PyTorch CPU build, since CUDA is not available in CI anyway
RUN pip install .[all,testing] --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu

1 change: 1 addition & 0 deletions gymnasium/envs/box2d/car_racing.py
@@ -540,6 +540,7 @@ def reset(
    def step(self, action: Union[np.ndarray, int]):
        assert self.car is not None
        if action is not None:
+           action = action.astype(np.float64)
            if self.continuous:
                self.car.steer(-action[0])
                self.car.gas(action[1])
2 changes: 1 addition & 1 deletion gymnasium/envs/box2d/lunar_lander.py
@@ -509,7 +509,7 @@ def step(self, action):
        )

        if self.continuous:
-           action = np.clip(action, -1, +1).astype(np.float32)
+           action = np.clip(action, -1, +1).astype(np.float64)
        else:
            assert self.action_space.contains(
                action
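
The `astype(np.float64)` casts here and in `car_racing.py` above line up with NumPy 2.0's revised promotion rules (NEP 50), under which scalar operands are no longer demoted to match a float32 array. A minimal sketch of the behavior change (illustrative, not taken from the PR discussion):

```python
import numpy as np

# NumPy 1.x value-based casting let the float64 scalar be demoted, keeping the
# result float32; NumPy 2.0 (NEP 50) respects the scalar's dtype and upcasts.
action = np.array([0.5, -0.5], dtype=np.float32)
print((action + np.float64(0.1)).dtype)  # 1.x: float32, 2.0: float64

# Casting up front, as the Box2D envs now do, pins the dtype on both majors.
print(action.astype(np.float64).dtype)  # float64 everywhere
```
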
4 changes: 2 additions & 2 deletions gymnasium/envs/classic_control/acrobot.py
@@ -447,9 +447,9 @@ def rk4(derivs, y0, t):
    try:
        Ny = len(y0)
    except TypeError:
-       yout = np.zeros((len(t),), np.float_)
+       yout = np.zeros((len(t),), np.float64)
    else:
-       yout = np.zeros((len(t), Ny), np.float_)
+       yout = np.zeros((len(t), Ny), np.float64)

    yout[0] = y0
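
For context: `np.float_` is one of the aliases NumPy 2.0 removed, so renaming it to the concrete `np.float64` (which `np.float_` always pointed to on 1.x) is the standard migration. A quick sketch:

```python
import numpy as np

# Works on NumPy 1.x and 2.x alike.
yout = np.zeros((5,), np.float64)
print(yout.dtype)  # float64

# On NumPy 2.0 the old alias is gone:
# np.zeros((5,), np.float_)  # AttributeError: np.float_ was removed
```
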
5 changes: 4 additions & 1 deletion gymnasium/envs/classic_control/pendulum.py
@@ -241,7 +241,10 @@ def render(self):
        if self.last_u is not None:
            scale_img = pygame.transform.smoothscale(
                img,
-               (scale * np.abs(self.last_u) / 2, scale * np.abs(self.last_u) / 2),
+               (
+                   float(scale * np.abs(self.last_u) / 2),
+                   float(scale * np.abs(self.last_u) / 2),
+               ),
            )
        is_flip = bool(self.last_u > 0)
        scale_img = pygame.transform.flip(scale_img, is_flip, True)
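
The new `float(...)` wrappers presumably exist because the size expression yields NumPy scalars, and converting them to built-in floats avoids depending on how strictly pygame validates its arguments. A sketch with hypothetical stand-in values (`scale` and `last_u` here are illustrative, not Gymnasium's):

```python
import numpy as np

scale, last_u = 100.0, np.float32(-1.5)
size = scale * np.abs(last_u) / 2
print(type(size))         # a NumPy scalar (np.float32 on 2.0, np.float64 on 1.x)
print(type(float(size)))  # <class 'float'> on both, safe to pass to pygame
```
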
2 changes: 1 addition & 1 deletion gymnasium/spaces/dict.py
@@ -20,7 +20,7 @@ class Dict(Space[typing.Dict[str, Any]], typing.Mapping[str, Space[Any]]):
        >>> from gymnasium.spaces import Dict, Box, Discrete
        >>> observation_space = Dict({"position": Box(-1, 1, shape=(2,)), "color": Discrete(3)}, seed=42)
        >>> observation_space.sample()
-       {'color': 0, 'position': array([-0.3991573 , 0.21649833], dtype=float32)}
+       {'color': np.int64(0), 'position': array([-0.3991573 , 0.21649833], dtype=float32)}

With a nested dict:

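
Most of the doctest churn in this PR — here and in the `Discrete`, `OneOf`, `Tuple`, and wrapper docstrings below — follows from NumPy 2.0's new scalar repr (NEP 51), which spells out the type. A minimal illustration:

```python
import numpy as np

x = np.int64(0)
print(repr(x))         # NumPy 2.0: np.int64(0)   NumPy 1.x: 0
print(x)               # str() is unchanged: 0
print(repr(np.True_))  # NumPy 2.0: np.True_      NumPy 1.x: True
```
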
4 changes: 2 additions & 2 deletions gymnasium/spaces/discrete.py
@@ -18,10 +18,10 @@ class Discrete(Space[np.int64]):
        >>> from gymnasium.spaces import Discrete
        >>> observation_space = Discrete(2, seed=42)  # {0, 1}
        >>> observation_space.sample()
-       0
+       np.int64(0)
        >>> observation_space = Discrete(3, start=-1, seed=42)  # {-1, 0, 1}
        >>> observation_space.sample()
-       -1
+       np.int64(-1)
    """

def __init__(
4 changes: 2 additions & 2 deletions gymnasium/spaces/oneof.py
@@ -19,9 +19,9 @@ class OneOf(Space[Any]):
        >>> from gymnasium.spaces import OneOf, Box, Discrete
        >>> observation_space = OneOf((Discrete(2), Box(-1, 1, shape=(2,))), seed=123)
        >>> observation_space.sample()  # the first element is the space index (Box in this case) and the second element is the sample from Box
-       (0, 0)
+       (np.int64(0), np.int64(0))
        >>> observation_space.sample()  # this time the Discrete space was sampled as index=0
-       (1, array([-0.00711833, -0.7257502 ], dtype=float32))
+       (np.int64(1), array([-0.00711833, -0.7257502 ], dtype=float32))
        >>> observation_space[0]
        Discrete(2)
        >>> observation_space[1]
2 changes: 1 addition & 1 deletion gymnasium/spaces/tuple.py
@@ -19,7 +19,7 @@ class Tuple(Space[typing.Tuple[Any, ...]], typing.Sequence[Any]):
        >>> from gymnasium.spaces import Tuple, Box, Discrete
        >>> observation_space = Tuple((Discrete(2), Box(-1, 1, shape=(2,))), seed=42)
        >>> observation_space.sample()
-       (0, array([-0.3991573 , 0.21649833], dtype=float32))
+       (np.int64(0), array([-0.3991573 , 0.21649833], dtype=float32))
"""

def __init__(
15 changes: 3 additions & 12 deletions gymnasium/utils/passive_env_checker.py
@@ -32,15 +32,9 @@ def _check_box_observation_space(observation_space: spaces.Box):
    ), f"The Box observation space shape and high shape have have different shapes, high shape: {observation_space.high.shape}, box shape: {observation_space.shape}"

    if np.any(observation_space.low == observation_space.high):
-       logger.warn(
-           "A Box observation space maximum and minimum values are equal. "
-           f"Actual equal coordinates: {[x for x in zip(*np.where(observation_space.low == observation_space.high))]}"
-       )
+       logger.warn("A Box observation space maximum and minimum values are equal.")
    elif np.any(observation_space.high < observation_space.low):
-       logger.warn(
-           "A Box observation space low value is greater than a high value. "
-           f"Actual less than coordinates: {[x for x in zip(*np.where(observation_space.high < observation_space.low))]}"
-       )
+       logger.warn("A Box observation space low value is greater than a high value.")


def _check_box_action_space(action_space: spaces.Box):

@@ -57,10 +51,7 @@ def _check_box_action_space(action_space: spaces.Box):
    ), f"The Box action space shape and high shape have different shapes, high shape: {action_space.high.shape}, box shape: {action_space.shape}"

    if np.any(action_space.low == action_space.high):
-       logger.warn(
-           "A Box action space maximum and minimum values are equal. "
-           f"Actual equal coordinates: {[x for x in zip(*np.where(action_space.low == action_space.high))]}"
-       )
+       logger.warn("A Box action space maximum and minimum values are equal.")


def check_space(
10 changes: 8 additions & 2 deletions gymnasium/wrappers/atari_preprocessing.py
@@ -37,8 +37,14 @@ class AtariPreprocessing(gym.Wrapper, gym.utils.RecordConstructorArgs):

    Example:
        >>> import gymnasium as gym  # doctest: +SKIP
-       >>> env = gym.make("ALE/Adventure-v5")  # doctest: +SKIP
-       >>> env = AtariPreprocessing(env, noop_max=10, frame_skip=0, screen_size=84, terminal_on_life_loss=True, grayscale_obs=False, grayscale_newaxis=False)  # doctest: +SKIP
+       >>> import ale_py  # doctest: +SKIP
+       >>> gym.register_envs(ale_py)  # doctest: +SKIP
+       >>> env = gym.make("ALE/Pong-v5", frameskip=1)  # doctest: +SKIP
+       >>> env = AtariPreprocessing(  # doctest: +SKIP
+       ...     env,
+       ...     noop_max=10, frame_skip=4, terminal_on_life_loss=True,
+       ...     screen_size=84, grayscale_obs=False, grayscale_newaxis=False
+       ... )

Change logs:
* Added in gym v0.12.2 (gym #1455)
4 changes: 2 additions & 2 deletions gymnasium/wrappers/stateful_observation.py
@@ -559,9 +559,9 @@ class MaxAndSkipObservation(
        >>> wrapped_obs0, *_ = wrapped_env.reset(seed=123)
        >>> wrapped_obs1, *_ = wrapped_env.step(1)
        >>> np.all(obs0 == wrapped_obs0)
-       True
+       np.True_
        >>> np.all(wrapped_obs1 == skip_and_max_obs)
-       True
+       np.True_

Change logs:
* v1.0.0 - Initially add
4 changes: 2 additions & 2 deletions gymnasium/wrappers/stateful_reward.py
@@ -52,7 +52,7 @@ class NormalizeReward(
        ...
        >>> env.close()
        >>> np.var(episode_rewards)
-       0.0008876301247721108
+       np.float64(0.0008876301247721108)

Example with the normalize reward wrapper:
>>> import numpy as np
@@ -70,7 +70,7 @@ class NormalizeReward(
        >>> env.close()
        >>> # will approach 0.99 with more episodes
        >>> np.var(episode_rewards)
-       0.010162116476634746
+       np.float64(0.010162116476634746)

Change logs:
* v0.21.0 - Initially added
2 changes: 1 addition & 1 deletion gymnasium/wrappers/transform_action.py
@@ -144,7 +144,7 @@ class RescaleAction(
        >>> wrapped_env = RescaleAction(env, min_action=min_action, max_action=max_action)
        >>> wrapped_env_obs, _, _, _, _ = wrapped_env.step(max_action)
        >>> np.all(obs == wrapped_env_obs)
-       True
+       np.True_

Change logs:
* v0.15.4 - Initially added
8 changes: 4 additions & 4 deletions gymnasium/wrappers/transform_observation.py
@@ -632,11 +632,11 @@ class AddRenderObservation(
        >>> obs, _ = env.reset(seed=123)
        >>> image = env.render()
        >>> np.all(obs == image)
-       True
+       np.True_
        >>> obs, *_ = env.step(env.action_space.sample())
        >>> image = env.render()
        >>> np.all(obs == image)
-       True
+       np.True_

Example - Add the rendered image to the original observation as a dictionary item:
>>> env = gym.make("CartPole-v1", render_mode="rgb_array")
@@ -649,11 +649,11 @@ class AddRenderObservation(
        >>> obs["state"]
        array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32)
        >>> np.all(obs["pixels"] == env.render())
-       True
+       np.True_
        >>> obs, reward, terminates, truncates, info = env.step(env.action_space.sample())
        >>> image = env.render()
        >>> np.all(obs["pixels"] == image)
-       True
+       np.True_

Change logs:
* v0.15.0 - Initially added as ``PixelObservationWrapper``
2 changes: 1 addition & 1 deletion gymnasium/wrappers/transform_reward.py
@@ -77,7 +77,7 @@ class ClipReward(TransformReward[ObsType, ActType], gym.utils.RecordConstructorArgs):
        >>> _ = env.reset()
        >>> _, rew, _, _, _ = env.step(1)
        >>> rew
-       0.5
+       np.float64(0.5)

Change logs:
* v1.0.0 - Initially added
6 changes: 3 additions & 3 deletions gymnasium/wrappers/vector/dict_info_to_list.py
@@ -50,18 +50,18 @@ class DictInfoToList(VectorWrapper):
    Another example for vector environments:
        >>> import numpy as np
        >>> import gymnasium as gym
-       >>> envs = gym.make_vec("HalfCheetah-v4", num_envs=3)
+       >>> envs = gym.make_vec("HalfCheetah-v4", num_envs=2)
        >>> _ = envs.reset(seed=123)
        >>> _ = envs.action_space.seed(123)
        >>> _, _, _, _, infos = envs.step(envs.action_space.sample())
        >>> infos
-       {'x_position': array([0.03332211, 0.10172355, 0.08920531]), '_x_position': array([ True, True, True]), 'x_velocity': array([-0.06296527, 0.89345848, 0.37710836]), '_x_velocity': array([ True, True, True]), 'reward_run': array([-0.06296527, 0.89345848, 0.37710836]), '_reward_run': array([ True, True, True]), 'reward_ctrl': array([-0.24503503, -0.21944423, -0.20672209]), '_reward_ctrl': array([ True, True, True])}
+       {'x_position': array([0.03332211, 0.10172355]), '_x_position': array([ True, True]), 'x_velocity': array([-0.06296527, 0.89345848]), '_x_velocity': array([ True, True]), 'reward_run': array([-0.06296527, 0.89345848]), '_reward_run': array([ True, True]), 'reward_ctrl': array([-0.24503504, -0.21944423], dtype=float32), '_reward_ctrl': array([ True, True])}
        >>> envs = DictInfoToList(envs)
        >>> _ = envs.reset(seed=123)
        >>> _ = envs.action_space.seed(123)
        >>> _, _, _, _, infos = envs.step(envs.action_space.sample())
        >>> infos
-       [{'x_position': 0.03332210900362942, 'x_velocity': -0.06296527291998533, 'reward_run': -0.06296527291998533, 'reward_ctrl': -0.2450350284576416}, {'x_position': 0.10172354684460168, 'x_velocity': 0.8934584807363618, 'reward_run': 0.8934584807363618, 'reward_ctrl': -0.21944422721862794}, {'x_position': 0.08920531470057845, 'x_velocity': 0.3771083596080768, 'reward_run': 0.3771083596080768, 'reward_ctrl': -0.20672209262847902}]
+       [{'x_position': np.float64(0.03332210900362942), 'x_velocity': np.float64(-0.06296527291998533), 'reward_run': np.float64(-0.06296527291998533), 'reward_ctrl': np.float32(-0.24503504)}, {'x_position': np.float64(0.10172354684460168), 'x_velocity': np.float64(0.8934584807363618), 'reward_run': np.float64(0.8934584807363618), 'reward_ctrl': np.float32(-0.21944423)}]

Change logs:
* v0.24.0 - Initially added as ``VectorListInfo``
8 changes: 4 additions & 4 deletions gymnasium/wrappers/vector/stateful_observation.py
@@ -35,9 +35,9 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructorArgs):
        >>> for _ in range(100):
        ...     obs, *_ = envs.step(envs.action_space.sample())
        >>> np.mean(obs)
-       0.024251968
+       np.float32(0.024251968)
        >>> np.std(obs)
-       0.62259156
+       np.float32(0.62259156)
        >>> envs.close()

Example with the normalize reward wrapper:
@@ -49,9 +49,9 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructorArgs):
        >>> for _ in range(100):
        ...     obs, *_ = envs.step(envs.action_space.sample())
        >>> np.mean(obs)
-       -0.2359734
+       np.float32(-0.2359734)
        >>> np.std(obs)
-       1.1938739
+       np.float32(1.1938739)
        >>> envs.close()
    """

8 changes: 4 additions & 4 deletions gymnasium/wrappers/vector/stateful_reward.py
@@ -44,9 +44,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
        ...
        >>> envs.close()
        >>> np.mean(episode_rewards)
-       -0.03359492141887935
+       np.float64(-0.03359492141887935)
        >>> np.std(episode_rewards)
-       0.029028230434438706
+       np.float64(0.029028230434438706)

Example with the normalize reward wrapper:
>>> import gymnasium as gym
@@ -62,9 +62,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
        ...
        >>> envs.close()
        >>> np.mean(episode_rewards)
-       -0.1598639586606745
+       np.float64(-0.1598639586606745)
        >>> np.std(episode_rewards)
-       0.27800309628058434
+       np.float64(0.27800309628058434)
"""

def __init__(
2 changes: 1 addition & 1 deletion gymnasium/wrappers/vector/vectorize_action.py
@@ -33,7 +33,7 @@ class TransformAction(VectorActionWrapper):
        >>> obs
        array([[-0.46553135, -0.00142543],
               [-0.498371  , -0.00715587],
-              [-0.4651575 , -0.00624371]], dtype=float32)
+              [-0.46515748, -0.00624371]], dtype=float32)

Example - With action transformation:
>>> import gymnasium as gym
8 changes: 4 additions & 4 deletions gymnasium/wrappers/vector/vectorize_observation.py
@@ -321,15 +321,15 @@ class RescaleObservation(VectorizeTransformObservation):
        >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
        >>> obs, info = envs.reset(seed=123)
        >>> obs.min()
-       -0.0446179
+       np.float32(-0.0446179)
        >>> obs.max()
-       0.0469136
+       np.float32(0.0469136)
        >>> envs = RescaleObservation(envs, min_obs=-5.0, max_obs=5.0)
        >>> obs, info = envs.reset(seed=123)
        >>> obs.min()
-       -0.33379582
+       np.float32(-0.33379582)
        >>> obs.max()
-       0.55998987
+       np.float32(0.55998987)
>>> envs.close()
"""

1 change: 0 additions & 1 deletion pyproject.toml
@@ -108,7 +108,6 @@ gymnasium = [
# Linters and Test tools #######################################################

[tool.black]
-safe = true

[tool.isort]
atomic = true
8 changes: 3 additions & 5 deletions tests/envs/test_action_dim_check.py
@@ -127,19 +127,17 @@ def test_box_actions_out_of_bound(env: gym.Env):
    if is_upper_bound:
        obs, _, _, _, _ = env.step(upper_bounds)
        oob_action = upper_bounds.copy()
-       oob_action[i] += np.cast[dtype](OOB_VALUE)
+       oob_action[i] += np.asarray(OOB_VALUE, dtype=dtype)

        assert oob_action[i] > upper_bounds[i]
        oob_obs, _, _, _, _ = oob_env.step(oob_action)

        assert np.all(obs == oob_obs)

    if is_lower_bound:
-       obs, _, _, _, _ = env.step(
-           lower_bounds
-       )  # `env` is unwrapped, and in new step API
+       obs, _, _, _, _ = env.step(lower_bounds)
        oob_action = lower_bounds.copy()
-       oob_action[i] -= np.cast[dtype](OOB_VALUE)
+       oob_action[i] -= np.asarray(OOB_VALUE, dtype=dtype)

        assert oob_action[i] < lower_bounds[i]
        oob_obs, _, _, _, _ = oob_env.step(oob_action)
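
`np.cast` is another removal in NumPy 2.0; the migration guide recommends `np.asarray(value, dtype=dtype)` as the drop-in replacement, which is what the test now uses. A sketch with illustrative values:

```python
import numpy as np

OOB_VALUE, dtype = 1e6, np.float32  # stand-ins for the test's constants

# Portable across NumPy 1.x and 2.x:
bump = np.asarray(OOB_VALUE, dtype=dtype)
print(bump, bump.dtype)  # 1000000.0 float32

# The removed 1.x spelling:
# bump = np.cast[dtype](OOB_VALUE)  # AttributeError on NumPy 2.0
```
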
2 changes: 1 addition & 1 deletion tests/spaces/test_spaces.py
@@ -239,7 +239,7 @@ def chi2_test(sample, low, high, bounded_below, bounded_above):

    if bounded_below and bounded_above:
        # X ~ U(low, high)
-       degrees_of_freedom = high - low + 1
+       degrees_of_freedom = int(high) - int(low) + 1
        observed_frequency = np.bincount(sample - low, minlength=degrees_of_freedom)
        assert observed_frequency.shape == (degrees_of_freedom,)
        expected_frequency = np.ones(degrees_of_freedom) * n_trials / degrees_of_freedom
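
Converting the bounds to Python ints likely sidesteps fixed-width integer arithmetic: `high - low + 1` on NumPy integer scalars can wrap around, while Python ints are arbitrary precision (and `np.bincount`'s `minlength` accepts either). A sketch assuming `np.int64` bounds, as `Discrete` uses:

```python
import numpy as np

high, low = np.int64(2**62), np.int64(-(2**62))

# np.int64 subtraction wraps here (with a RuntimeWarning), but Python ints
# give the exact count of values in [low, high].
print(int(high) - int(low) + 1)  # 9223372036854775809
```
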