diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 73c7fb783..8aabf2f23 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -10,15 +10,21 @@ jobs:
     strategy:
       matrix:
         python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+        numpy-version: ['>=1.21,<2.0', '>=2.0']
+        exclude:
+          - python-version: '3.8'  # numpy>=2.0 requires Python>=3.9
+            numpy-version: '>=2.0'
     steps:
       - uses: actions/checkout@v4
       - run: |
-          docker build -f bin/all-py.Dockerfile \
-            --build-arg PYTHON_VERSION=${{ matrix.python-version }} \
-            --tag gymnasium-all-docker .
+          docker build -f bin/all-py.Dockerfile \
+            --build-arg PYTHON_VERSION="${{ matrix.python-version }}" \
+            --build-arg NUMPY_VERSION="${{ matrix.numpy-version }}" \
+            --tag gymnasium-all-docker .
      - name: Run tests
        run: docker run gymnasium-all-docker pytest tests/*
      - name: Run doctests
+       if: ${{ matrix.numpy-version == '>=2.0' }}
        run: docker run gymnasium-all-docker pytest --doctest-modules gymnasium/

  build-necessary:
diff --git a/bin/all-py.Dockerfile b/bin/all-py.Dockerfile
index 2ef303edf..4872bd8fb 100644
--- a/bin/all-py.Dockerfile
+++ b/bin/all-py.Dockerfile
@@ -1,5 +1,6 @@
 # A Dockerfile that sets up a full Gymnasium install with test dependencies
 ARG PYTHON_VERSION
+ARG NUMPY_VERSION=">=1.21,<2.0"
 FROM python:$PYTHON_VERSION

 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
@@ -27,6 +28,9 @@ RUN git clone https://github.com/openai/mujoco-py.git\
 COPY . /usr/local/gymnasium/
 WORKDIR /usr/local/gymnasium/

+# Specify the numpy version to cover both 1.x and 2.x
+RUN pip install --upgrade "numpy$NUMPY_VERSION"
+
 # Test with PyTorch CPU build, since CUDA is not available in CI anyway
 RUN pip install .[all,testing] --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu
diff --git a/gymnasium/envs/box2d/car_racing.py b/gymnasium/envs/box2d/car_racing.py
index 19368c907..c15024f13 100644
--- a/gymnasium/envs/box2d/car_racing.py
+++ b/gymnasium/envs/box2d/car_racing.py
@@ -540,6 +540,7 @@ def reset(
     def step(self, action: Union[np.ndarray, int]):
         assert self.car is not None
         if action is not None:
+            action = action.astype(np.float64)
            if self.continuous:
                self.car.steer(-action[0])
                self.car.gas(action[1])
diff --git a/gymnasium/envs/box2d/lunar_lander.py b/gymnasium/envs/box2d/lunar_lander.py
index 4aeff6ac9..976647b15 100644
--- a/gymnasium/envs/box2d/lunar_lander.py
+++ b/gymnasium/envs/box2d/lunar_lander.py
@@ -509,7 +509,7 @@ def step(self, action):
         )
         if self.continuous:
-            action = np.clip(action, -1, +1).astype(np.float32)
+            action = np.clip(action, -1, +1).astype(np.float64)
         else:
             assert self.action_space.contains(
                 action
diff --git a/gymnasium/envs/classic_control/acrobot.py b/gymnasium/envs/classic_control/acrobot.py
index 8ee4358e8..80ef05e6c 100644
--- a/gymnasium/envs/classic_control/acrobot.py
+++ b/gymnasium/envs/classic_control/acrobot.py
@@ -447,9 +447,9 @@ def rk4(derivs, y0, t):
     try:
         Ny = len(y0)
     except TypeError:
-        yout = np.zeros((len(t),), np.float_)
+        yout = np.zeros((len(t),), np.float64)
     else:
-        yout = np.zeros((len(t), Ny), np.float_)
+        yout = np.zeros((len(t), Ny), np.float64)

     yout[0] = y0
diff --git a/gymnasium/envs/classic_control/pendulum.py b/gymnasium/envs/classic_control/pendulum.py
index 8ec162539..a908b1139 100644
--- a/gymnasium/envs/classic_control/pendulum.py
+++ b/gymnasium/envs/classic_control/pendulum.py
@@ -241,7 +241,10 @@ def render(self):
         if self.last_u is not None:
             scale_img = pygame.transform.smoothscale(
                 img,
-                (scale * np.abs(self.last_u) / 2, scale * np.abs(self.last_u) / 2),
+                (
+                    float(scale * np.abs(self.last_u) / 2),
+                    float(scale * np.abs(self.last_u) / 2),
+                ),
             )
             is_flip = bool(self.last_u > 0)
             scale_img = pygame.transform.flip(scale_img, is_flip, True)
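The doctest updates in the space and wrapper files below all trace back to a single NumPy 2.0 change: NEP 51 made scalar reprs spell out their type. A minimal sketch of the difference, runnable under either major version (illustrative only, not part of the patch):

    import numpy as np

    # numpy 1.x prints "0"; numpy 2.x prints "np.int64(0)"
    print(repr(np.int64(0)))

    # numpy 1.x prints "True"; numpy 2.x prints "np.True_"
    print(repr(np.all(np.array([1, 1]) == 1)))

    # Array reprs are unchanged, which is why expected outputs such as
    # "array([-0.3991573 , 0.21649833], dtype=float32)" survive as-is.
    print(repr(np.array([0.5], dtype=np.float32)))

This is also why the workflow above runs the doctest job only for the numpy>=2.0 matrix entry: an expected output can match one repr style or the other, not both.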
diff --git a/gymnasium/spaces/dict.py b/gymnasium/spaces/dict.py
index 128cf8c71..49ff4c907 100644
--- a/gymnasium/spaces/dict.py
+++ b/gymnasium/spaces/dict.py
@@ -20,7 +20,7 @@ class Dict(Space[typing.Dict[str, Any]], typing.Mapping[str, Space[Any]]):
         >>> from gymnasium.spaces import Dict, Box, Discrete
         >>> observation_space = Dict({"position": Box(-1, 1, shape=(2,)), "color": Discrete(3)}, seed=42)
         >>> observation_space.sample()
-        {'color': 0, 'position': array([-0.3991573 , 0.21649833], dtype=float32)}
+        {'color': np.int64(0), 'position': array([-0.3991573 , 0.21649833], dtype=float32)}

     With a nested dict:
diff --git a/gymnasium/spaces/discrete.py b/gymnasium/spaces/discrete.py
index 41b9c356a..9a4575252 100644
--- a/gymnasium/spaces/discrete.py
+++ b/gymnasium/spaces/discrete.py
@@ -18,10 +18,10 @@ class Discrete(Space[np.int64]):
         >>> from gymnasium.spaces import Discrete
         >>> observation_space = Discrete(2, seed=42)  # {0, 1}
         >>> observation_space.sample()
-        0
+        np.int64(0)
         >>> observation_space = Discrete(3, start=-1, seed=42)  # {-1, 0, 1}
         >>> observation_space.sample()
-        -1
+        np.int64(-1)
     """

     def __init__(
diff --git a/gymnasium/spaces/oneof.py b/gymnasium/spaces/oneof.py
index 08aa50a5a..50e463be4 100644
--- a/gymnasium/spaces/oneof.py
+++ b/gymnasium/spaces/oneof.py
@@ -19,9 +19,9 @@ class OneOf(Space[Any]):
         >>> from gymnasium.spaces import OneOf, Box, Discrete
         >>> observation_space = OneOf((Discrete(2), Box(-1, 1, shape=(2,))), seed=123)
         >>> observation_space.sample()  # the first element is the space index (Box in this case) and the second element is the sample from Box
-        (0, 0)
+        (np.int64(0), np.int64(0))
         >>> observation_space.sample()  # this time the Discrete space was sampled as index=0
-        (1, array([-0.00711833, -0.7257502 ], dtype=float32))
+        (np.int64(1), array([-0.00711833, -0.7257502 ], dtype=float32))
         >>> observation_space[0]
         Discrete(2)
         >>> observation_space[1]
diff --git a/gymnasium/spaces/tuple.py b/gymnasium/spaces/tuple.py
index c7d2330d6..aae683ad2 100644
--- a/gymnasium/spaces/tuple.py
+++ b/gymnasium/spaces/tuple.py
@@ -19,7 +19,7 @@ class Tuple(Space[typing.Tuple[Any, ...]], typing.Sequence[Any]):
         >>> from gymnasium.spaces import Tuple, Box, Discrete
         >>> observation_space = Tuple((Discrete(2), Box(-1, 1, shape=(2,))), seed=42)
         >>> observation_space.sample()
-        (0, array([-0.3991573 , 0.21649833], dtype=float32))
+        (np.int64(0), array([-0.3991573 , 0.21649833], dtype=float32))
     """

     def __init__(
diff --git a/gymnasium/utils/passive_env_checker.py b/gymnasium/utils/passive_env_checker.py
index b6f26435a..0b08fc516 100644
--- a/gymnasium/utils/passive_env_checker.py
+++ b/gymnasium/utils/passive_env_checker.py
@@ -32,15 +32,9 @@ def _check_box_observation_space(observation_space: spaces.Box):
     ), f"The Box observation space shape and high shape have have different shapes, high shape: {observation_space.high.shape}, box shape: {observation_space.shape}"

     if np.any(observation_space.low == observation_space.high):
-        logger.warn(
-            "A Box observation space maximum and minimum values are equal. "
-            f"Actual equal coordinates: {[x for x in zip(*np.where(observation_space.low == observation_space.high))]}"
-        )
+        logger.warn("A Box observation space maximum and minimum values are equal.")
     elif np.any(observation_space.high < observation_space.low):
-        logger.warn(
-            "A Box observation space low value is greater than a high value. "
-            f"Actual less than coordinates: {[x for x in zip(*np.where(observation_space.high < observation_space.low))]}"
-        )
+        logger.warn("A Box observation space low value is greater than a high value.")


 def _check_box_action_space(action_space: spaces.Box):
@@ -57,10 +51,7 @@ def _check_box_action_space(action_space: spaces.Box):
     ), f"The Box action space shape and high shape have different shapes, high shape: {action_space.high.shape}, box shape: {action_space.shape}"

     if np.any(action_space.low == action_space.high):
-        logger.warn(
-            "A Box action space maximum and minimum values are equal. "
-            f"Actual equal coordinates: {[x for x in zip(*np.where(action_space.low == action_space.high))]}"
-        )
+        logger.warn("A Box action space maximum and minimum values are equal.")


 def check_space(
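Dropping the coordinate listings from these warnings is not purely cosmetic: the f-strings interpolated tuples of NumPy integers, so the exact message text, and the tests in tests/utils/test_passive_env_checker.py that assert on it, would differ between NumPy majors. A small repro of the instability (illustrative, not part of the patch):

    import numpy as np

    low = np.zeros(5)
    high = np.zeros(5)
    coords = [x for x in zip(*np.where(low == high))]
    print(coords)
    # numpy 1.x: [(0,), (1,), (2,), (3,), (4,)]
    # numpy 2.x: [(np.int64(0),), (np.int64(1),), ...]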
" - f"Actual equal coordinates: {[x for x in zip(*np.where(observation_space.low == observation_space.high))]}" - ) + logger.warn("A Box observation space maximum and minimum values are equal.") elif np.any(observation_space.high < observation_space.low): - logger.warn( - "A Box observation space low value is greater than a high value. " - f"Actual less than coordinates: {[x for x in zip(*np.where(observation_space.high < observation_space.low))]}" - ) + logger.warn("A Box observation space low value is greater than a high value.") def _check_box_action_space(action_space: spaces.Box): @@ -57,10 +51,7 @@ def _check_box_action_space(action_space: spaces.Box): ), f"The Box action space shape and high shape have different shapes, high shape: {action_space.high.shape}, box shape: {action_space.shape}" if np.any(action_space.low == action_space.high): - logger.warn( - "A Box action space maximum and minimum values are equal. " - f"Actual equal coordinates: {[x for x in zip(*np.where(action_space.low == action_space.high))]}" - ) + logger.warn("A Box action space maximum and minimum values are equal.") def check_space( diff --git a/gymnasium/wrappers/atari_preprocessing.py b/gymnasium/wrappers/atari_preprocessing.py index c3f9efc8c..e04562cf8 100644 --- a/gymnasium/wrappers/atari_preprocessing.py +++ b/gymnasium/wrappers/atari_preprocessing.py @@ -37,8 +37,14 @@ class AtariPreprocessing(gym.Wrapper, gym.utils.RecordConstructorArgs): Example: >>> import gymnasium as gym # doctest: +SKIP - >>> env = gym.make("ALE/Adventure-v5") # doctest: +SKIP - >>> env = AtariPreprocessing(env, noop_max=10, frame_skip=0, screen_size=84, terminal_on_life_loss=True, grayscale_obs=False, grayscale_newaxis=False) # doctest: +SKIP + >>> import ale_py # doctest: +SKIP + >>> gym.register_envs(ale_py) # doctest: +SKIP + >>> env = gym.make("ALE/Pong-v5", frameskip=1) # doctest: +SKIP + >>> env = AtariPreprocessing( # doctest: +SKIP + ... env, + ... noop_max=10, frame_skip=4, terminal_on_life_loss=True, + ... screen_size=84, grayscale_obs=False, grayscale_newaxis=False + ... ) Change logs: * Added in gym v0.12.2 (gym #1455) diff --git a/gymnasium/wrappers/stateful_observation.py b/gymnasium/wrappers/stateful_observation.py index 1d08974ba..9e7c56167 100644 --- a/gymnasium/wrappers/stateful_observation.py +++ b/gymnasium/wrappers/stateful_observation.py @@ -559,9 +559,9 @@ class MaxAndSkipObservation( >>> wrapped_obs0, *_ = wrapped_env.reset(seed=123) >>> wrapped_obs1, *_ = wrapped_env.step(1) >>> np.all(obs0 == wrapped_obs0) - True + np.True_ >>> np.all(wrapped_obs1 == skip_and_max_obs) - True + np.True_ Change logs: * v1.0.0 - Initially add diff --git a/gymnasium/wrappers/stateful_reward.py b/gymnasium/wrappers/stateful_reward.py index 99a62f4b0..32aa56ddf 100644 --- a/gymnasium/wrappers/stateful_reward.py +++ b/gymnasium/wrappers/stateful_reward.py @@ -52,7 +52,7 @@ class NormalizeReward( ... 
diff --git a/gymnasium/wrappers/transform_action.py b/gymnasium/wrappers/transform_action.py
index 3c0c1623a..9d2834f8b 100644
--- a/gymnasium/wrappers/transform_action.py
+++ b/gymnasium/wrappers/transform_action.py
@@ -144,7 +144,7 @@ class RescaleAction(
         >>> wrapped_env = RescaleAction(env, min_action=min_action, max_action=max_action)
         >>> wrapped_env_obs, _, _, _, _ = wrapped_env.step(max_action)
         >>> np.all(obs == wrapped_env_obs)
-        True
+        np.True_

     Change logs:
      * v0.15.4 - Initially added
diff --git a/gymnasium/wrappers/transform_observation.py b/gymnasium/wrappers/transform_observation.py
index 8e44b19bc..06079aa21 100644
--- a/gymnasium/wrappers/transform_observation.py
+++ b/gymnasium/wrappers/transform_observation.py
@@ -632,11 +632,11 @@ class AddRenderObservation(
         >>> obs, _ = env.reset(seed=123)
         >>> image = env.render()
         >>> np.all(obs == image)
-        True
+        np.True_
         >>> obs, *_ = env.step(env.action_space.sample())
         >>> image = env.render()
         >>> np.all(obs == image)
-        True
+        np.True_

     Example - Add the rendered image to the original observation as a dictionary item:
         >>> env = gym.make("CartPole-v1", render_mode="rgb_array")
@@ -649,11 +649,11 @@ class AddRenderObservation(
         >>> obs["state"]
         array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32)
         >>> np.all(obs["pixels"] == env.render())
-        True
+        np.True_
         >>> obs, reward, terminates, truncates, info = env.step(env.action_space.sample())
         >>> image = env.render()
         >>> np.all(obs["pixels"] == image)
-        True
+        np.True_

     Change logs:
      * v0.15.0 - Initially added as ``PixelObservationWrapper``
diff --git a/gymnasium/wrappers/transform_reward.py b/gymnasium/wrappers/transform_reward.py
index d30248b09..b17308c25 100644
--- a/gymnasium/wrappers/transform_reward.py
+++ b/gymnasium/wrappers/transform_reward.py
@@ -77,7 +77,7 @@ class ClipReward(TransformReward[ObsType, ActType], gym.utils.RecordConstructorA
         >>> _ = env.reset()
         >>> _, rew, _, _, _ = env.step(1)
         >>> rew
-        0.5
+        np.float64(0.5)

     Change logs:
      * v1.0.0 - Initially added
diff --git a/gymnasium/wrappers/vector/dict_info_to_list.py b/gymnasium/wrappers/vector/dict_info_to_list.py
index 337349c1e..ebaadaf67 100644
--- a/gymnasium/wrappers/vector/dict_info_to_list.py
+++ b/gymnasium/wrappers/vector/dict_info_to_list.py
@@ -50,18 +50,18 @@ class DictInfoToList(VectorWrapper):
     Another example for vector environments:
         >>> import numpy as np
         >>> import gymnasium as gym
-        >>> envs = gym.make_vec("HalfCheetah-v4", num_envs=3)
+        >>> envs = gym.make_vec("HalfCheetah-v4", num_envs=2)
         >>> _ = envs.reset(seed=123)
         >>> _ = envs.action_space.seed(123)
         >>> _, _, _, _, infos = envs.step(envs.action_space.sample())
         >>> infos
-        {'x_position': array([0.03332211, 0.10172355, 0.08920531]), '_x_position': array([ True, True, True]), 'x_velocity': array([-0.06296527, 0.89345848, 0.37710836]), '_x_velocity': array([ True, True, True]), 'reward_run': array([-0.06296527, 0.89345848, 0.37710836]), '_reward_run': array([ True, True, True]), 'reward_ctrl': array([-0.24503503, -0.21944423, -0.20672209]), '_reward_ctrl': array([ True, True, True])}
+        {'x_position': array([0.03332211, 0.10172355]), '_x_position': array([ True, True]), 'x_velocity': array([-0.06296527, 0.89345848]), '_x_velocity': array([ True, True]), 'reward_run': array([-0.06296527, 0.89345848]), '_reward_run': array([ True, True]), 'reward_ctrl': array([-0.24503504, -0.21944423], dtype=float32), '_reward_ctrl': array([ True, True])}
         >>> envs = DictInfoToList(envs)
         >>> _ = envs.reset(seed=123)
         >>> _ = envs.action_space.seed(123)
         >>> _, _, _, _, infos = envs.step(envs.action_space.sample())
         >>> infos
-        [{'x_position': 0.03332210900362942, 'x_velocity': -0.06296527291998533, 'reward_run': -0.06296527291998533, 'reward_ctrl': -0.2450350284576416}, {'x_position': 0.10172354684460168, 'x_velocity': 0.8934584807363618, 'reward_run': 0.8934584807363618, 'reward_ctrl': -0.21944422721862794}, {'x_position': 0.08920531470057845, 'x_velocity': 0.3771083596080768, 'reward_run': 0.3771083596080768, 'reward_ctrl': -0.20672209262847902}]
+        [{'x_position': np.float64(0.03332210900362942), 'x_velocity': np.float64(-0.06296527291998533), 'reward_run': np.float64(-0.06296527291998533), 'reward_ctrl': np.float32(-0.24503504)}, {'x_position': np.float64(0.10172354684460168), 'x_velocity': np.float64(0.8934584807363618), 'reward_run': np.float64(0.8934584807363618), 'reward_ctrl': np.float32(-0.21944423)}]

     Change logs:
      * v0.24.0 - Initially added as ``VectorListInfo``
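As the expected outputs above show, DictInfoToList turns one dict of batched arrays (plus underscore-prefixed validity masks) into a list of per-env dicts; indexing a NumPy array yields NumPy scalars, hence the np.float64/np.float32 entries under NumPy 2. A simplified sketch of the conversion (illustrative helper, not the wrapper's actual code):

    import numpy as np

    def dict_info_to_list(infos: dict, num_envs: int) -> list:
        """Convert {'key': batched_array, '_key': mask} into per-env dicts."""
        out = [{} for _ in range(num_envs)]
        for key, values in infos.items():
            if key.startswith("_"):
                continue  # validity masks are consumed, not copied
            mask = infos.get("_" + key, np.ones(num_envs, dtype=bool))
            for i in range(num_envs):
                if mask[i]:
                    out[i][key] = values[i]  # NumPy indexing -> NumPy scalar
        return out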
diff --git a/gymnasium/wrappers/vector/stateful_observation.py b/gymnasium/wrappers/vector/stateful_observation.py
index 75a80416f..266c488d1 100644
--- a/gymnasium/wrappers/vector/stateful_observation.py
+++ b/gymnasium/wrappers/vector/stateful_observation.py
@@ -35,9 +35,9 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructor
         >>> for _ in range(100):
         ...     obs, *_ = envs.step(envs.action_space.sample())
         >>> np.mean(obs)
-        0.024251968
+        np.float32(0.024251968)
         >>> np.std(obs)
-        0.62259156
+        np.float32(0.62259156)
         >>> envs.close()

     Example with the normalize reward wrapper:
@@ -49,9 +49,9 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructor
         >>> for _ in range(100):
         ...     obs, *_ = envs.step(envs.action_space.sample())
         >>> np.mean(obs)
-        -0.2359734
+        np.float32(-0.2359734)
         >>> np.std(obs)
-        1.1938739
+        np.float32(1.1938739)
         >>> envs.close()
     """
diff --git a/gymnasium/wrappers/vector/stateful_reward.py b/gymnasium/wrappers/vector/stateful_reward.py
index 59d72a031..8022efd98 100644
--- a/gymnasium/wrappers/vector/stateful_reward.py
+++ b/gymnasium/wrappers/vector/stateful_reward.py
@@ -44,9 +44,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
         ...
         >>> envs.close()
         >>> np.mean(episode_rewards)
-        -0.03359492141887935
+        np.float64(-0.03359492141887935)
         >>> np.std(episode_rewards)
-        0.029028230434438706
+        np.float64(0.029028230434438706)

     Example with the normalize reward wrapper:
         >>> import gymnasium as gym
@@ -62,9 +62,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
         ...
         >>> envs.close()
         >>> np.mean(episode_rewards)
-        -0.1598639586606745
+        np.float64(-0.1598639586606745)
         >>> np.std(episode_rewards)
-        0.27800309628058434
+        np.float64(0.27800309628058434)
     """

     def __init__(
diff --git a/gymnasium/wrappers/vector/vectorize_action.py b/gymnasium/wrappers/vector/vectorize_action.py
index 1f517f663..3dc4a797a 100644
--- a/gymnasium/wrappers/vector/vectorize_action.py
+++ b/gymnasium/wrappers/vector/vectorize_action.py
@@ -33,7 +33,7 @@ class TransformAction(VectorActionWrapper):
         >>> obs
         array([[-0.46553135, -0.00142543],
                [-0.498371 , -0.00715587],
-               [-0.4651575 , -0.00624371]], dtype=float32)
+               [-0.46515748, -0.00624371]], dtype=float32)

     Example - With action transformation:
         >>> import gymnasium as gym
diff --git a/gymnasium/wrappers/vector/vectorize_observation.py b/gymnasium/wrappers/vector/vectorize_observation.py
index 0c95e6bcf..ace3ea2e3 100644
--- a/gymnasium/wrappers/vector/vectorize_observation.py
+++ b/gymnasium/wrappers/vector/vectorize_observation.py
@@ -321,15 +321,15 @@ class RescaleObservation(VectorizeTransformObservation):
         >>> envs = gym.make_vec("CartPole-v1", num_envs=3, vectorization_mode="sync")
         >>> obs, info = envs.reset(seed=123)
         >>> obs.min()
-        -0.0446179
+        np.float32(-0.0446179)
         >>> obs.max()
-        0.0469136
+        np.float32(0.0469136)
         >>> envs = RescaleObservation(envs, min_obs=-5.0, max_obs=5.0)
         >>> obs, info = envs.reset(seed=123)
         >>> obs.min()
-        -0.33379582
+        np.float32(-0.33379582)
         >>> obs.max()
-        0.55998987
+        np.float32(0.55998987)
         >>> envs.close()
     """
diff --git a/pyproject.toml b/pyproject.toml
index 541c1a2d0..6d57d613b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -108,7 +108,6 @@ gymnasium = [
 # Linters and Test tools #######################################################
 [tool.black]
-safe = true

 [tool.isort]
 atomic = true
diff --git a/tests/envs/test_action_dim_check.py b/tests/envs/test_action_dim_check.py
index d42ad0eb7..e6ed31ed6 100644
--- a/tests/envs/test_action_dim_check.py
+++ b/tests/envs/test_action_dim_check.py
@@ -127,7 +127,7 @@ def test_box_actions_out_of_bound(env: gym.Env):
         if is_upper_bound:
             obs, _, _, _, _ = env.step(upper_bounds)
             oob_action = upper_bounds.copy()
-            oob_action[i] += np.cast[dtype](OOB_VALUE)
+            oob_action[i] += np.asarray(OOB_VALUE, dtype=dtype)

             assert oob_action[i] > upper_bounds[i]
             oob_obs, _, _, _, _ = oob_env.step(oob_action)
@@ -135,11 +135,9 @@
             assert np.all(obs == oob_obs)

         if is_lower_bound:
-            obs, _, _, _, _ = env.step(
-                lower_bounds
-            )  # `env` is unwrapped, and in new step API
+            obs, _, _, _, _ = env.step(lower_bounds)
             oob_action = lower_bounds.copy()
-            oob_action[i] -= np.cast[dtype](OOB_VALUE)
+            oob_action[i] -= np.asarray(OOB_VALUE, dtype=dtype)

             assert oob_action[i] < lower_bounds[i]
             oob_obs, _, _, _, _ = oob_env.step(oob_action)
diff --git a/tests/spaces/test_spaces.py b/tests/spaces/test_spaces.py
index c77c62aa3..6df5963dc 100644
--- a/tests/spaces/test_spaces.py
+++ b/tests/spaces/test_spaces.py
@@ -239,7 +239,7 @@ def chi2_test(sample, low, high, bounded_below, bounded_above):
     if bounded_below and bounded_above:
         # X ~ U(low, high)
-        degrees_of_freedom = high - low + 1
+        degrees_of_freedom = int(high) - int(low) + 1
         observed_frequency = np.bincount(sample - low, minlength=degrees_of_freedom)
         assert observed_frequency.shape == (degrees_of_freedom,)
         expected_frequency = np.ones(degrees_of_freedom) * n_trials / degrees_of_freedom
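The chi-squared helper change above widens the bounds to Python ints before doing arithmetic on them. NumPy integer scalars wrap on overflow (with a RuntimeWarning in recent releases), whereas Python ints have arbitrary precision, so the widened form stays exact even for spaces whose bounds span most of their dtype's range. A small sketch of the failure mode this sidesteps (illustrative dtype, not the test's actual values):

    import numpy as np

    low, high = np.int8(-128), np.int8(127)
    print(int(high) - int(low) + 1)  # 256, exact
    # high - low + 1 on the np.int8 scalars wraps modulo 256 instead,
    # so the degrees-of-freedom count would come out wrong.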
diff --git a/tests/utils/test_passive_env_checker.py b/tests/utils/test_passive_env_checker.py
index 84d02d73e..38ba95b48 100644
--- a/tests/utils/test_passive_env_checker.py
+++ b/tests/utils/test_passive_env_checker.py
@@ -35,7 +35,7 @@ def _modify_space(space: spaces.Space, attribute: str, value):
         [
             UserWarning,
             spaces.Box(np.zeros(5), np.zeros(5)),
-            "A Box observation space maximum and minimum values are equal. Actual equal coordinates: [(0,), (1,), (2,), (3,), (4,)]",
+            "A Box observation space maximum and minimum values are equal.",
         ],
         [
             AssertionError,
@@ -106,7 +106,7 @@ def test_check_observation_space(test, space, message: str):
         [
             UserWarning,
             spaces.Box(np.zeros(5), np.zeros(5)),
-            "A Box action space maximum and minimum values are equal. Actual equal coordinates: [(0,), (1,), (2,), (3,), (4,)]",
+            "A Box action space maximum and minimum values are equal.",
         ],
         [
             AssertionError,
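Finally, the two non-doctest test/env changes earlier in the patch replace aliases that NumPy 2.0 removed outright: np.float_ (acrobot's rk4) and np.cast (the out-of-bound action test). The version-portable spellings, for reference (illustrative values, not the patch's code):

    import numpy as np

    yout = np.zeros((4,), np.float64)        # was: np.zeros((4,), np.float_)
    oob = np.asarray(1e6, dtype=np.float32)  # was: np.cast[np.float32](1e6)
    assert yout.dtype == np.float64 and oob.dtype == np.float32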