From 82bf2f5a3fc673b2d3b3a0e40d78938f834d9898 Mon Sep 17 00:00:00 2001
From: Mark Towers
Date: Tue, 3 Sep 2024 12:30:58 +0100
Subject: [PATCH] Revert #1144 (#1153)

---
 .github/workflows/{build-docs.yml => docs-build-dev.yml}  | 0
 .../{docs-versioning.yml => docs-build-release.yml}       | 4 ++--
 .../{docs-manual-versioning.yml => docs-manual-build.yml} | 2 +-
 .github/workflows/{build-publish.yml => pypi-publish.yml} | 8 ++------
 .github/workflows/{pre-commit.yml => run-pre-commit.yml}  | 2 +-
 .github/workflows/{build.yml => run-pytest.yml}           | 5 +++--
 gymnasium/spaces/dict.py                                  | 2 +-
 gymnasium/spaces/discrete.py                              | 4 ++--
 gymnasium/spaces/oneof.py                                 | 4 ++--
 gymnasium/spaces/tuple.py                                 | 2 +-
 gymnasium/wrappers/stateful_observation.py                | 4 ++--
 gymnasium/wrappers/stateful_reward.py                     | 4 ++--
 gymnasium/wrappers/transform_action.py                    | 2 +-
 gymnasium/wrappers/transform_observation.py               | 8 ++++----
 gymnasium/wrappers/transform_reward.py                    | 2 +-
 gymnasium/wrappers/vector/dict_info_to_list.py            | 4 ++--
 gymnasium/wrappers/vector/stateful_observation.py         | 8 ++++----
 gymnasium/wrappers/vector/stateful_reward.py              | 8 ++++----
 gymnasium/wrappers/vector/vectorize_action.py             | 2 +-
 gymnasium/wrappers/vector/vectorize_observation.py        | 8 ++++----
 20 files changed, 40 insertions(+), 43 deletions(-)
 rename .github/workflows/{build-docs.yml => docs-build-dev.yml} (100%)
 rename .github/workflows/{docs-versioning.yml => docs-build-release.yml} (97%)
 rename .github/workflows/{docs-manual-versioning.yml => docs-manual-build.yml} (98%)
 rename .github/workflows/{build-publish.yml => pypi-publish.yml} (91%)
 rename .github/workflows/{pre-commit.yml => run-pre-commit.yml} (95%)
 rename .github/workflows/{build.yml => run-pytest.yml} (93%)

diff --git a/.github/workflows/build-docs.yml b/.github/workflows/docs-build-dev.yml
similarity index 100%
rename from .github/workflows/build-docs.yml
rename to .github/workflows/docs-build-dev.yml
diff --git a/.github/workflows/docs-versioning.yml b/.github/workflows/docs-build-release.yml
similarity index 97%
rename from .github/workflows/docs-versioning.yml
rename to .github/workflows/docs-build-release.yml
index bb5d86463..544f813c3 100644
--- a/.github/workflows/docs-versioning.yml
+++ b/.github/workflows/docs-build-release.yml
@@ -1,10 +1,10 @@
-name: Docs Versioning
+name: Build release documentation website
 
 on:
   push:
     tags:
       - 'v?*.*.*'
-
-
+
 permissions:
   contents: write
diff --git a/.github/workflows/docs-manual-versioning.yml b/.github/workflows/docs-manual-build.yml
similarity index 98%
rename from .github/workflows/docs-manual-versioning.yml
rename to .github/workflows/docs-manual-build.yml
index a8f7083f2..8c5d877ff 100644
--- a/.github/workflows/docs-manual-versioning.yml
+++ b/.github/workflows/docs-manual-build.yml
@@ -1,4 +1,4 @@
-name: Manual Docs Versioning
+name: Manually Build Website
 
 on:
   workflow_dispatch:
diff --git a/.github/workflows/build-publish.yml b/.github/workflows/pypi-publish.yml
similarity index 91%
rename from .github/workflows/build-publish.yml
rename to .github/workflows/pypi-publish.yml
index 30284096c..7611a7488 100644
--- a/.github/workflows/build-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -4,7 +4,7 @@
 # - https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
 #
 # derived from https://github.com/Farama-Foundation/PettingZoo/blob/e230f4d80a5df3baf9bd905149f6d4e8ce22be31/.github/workflows/build-publish.yml
-name: build-publish
+name: Build artifact for PyPI
 
 on:
   push:
@@ -20,11 +20,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.8'
+      - uses: actions/setup-python@v5
 
       - name: Install dependencies
         run: python -m pip install --upgrade pip setuptools build
 
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/run-pre-commit.yml
similarity index 95%
rename from .github/workflows/pre-commit.yml
rename to .github/workflows/run-pre-commit.yml
index 97b55cb6c..5d0015cd1 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/run-pre-commit.yml
@@ -1,6 +1,6 @@
 # https://pre-commit.com
 # This GitHub Action assumes that the repo contains a valid .pre-commit-config.yaml file.
-name: pre-commit
+name: Run pre-commit
 on:
   pull_request:
   push:
diff --git a/.github/workflows/build.yml b/.github/workflows/run-pytest.yml
similarity index 93%
rename from .github/workflows/build.yml
rename to .github/workflows/run-pytest.yml
index 4de7cbbde..7ded82063 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/run-pytest.yml
@@ -1,4 +1,4 @@
-name: build
+name: Run PyTest
 on: [pull_request, push]
 
 permissions:
@@ -8,6 +8,7 @@ jobs:
   build-all:
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: true
       matrix:
         python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
         numpy-version: ['>=1.21,<2.0', '>=2.0']
@@ -24,7 +25,7 @@ jobs:
       - name: Run tests
         run: docker run gymnasium-all-docker pytest tests/*
       - name: Run doctests
-        # if: ${{ matrix.numpy-version == '>=2.0' }}
+        if: ${{ matrix.python-version != '3.8' }}
         run: docker run gymnasium-all-docker pytest --doctest-modules gymnasium/
 
   build-necessary:
diff --git a/gymnasium/spaces/dict.py b/gymnasium/spaces/dict.py
index 128cf8c71..49ff4c907 100644
--- a/gymnasium/spaces/dict.py
+++ b/gymnasium/spaces/dict.py
@@ -20,7 +20,7 @@ class Dict(Space[typing.Dict[str, Any]], typing.Mapping[str, Space[Any]]):
         >>> from gymnasium.spaces import Dict, Box, Discrete
         >>> observation_space = Dict({"position": Box(-1, 1, shape=(2,)), "color": Discrete(3)}, seed=42)
         >>> observation_space.sample()
-        {'color': 0, 'position': array([-0.3991573 ,  0.21649833], dtype=float32)}
+        {'color': np.int64(0), 'position': array([-0.3991573 ,  0.21649833], dtype=float32)}
 
         With a nested dict:
 
diff --git a/gymnasium/spaces/discrete.py b/gymnasium/spaces/discrete.py
index 41b9c356a..9a4575252 100644
--- a/gymnasium/spaces/discrete.py
+++ b/gymnasium/spaces/discrete.py
@@ -18,10 +18,10 @@ class Discrete(Space[np.int64]):
         >>> from gymnasium.spaces import Discrete
         >>> observation_space = Discrete(2, seed=42)  # {0, 1}
         >>> observation_space.sample()
-        0
+        np.int64(0)
         >>> observation_space = Discrete(3, start=-1, seed=42)  # {-1, 0, 1}
         >>> observation_space.sample()
-        -1
+        np.int64(-1)
     """
 
     def __init__(
diff --git a/gymnasium/spaces/oneof.py b/gymnasium/spaces/oneof.py
index 08aa50a5a..50e463be4 100644
--- a/gymnasium/spaces/oneof.py
+++ b/gymnasium/spaces/oneof.py
@@ -19,9 +19,9 @@ class OneOf(Space[Any]):
         >>> from gymnasium.spaces import OneOf, Box, Discrete
         >>> observation_space = OneOf((Discrete(2), Box(-1, 1, shape=(2,))), seed=123)
         >>> observation_space.sample()  # the first element is the space index (Box in this case) and the second element is the sample from Box
-        (0, 0)
+        (np.int64(0), np.int64(0))
         >>> observation_space.sample()  # this time the Discrete space was sampled as index=0
-        (1, array([-0.00711833, -0.7257502 ], dtype=float32))
+        (np.int64(1), array([-0.00711833, -0.7257502 ], dtype=float32))
         >>> observation_space[0]
         Discrete(2)
         >>> observation_space[1]
diff --git a/gymnasium/spaces/tuple.py b/gymnasium/spaces/tuple.py
index b14527650..05a1f652a 100644
--- a/gymnasium/spaces/tuple.py
+++ b/gymnasium/spaces/tuple.py
@@ -19,7 +19,7 @@ class Tuple(Space[typing.Tuple[Any, ...]], typing.Sequence[Any]):
         >>> from gymnasium.spaces import Tuple, Box, Discrete
         >>> observation_space = Tuple((Discrete(2), Box(-1, 1, shape=(2,))), seed=42)
         >>> observation_space.sample()
-        (0, array([-0.3991573 ,  0.21649833], dtype=float32))
+        (np.int64(0), array([-0.3991573 ,  0.21649833], dtype=float32))
     """
 
     def __init__(
diff --git a/gymnasium/wrappers/stateful_observation.py b/gymnasium/wrappers/stateful_observation.py
index 1ac798034..edffe3d77 100644
--- a/gymnasium/wrappers/stateful_observation.py
+++ b/gymnasium/wrappers/stateful_observation.py
@@ -557,9 +557,9 @@ class MaxAndSkipObservation(
         >>> wrapped_obs0, *_ = wrapped_env.reset(seed=123)
         >>> wrapped_obs1, *_ = wrapped_env.step(1)
         >>> np.all(obs0 == wrapped_obs0)
-        True
+        np.True_
         >>> np.all(wrapped_obs1 == skip_and_max_obs)
-        True
+        np.True_
 
     Change logs:
      * v1.0.0 - Initially add
 
diff --git a/gymnasium/wrappers/stateful_reward.py b/gymnasium/wrappers/stateful_reward.py
index cfcc9995e..67e2b784f 100644
--- a/gymnasium/wrappers/stateful_reward.py
+++ b/gymnasium/wrappers/stateful_reward.py
@@ -58,7 +58,7 @@ class NormalizeReward(
         ...
         >>> env.close()
         >>> np.var(episode_rewards)
-        0.0008876301247721108
+        np.float64(0.0008876301247721108)
 
     Example with the normalize reward wrapper:
         >>> import numpy as np
@@ -76,7 +76,7 @@ class NormalizeReward(
         >>> env.close()
         >>> # will approach 0.99 with more episodes
         >>> np.var(episode_rewards)
-        0.010162116476634746
+        np.float64(0.010162116476634746)
 
     Change logs:
      * v0.21.0 - Initially added
diff --git a/gymnasium/wrappers/transform_action.py b/gymnasium/wrappers/transform_action.py
index 8ab5bb581..a069ab04f 100644
--- a/gymnasium/wrappers/transform_action.py
+++ b/gymnasium/wrappers/transform_action.py
@@ -146,7 +146,7 @@ class RescaleAction(
         >>> wrapped_env = RescaleAction(env, min_action=min_action, max_action=max_action)
         >>> wrapped_env_obs, _, _, _, _ = wrapped_env.step(max_action)
         >>> np.all(obs == wrapped_env_obs)
-        True
+        np.True_
 
     Change logs:
      * v0.15.4 - Initially added
diff --git a/gymnasium/wrappers/transform_observation.py b/gymnasium/wrappers/transform_observation.py
index 0dafe69a8..824a401c3 100644
--- a/gymnasium/wrappers/transform_observation.py
+++ b/gymnasium/wrappers/transform_observation.py
@@ -594,11 +594,11 @@ class AddRenderObservation(
         >>> obs, _ = env.reset(seed=123)
         >>> image = env.render()
         >>> np.all(obs == image)
-        True
+        np.True_
         >>> obs, *_ = env.step(env.action_space.sample())
         >>> image = env.render()
         >>> np.all(obs == image)
-        True
+        np.True_
 
     Example - Add the rendered image to the original observation as a dictionary item:
         >>> env = gym.make("CartPole-v1", render_mode="rgb_array")
@@ -611,11 +611,11 @@ class AddRenderObservation(
         >>> obs["state"]
         array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32)
         >>> np.all(obs["pixels"] == env.render())
-        True
+        np.True_
         >>> obs, reward, terminates, truncates, info = env.step(env.action_space.sample())
         >>> image = env.render()
         >>> np.all(obs["pixels"] == image)
-        True
+        np.True_
 
     Change logs:
      * v0.15.0 - Initially added as ``PixelObservationWrapper``
diff --git a/gymnasium/wrappers/transform_reward.py b/gymnasium/wrappers/transform_reward.py
index d30248b09..b17308c25 100644
--- a/gymnasium/wrappers/transform_reward.py
+++ b/gymnasium/wrappers/transform_reward.py
@@ -77,7 +77,7 @@ class ClipReward(TransformReward[ObsType, ActType], gym.utils.RecordConstructorA
         >>> _ = env.reset()
         >>> _, rew, _, _, _ = env.step(1)
         >>> rew
-        0.5
+        np.float64(0.5)
 
     Change logs:
      * v1.0.0 - Initially added
diff --git a/gymnasium/wrappers/vector/dict_info_to_list.py b/gymnasium/wrappers/vector/dict_info_to_list.py
index c7afa537a..c66783fc3 100644
--- a/gymnasium/wrappers/vector/dict_info_to_list.py
+++ b/gymnasium/wrappers/vector/dict_info_to_list.py
@@ -54,13 +54,13 @@ class DictInfoToList(VectorWrapper):
         >>> _ = envs.action_space.seed(123)
         >>> _, _, _, _, infos = envs.step(envs.action_space.sample())
         >>> infos
-        {'x_position': array([0.03332211, 0.10172355]), '_x_position': array([ True,  True]), 'x_velocity': array([-0.06296527,  0.89345848]), '_x_velocity': array([ True,  True]), 'reward_run': array([-0.06296527,  0.89345848]), '_reward_run': array([ True,  True]), 'reward_ctrl': array([-0.24503503, -0.21944423]), '_reward_ctrl': array([ True,  True])}
+        {'x_position': array([0.03332211, 0.10172355]), '_x_position': array([ True,  True]), 'x_velocity': array([-0.06296527,  0.89345848]), '_x_velocity': array([ True,  True]), 'reward_run': array([-0.06296527,  0.89345848]), '_reward_run': array([ True,  True]), 'reward_ctrl': array([-0.24503504, -0.21944423], dtype=float32), '_reward_ctrl': array([ True,  True])}
         >>> envs = DictInfoToList(envs)
         >>> _ = envs.reset(seed=123)
         >>> _ = envs.action_space.seed(123)
         >>> _, _, _, _, infos = envs.step(envs.action_space.sample())
         >>> infos
-        [{'x_position': 0.0333221090036294, 'x_velocity': -0.06296527291998574, 'reward_run': -0.06296527291998574, 'reward_ctrl': -0.2450350284576416}, {'x_position': 0.10172354684460168, 'x_velocity': 0.8934584807363618, 'reward_run': 0.8934584807363618, 'reward_ctrl': -0.21944422721862794}]
+        [{'x_position': np.float64(0.0333221090036294), 'x_velocity': np.float64(-0.06296527291998574), 'reward_run': np.float64(-0.06296527291998574), 'reward_ctrl': np.float32(-0.24503504)}, {'x_position': np.float64(0.10172354684460168), 'x_velocity': np.float64(0.8934584807363618), 'reward_run': np.float64(0.8934584807363618), 'reward_ctrl': np.float32(-0.21944423)}]
 
     Change logs:
      * v0.24.0 - Initially added as ``VectorListInfo``
diff --git a/gymnasium/wrappers/vector/stateful_observation.py b/gymnasium/wrappers/vector/stateful_observation.py
index 75a80416f..266c488d1 100644
--- a/gymnasium/wrappers/vector/stateful_observation.py
+++ b/gymnasium/wrappers/vector/stateful_observation.py
@@ -35,9 +35,9 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructor
         >>> for _ in range(100):
         ...     obs, *_ = envs.step(envs.action_space.sample())
         >>> np.mean(obs)
-        0.024251968
+        np.float32(0.024251968)
         >>> np.std(obs)
-        0.62259156
+        np.float32(0.62259156)
         >>> envs.close()
 
     Example with the normalize reward wrapper:
@@ -49,9 +49,9 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructor
         >>> for _ in range(100):
         ...     obs, *_ = envs.step(envs.action_space.sample())
         >>> np.mean(obs)
-        -0.2359734
+        np.float32(-0.2359734)
         >>> np.std(obs)
-        1.1938739
+        np.float32(1.1938739)
         >>> envs.close()
     """
 
diff --git a/gymnasium/wrappers/vector/stateful_reward.py b/gymnasium/wrappers/vector/stateful_reward.py
index 14cd03f4f..2e0e8ea50 100644
--- a/gymnasium/wrappers/vector/stateful_reward.py
+++ b/gymnasium/wrappers/vector/stateful_reward.py
@@ -50,9 +50,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
         ...
         >>> envs.close()
         >>> np.mean(episode_rewards)
-        -0.03359492141887935
+        np.float64(-0.03359492141887935)
         >>> np.std(episode_rewards)
-        0.029028230434438706
+        np.float64(0.029028230434438706)
 
     Example with the normalize reward wrapper:
         >>> import gymnasium as gym
@@ -68,9 +68,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
         ...
         >>> envs.close()
         >>> np.mean(episode_rewards)
-        -0.1598639586606745
+        np.float64(-0.1598639586606745)
         >>> np.std(episode_rewards)
-        0.27800309628058434
+        np.float64(0.27800309628058434)
     """
 
     def __init__(
diff --git a/gymnasium/wrappers/vector/vectorize_action.py b/gymnasium/wrappers/vector/vectorize_action.py
index 1f517f663..3dc4a797a 100644
--- a/gymnasium/wrappers/vector/vectorize_action.py
+++ b/gymnasium/wrappers/vector/vectorize_action.py
@@ -33,7 +33,7 @@ class TransformAction(VectorActionWrapper):
         >>> obs
         array([[-0.46553135, -0.00142543],
                [-0.498371  , -0.00715587],
-               [-0.4651575 , -0.00624371]], dtype=float32)
+               [-0.46515748, -0.00624371]], dtype=float32)
 
     Example - With action transformation:
         >>> import gymnasium as gym
diff --git a/gymnasium/wrappers/vector/vectorize_observation.py b/gymnasium/wrappers/vector/vectorize_observation.py
index 68b5ef8b6..88bd539ad 100644
--- a/gymnasium/wrappers/vector/vectorize_observation.py
+++ b/gymnasium/wrappers/vector/vectorize_observation.py
@@ -321,15 +321,15 @@ class RescaleObservation(VectorizeTransformObservation):
         >>> envs = gym.make_vec("MountainCar-v0", num_envs=3, vectorization_mode="sync")
         >>> obs, info = envs.reset(seed=123)
         >>> obs.min()
-        -0.46352962
+        np.float32(-0.46352962)
         >>> obs.max()
-        0.0
+        np.float32(0.0)
         >>> envs = RescaleObservation(envs, min_obs=-5.0, max_obs=5.0)
         >>> obs, info = envs.reset(seed=123)
         >>> obs.min()
-        -0.90849805
+        np.float32(-0.90849805)
         >>> obs.max()
-        0.0
+        np.float32(0.0)
         >>> envs.close()
     """
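
Note on the doctest changes above: every expected output that switches from a plain scalar (0, 0.5, True) to a typed repr (np.int64(0), np.float64(0.5), np.True_) follows from NumPy 2.0's adoption of NEP 51, which changed repr() for NumPy scalars to spell out their type. A minimal sketch of the difference, illustrative only and not part of the patch:

    import numpy as np

    # Under NumPy >= 2.0 (NEP 51), scalar reprs name their type:
    print(repr(np.int64(0)))      # NumPy 2.x: np.int64(0)       NumPy 1.x: 0
    print(repr(np.float64(0.5)))  # NumPy 2.x: np.float64(0.5)   NumPy 1.x: 0.5
    print(repr(np.bool_(True)))   # NumPy 2.x: np.True_          NumPy 1.x: True

    # Array reprs are unchanged, so doctest lines such as
    # array([-0.3991573 ,  0.21649833], dtype=float32) print the same either way.

This is presumably also why the restored guard in run-pytest.yml skips doctests when matrix.python-version is '3.8': NumPy 2.0 requires Python >= 3.9, so the 3.8 jobs run NumPy 1.x and would print the old scalar form.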