
Revert #1144 (#1153)
pseudo-rnd-thoughts authored Sep 3, 2024
1 parent 64fac8e commit 82bf2f5
Showing 20 changed files with 40 additions and 43 deletions.
File renamed without changes.
@@ -1,10 +1,10 @@
-name: Docs Versioning
+name: Build release documentation website

 on:
   push:
     tags:
       - 'v?*.*.*'
-

 permissions:
   contents: write
@@ -1,4 +1,4 @@
-name: Manual Docs Versioning
+name: Manually Build Website

 on:
   workflow_dispatch:
@@ -4,7 +4,7 @@
 # - https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
 #
 # derived from https://github.com/Farama-Foundation/PettingZoo/blob/e230f4d80a5df3baf9bd905149f6d4e8ce22be31/.github/workflows/build-publish.yml
-name: build-publish
+name: Build artifact for PyPI

 on:
   push:
@@ -20,11 +20,7 @@ jobs:

     steps:
       - uses: actions/checkout@v4

-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.8'
+      - uses: actions/setup-python@v5

       - name: Install dependencies
         run: python -m pip install --upgrade pip setuptools build
@@ -1,6 +1,6 @@
 # https://pre-commit.com
 # This GitHub Action assumes that the repo contains a valid .pre-commit-config.yaml file.
-name: pre-commit
+name: Run pre-commit
 on:
   pull_request:
   push:
@@ -1,4 +1,4 @@
-name: build
+name: Run PyTest
 on: [pull_request, push]

 permissions:
@@ -8,6 +8,7 @@ jobs:
   build-all:
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: true
       matrix:
         python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
         numpy-version: ['>=1.21,<2.0', '>=2.0']
@@ -24,7 +25,7 @@ jobs:
       - name: Run tests
         run: docker run gymnasium-all-docker pytest tests/*
       - name: Run doctests
-        # if: ${{ matrix.numpy-version == '>=2.0' }}
+        if: ${{ matrix.python-version != '3.8' }}
         run: docker run gymnasium-all-docker pytest --doctest-modules gymnasium/

   build-necessary:
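The doctest step above is gated off Python 3.8 because NumPy 2.0 supports only Python 3.9+, so the 3.8 matrix entry runs NumPy 1.x. The doctest outputs restored throughout this commit follow NumPy 2's scalar representation (NEP 51), under which scalars print with an explicit type. A minimal sketch of the difference, assuming NumPy >= 2.0 is installed:

import numpy as np

# Under NumPy >= 2.0 (NEP 51), scalar reprs spell out their type;
# these are the values the restored doctests below expect.
print(repr(np.int64(0)))      # np.int64(0)
print(repr(np.float64(0.5)))  # np.float64(0.5)
print(repr(np.all([True])))   # np.True_

# Under NumPy 1.x the same expressions repr as 0, 0.5, and True,
# which is why the doctest run is conditioned on the build matrix.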
2 changes: 1 addition & 1 deletion gymnasium/spaces/dict.py
@@ -20,7 +20,7 @@ class Dict(Space[typing.Dict[str, Any]], typing.Mapping[str, Space[Any]]):
 >>> from gymnasium.spaces import Dict, Box, Discrete
 >>> observation_space = Dict({"position": Box(-1, 1, shape=(2,)), "color": Discrete(3)}, seed=42)
 >>> observation_space.sample()
-{'color': 0, 'position': array([-0.3991573 , 0.21649833], dtype=float32)}
+{'color': np.int64(0), 'position': array([-0.3991573 , 0.21649833], dtype=float32)}

 With a nested dict:
4 changes: 2 additions & 2 deletions gymnasium/spaces/discrete.py
@@ -18,10 +18,10 @@ class Discrete(Space[np.int64]):
 >>> from gymnasium.spaces import Discrete
 >>> observation_space = Discrete(2, seed=42)  # {0, 1}
 >>> observation_space.sample()
-0
+np.int64(0)
 >>> observation_space = Discrete(3, start=-1, seed=42)  # {-1, 0, 1}
 >>> observation_space.sample()
--1
+np.int64(-1)
 """

 def __init__(
4 changes: 2 additions & 2 deletions gymnasium/spaces/oneof.py
@@ -19,9 +19,9 @@ class OneOf(Space[Any]):
 >>> from gymnasium.spaces import OneOf, Box, Discrete
 >>> observation_space = OneOf((Discrete(2), Box(-1, 1, shape=(2,))), seed=123)
 >>> observation_space.sample()  # the first element is the space index (Discrete in this case) and the second element is the sample from Discrete
-(0, 0)
+(np.int64(0), np.int64(0))
 >>> observation_space.sample()  # this time the Box space was sampled as index=1
-(1, array([-0.00711833, -0.7257502 ], dtype=float32))
+(np.int64(1), array([-0.00711833, -0.7257502 ], dtype=float32))
 >>> observation_space[0]
 Discrete(2)
 >>> observation_space[1]
2 changes: 1 addition & 1 deletion gymnasium/spaces/tuple.py
@@ -19,7 +19,7 @@ class Tuple(Space[typing.Tuple[Any, ...]], typing.Sequence[Any]):
 >>> from gymnasium.spaces import Tuple, Box, Discrete
 >>> observation_space = Tuple((Discrete(2), Box(-1, 1, shape=(2,))), seed=42)
 >>> observation_space.sample()
-(0, array([-0.3991573 , 0.21649833], dtype=float32))
+(np.int64(0), array([-0.3991573 , 0.21649833], dtype=float32))
 """

 def __init__(
4 changes: 2 additions & 2 deletions gymnasium/wrappers/stateful_observation.py
@@ -557,9 +557,9 @@ class MaxAndSkipObservation(
 >>> wrapped_obs0, *_ = wrapped_env.reset(seed=123)
 >>> wrapped_obs1, *_ = wrapped_env.step(1)
 >>> np.all(obs0 == wrapped_obs0)
-True
+np.True_
 >>> np.all(wrapped_obs1 == skip_and_max_obs)
-True
+np.True_

 Change logs:
  * v1.0.0 - Initially added
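For context on the skip_and_max_obs comparison above: MaxAndSkipObservation repeats each action for a fixed number of frames and returns the elementwise maximum of the last two observations. A minimal sketch of that idea, not the wrapper's actual implementation (env, action, and skip are illustrative):

import numpy as np

def max_and_skip_step(env, action, skip=4):
    # Repeat `action` for up to `skip` frames, summing rewards and
    # keeping the last two observations for the elementwise max.
    total_reward, last_two = 0.0, []
    for _ in range(skip):
        obs, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        last_two = (last_two + [obs])[-2:]
        if terminated or truncated:
            break
    max_obs = np.maximum(last_two[0], last_two[-1])
    return max_obs, total_reward, terminated, truncated, info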
4 changes: 2 additions & 2 deletions gymnasium/wrappers/stateful_reward.py
@@ -58,7 +58,7 @@ class NormalizeReward(
 ...
 >>> env.close()
 >>> np.var(episode_rewards)
-0.0008876301247721108
+np.float64(0.0008876301247721108)

 Example with the normalize reward wrapper:
 >>> import numpy as np
@@ -76,7 +76,7 @@ class NormalizeReward(
 >>> env.close()
 >>> # will approach 0.99 with more episodes
 >>> np.var(episode_rewards)
-0.010162116476634746
+np.float64(0.010162116476634746)

 Change logs:
  * v0.21.0 - Initially added
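The variances above reflect NormalizeReward's scheme of scaling each reward by a running estimate of the discounted return's standard deviation. A minimal sketch of that idea using a Welford-style online variance, not the wrapper's exact code:

import numpy as np

class RewardNormalizer:
    # Divide each reward by the running std of the discounted return.
    def __init__(self, gamma=0.99, epsilon=1e-8):
        self.gamma, self.epsilon = gamma, epsilon
        self.ret = 0.0                  # running discounted return
        self.mean, self.var, self.count = 0.0, 1.0, 0

    def normalize(self, reward):
        self.ret = self.ret * self.gamma + reward
        self.count += 1
        delta = self.ret - self.mean    # online (Welford) update
        self.mean += delta / self.count
        self.var += (delta * (self.ret - self.mean) - self.var) / self.count
        return reward / np.sqrt(self.var + self.epsilon)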
2 changes: 1 addition & 1 deletion gymnasium/wrappers/transform_action.py
@@ -146,7 +146,7 @@ class RescaleAction(
 >>> wrapped_env = RescaleAction(env, min_action=min_action, max_action=max_action)
 >>> wrapped_env_obs, _, _, _, _ = wrapped_env.step(max_action)
 >>> np.all(obs == wrapped_env_obs)
-True
+np.True_

 Change logs:
  * v0.15.4 - Initially added
8 changes: 4 additions & 4 deletions gymnasium/wrappers/transform_observation.py
@@ -594,11 +594,11 @@ class AddRenderObservation(
 >>> obs, _ = env.reset(seed=123)
 >>> image = env.render()
 >>> np.all(obs == image)
-True
+np.True_
 >>> obs, *_ = env.step(env.action_space.sample())
 >>> image = env.render()
 >>> np.all(obs == image)
-True
+np.True_

 Example - Add the rendered image to the original observation as a dictionary item:
 >>> env = gym.make("CartPole-v1", render_mode="rgb_array")
@@ -611,11 +611,11 @@ class AddRenderObservation(
 >>> obs["state"]
 array([ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], dtype=float32)
 >>> np.all(obs["pixels"] == env.render())
-True
+np.True_
 >>> obs, reward, terminates, truncates, info = env.step(env.action_space.sample())
 >>> image = env.render()
 >>> np.all(obs["pixels"] == image)
-True
+np.True_

 Change logs:
  * v0.15.0 - Initially added as ``PixelObservationWrapper``
2 changes: 1 addition & 1 deletion gymnasium/wrappers/transform_reward.py
@@ -77,7 +77,7 @@ class ClipReward(TransformReward[ObsType, ActType], gym.utils.RecordConstructorArgs):
 >>> _ = env.reset()
 >>> _, rew, _, _, _ = env.step(1)
 >>> rew
-0.5
+np.float64(0.5)

 Change logs:
  * v1.0.0 - Initially added
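For reference, the np.float64(0.5) above is CartPole's per-step reward of 1.0 clipped by bounds presumably set earlier in the (elided) docstring; the core operation is a plain clip. A sketch with illustrative bounds:

import numpy as np

def clip_reward(reward, min_reward=0.0, max_reward=0.5):
    # np.clip returns a NumPy scalar, hence the np.float64(...) repr
    return np.clip(reward, min_reward, max_reward)

print(repr(clip_reward(1.0)))  # np.float64(0.5)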
4 changes: 2 additions & 2 deletions gymnasium/wrappers/vector/dict_info_to_list.py
@@ -54,13 +54,13 @@ class DictInfoToList(VectorWrapper):
 >>> _ = envs.action_space.seed(123)
 >>> _, _, _, _, infos = envs.step(envs.action_space.sample())
 >>> infos
-{'x_position': array([0.03332211, 0.10172355]), '_x_position': array([ True, True]), 'x_velocity': array([-0.06296527, 0.89345848]), '_x_velocity': array([ True, True]), 'reward_run': array([-0.06296527, 0.89345848]), '_reward_run': array([ True, True]), 'reward_ctrl': array([-0.24503503, -0.21944423]), '_reward_ctrl': array([ True, True])}
+{'x_position': array([0.03332211, 0.10172355]), '_x_position': array([ True, True]), 'x_velocity': array([-0.06296527, 0.89345848]), '_x_velocity': array([ True, True]), 'reward_run': array([-0.06296527, 0.89345848]), '_reward_run': array([ True, True]), 'reward_ctrl': array([-0.24503504, -0.21944423], dtype=float32), '_reward_ctrl': array([ True, True])}
 >>> envs = DictInfoToList(envs)
 >>> _ = envs.reset(seed=123)
 >>> _ = envs.action_space.seed(123)
 >>> _, _, _, _, infos = envs.step(envs.action_space.sample())
 >>> infos
-[{'x_position': 0.0333221090036294, 'x_velocity': -0.06296527291998574, 'reward_run': -0.06296527291998574, 'reward_ctrl': -0.2450350284576416}, {'x_position': 0.10172354684460168, 'x_velocity': 0.8934584807363618, 'reward_run': 0.8934584807363618, 'reward_ctrl': -0.21944422721862794}]
+[{'x_position': np.float64(0.0333221090036294), 'x_velocity': np.float64(-0.06296527291998574), 'reward_run': np.float64(-0.06296527291998574), 'reward_ctrl': np.float32(-0.24503504)}, {'x_position': np.float64(0.10172354684460168), 'x_velocity': np.float64(0.8934584807363618), 'reward_run': np.float64(0.8934584807363618), 'reward_ctrl': np.float32(-0.21944423)}]

 Change logs:
  * v0.24.0 - Initially added as ``VectorListInfo``
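The two infos outputs above show exactly what DictInfoToList converts: a dict of per-key arrays with '_key' validity masks becomes one plain dict per sub-environment. A minimal sketch of that conversion, simplified relative to the wrapper itself:

def dict_info_to_list(infos, num_envs):
    # Convert {'key': array, '_key': mask, ...} into per-env dicts,
    # keeping only the entries whose validity mask is True.
    per_env = [{} for _ in range(num_envs)]
    for key, values in infos.items():
        if key.startswith("_"):  # '_key' arrays are validity masks
            continue
        mask = infos.get(f"_{key}")
        for i in range(num_envs):
            if mask is None or mask[i]:
                per_env[i][key] = values[i]
    return per_env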
8 changes: 4 additions & 4 deletions gymnasium/wrappers/vector/stateful_observation.py
@@ -35,9 +35,9 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructorArgs):
 >>> for _ in range(100):
 ...     obs, *_ = envs.step(envs.action_space.sample())
 >>> np.mean(obs)
-0.024251968
+np.float32(0.024251968)
 >>> np.std(obs)
-0.62259156
+np.float32(0.62259156)
 >>> envs.close()

 Example with the normalize reward wrapper:
@@ -49,9 +49,9 @@ class NormalizeObservation(VectorObservationWrapper, gym.utils.RecordConstructorArgs):
 >>> for _ in range(100):
 ...     obs, *_ = envs.step(envs.action_space.sample())
 >>> np.mean(obs)
--0.2359734
+np.float32(-0.2359734)
 >>> np.std(obs)
-1.1938739
+np.float32(1.1938739)
 >>> envs.close()
 """

8 changes: 4 additions & 4 deletions gymnasium/wrappers/vector/stateful_reward.py
@@ -50,9 +50,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
 ...
 >>> envs.close()
 >>> np.mean(episode_rewards)
--0.03359492141887935
+np.float64(-0.03359492141887935)
 >>> np.std(episode_rewards)
-0.029028230434438706
+np.float64(0.029028230434438706)

 Example with the normalize reward wrapper:
 >>> import gymnasium as gym
@@ -68,9 +68,9 @@ class NormalizeReward(VectorWrapper, gym.utils.RecordConstructorArgs):
 ...
 >>> envs.close()
 >>> np.mean(episode_rewards)
--0.1598639586606745
+np.float64(-0.1598639586606745)
 >>> np.std(episode_rewards)
-0.27800309628058434
+np.float64(0.27800309628058434)
 """

 def __init__(
2 changes: 1 addition & 1 deletion gymnasium/wrappers/vector/vectorize_action.py
@@ -33,7 +33,7 @@ class TransformAction(VectorActionWrapper):
 >>> obs
 array([[-0.46553135, -0.00142543],
        [-0.498371  , -0.00715587],
-       [-0.4651575 , -0.00624371]], dtype=float32)
+       [-0.46515748, -0.00624371]], dtype=float32)

 Example - With action transformation:
 >>> import gymnasium as gym
8 changes: 4 additions & 4 deletions gymnasium/wrappers/vector/vectorize_observation.py
@@ -321,15 +321,15 @@ class RescaleObservation(VectorizeTransformObservation):
 >>> envs = gym.make_vec("MountainCar-v0", num_envs=3, vectorization_mode="sync")
 >>> obs, info = envs.reset(seed=123)
 >>> obs.min()
--0.46352962
+np.float32(-0.46352962)
 >>> obs.max()
-0.0
+np.float32(0.0)
 >>> envs = RescaleObservation(envs, min_obs=-5.0, max_obs=5.0)
 >>> obs, info = envs.reset(seed=123)
 >>> obs.min()
--0.90849805
+np.float32(-0.90849805)
 >>> obs.max()
-0.0
+np.float32(0.0)
 >>> envs.close()
 """

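The RescaleObservation numbers above are consistent with an affine map from the space's bounds onto [min_obs, max_obs]. A quick check, assuming MountainCar's position range of [-1.2, 0.6]:

low, high = -1.2, 0.6      # MountainCar position bounds
min_obs, max_obs = -5.0, 5.0
obs = -0.46352962          # obs.min() before rescaling

rescaled = min_obs + (obs - low) * (max_obs - min_obs) / (high - low)
print(round(rescaled, 6))  # -0.908498, matching np.float32(-0.90849805) above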
