Merge branch 'master' into master

Farama-Foundation · Dec 4, 2024 · 00cb35c · 00cb35c
2 parents fddc369 + 6d0a827
commit 00cb35c
Show file tree

Hide file tree

Showing 58 changed files with 638 additions and 309 deletions.
diff --git a/.github/workflows/build-publish.yml b/.github/workflows/build-publish.yml
@@ -31,6 +31,9 @@ jobs:
                     - os: ubuntu-latest
                       python: 311
                       platform: manylinux_x86_64
+                    - os: ubuntu-latest
+                      python: 312
+                      platform: manylinux_x86_64
 
         steps:
             - uses: actions/checkout@v4

diff --git a/.github/workflows/linux-test.yml b/.github/workflows/linux-test.yml
@@ -15,7 +15,7 @@ jobs:
         runs-on: ubuntu-latest
         strategy:
             matrix:
-                python-version: ['3.8', '3.9', '3.10', '3.11']
+                python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
         steps:
             - uses: actions/checkout@v4
             - name: Set up Python ${{ matrix.python-version }}

diff --git a/.github/workflows/linux-tutorials-test.yml b/.github/workflows/linux-tutorials-test.yml
@@ -15,9 +15,35 @@ jobs:
         runs-on: ubuntu-latest
         strategy:
             fail-fast: false
+
             matrix:
                 python-version: ['3.8', '3.9', '3.10', '3.11']
-                tutorial: [Tianshou, CustomEnvironment, CleanRL, SB3/kaz, SB3/waterworld, SB3/connect_four, SB3/test, AgileRL]  # TODO: fix tutorials and add back Ray
+                tutorial: [Tianshou, CustomEnvironment, CleanRL, SB3/kaz, SB3/waterworld, SB3/test]  # TODO: fix tutorials and add back Ray, fix SB3/connect_four tutorial
+        steps:
+            - uses: actions/checkout@v4
+            - name: Set up Python ${{ matrix.python-version }}
+              uses: actions/setup-python@v4
+              with:
+                  python-version: ${{ matrix.python-version }}
+            - name: Install dependencies and run tutorials
+              run: |
+                  sudo apt-get install python3-opengl xvfb parallel
+                  export PATH=/path/to/parallel:$PATH
+                  export root_dir=$(pwd)
+                  cd tutorials/${{ matrix.tutorial }}
+                  pip install -r requirements.txt
+                  pip uninstall -y pettingzoo
+                  pip install -e $root_dir[testing]
+                  AutoROM -v
+                  for f in *.py; do xvfb-run -a -s "-screen 0 1024x768x24" python "$f"; done
+
+    agilerl-tutorial-test:
+        runs-on: ubuntu-latest
+        strategy:
+            fail-fast: false
+            matrix:
+                python-version: ['3.9', '3.10', '3.11']
+                tutorial: [AgileRL]
         steps:
             - uses: actions/checkout@v4
             - name: Set up Python ${{ matrix.python-version }}

diff --git a/.github/workflows/macos-test.yml b/.github/workflows/macos-test.yml
@@ -15,7 +15,7 @@ jobs:
             matrix:
             # Big Sur, Monterey
                 os: [macos-11, macos-12]
-                python-version: ['3.8', '3.9', '3.10', '3.11']
+                python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
         steps:
             - uses: actions/checkout@v4
             - name: Set up Python ${{ matrix.python-version }}

diff --git a/README.md b/README.md
@@ -73,11 +73,6 @@ SuperSuit is a library that includes all commonly used wrappers in RL (frame sta
 
 PettingZoo keeps strict versioning for reproducibility reasons. All environments end in a suffix like "\_v0".  When changes are made to environments that might impact learning results, the number is increased by one to prevent potential confusion.
 
-## Project Maintainers
-Project Manager: [Elliot Tower](https://github.com/elliottower/)
-
-Maintenance for this project is also contributed by the broader Farama team: [farama.org/team](https://farama.org/team).
-
 ## Citation
 
 To cite this project in publication, please use
@@ -92,3 +87,6 @@ To cite this project in publication, please use
   year={2021}
 }
 ```
+## Project Maintainers
+- Project Manager: [David Gerard](https://github.com/David-GERARD) - `david.gerard.23@ucl.ac.uk`.
+- Maintenance for this project is also contributed by the broader Farama team: [farama.org/team](https://farama.org/team).
diff --git a/docs/api/aec.md b/docs/api/aec.md
@@ -94,8 +94,8 @@ The [_Agent Environment Cycle_](https://arxiv.org/abs/2009.13051) (AEC) model wa
 
 In an AEC environment, agents act sequentially, receiving updated observations and rewards before taking an action. The environment updates after each agent's step, making it a natural way of representing sequential games such as Chess. The AEC model is flexible enough to handle any type of game that multi-agent RL can consider.
 
-with the underlying environment updating after each agent's step. Agents receive updated observations and rewards at the beginning of their . The environment is updated after every step,
-This is a natural way of representing sequential games such as Chess, and
+The underlying environment updates after each agent's step, and agents receive updated observations and rewards at the beginning of their turn.
+This is a natural way of representing sequential games such as Chess and Go.
 
 ```{figure} /_static/img/aec_cycle_figure.png
     :width: 480px

diff --git a/docs/api/utils.md b/docs/api/utils.md
@@ -165,7 +165,7 @@ Base class which is used by [CaptureStdoutWrapper](https://pettingzoo.farama.org
 
 The agent selector utility allows for easy cycling of agents in an AEC environment. At any time it can be reset or reinitialized with a new order, allowing for changes in turn order or handling a dynamic number of agents (see [Knights-Archers-Zombies](https://pettingzoo.farama.org/environments/butterfly/knights_archers_zombies/) for an example of spawning/killing agents).
 
-Note: while many PettingZoo environments use agent_selector to manage agent cycling internally, it is not intended to be used externally when interacting with an environment. Instead, use `for agent in env.agent_iter()` (see [AEC API Usage](https://pettingzoo.farama.org/api/aec/#usage)).
+Note: while many PettingZoo environments use AgentSelector to manage agent cycling internally, it is not intended to be used externally when interacting with an environment. Instead, use `for agent in env.agent_iter()` (see [AEC API Usage](https://pettingzoo.farama.org/api/aec/#usage)).
 
 ```{eval-rst}
 .. currentmodule:: pettingzoo.utils

diff --git a/docs/code_examples/aec_rps.py b/docs/code_examples/aec_rps.py
@@ -5,7 +5,7 @@
 from gymnasium.spaces import Discrete
 
 from pettingzoo import AECEnv
-from pettingzoo.utils import agent_selector, wrappers
+from pettingzoo.utils import AgentSelector, wrappers
 
 ROCK = 0
 PAPER = 1
@@ -156,9 +156,9 @@ def reset(self, seed=None, options=None):
         self.observations = {agent: NONE for agent in self.agents}
         self.num_moves = 0
         """
-        Our agent_selector utility allows easy cyclic stepping through the agents list.
+        Our AgentSelector utility allows easy cyclic stepping through the agents list.
         """
-        self._agent_selector = agent_selector(self.agents)
+        self._agent_selector = AgentSelector(self.agents)
         self.agent_selection = self._agent_selector.next()
 
     def step(self, action):

diff --git a/docs/code_examples/aec_rps_usage.py b/docs/code_examples/aec_rps_usage.py
@@ -1,4 +1,4 @@
-import aec_rps
+from . import aec_rps
 
 env = aec_rps.env(render_mode="human")
 env.reset(seed=42)

diff --git a/docs/code_examples/parallel_rps_usage.py b/docs/code_examples/parallel_rps_usage.py
@@ -1,4 +1,4 @@
-import parallel_rps
+from . import parallel_rps
 
 env = parallel_rps.parallel_env(render_mode="human")
 observations, infos = env.reset()

diff --git a/docs/content/environment_creation.md b/docs/content/environment_creation.md
@@ -62,14 +62,14 @@ The utils directory also contain some classes which are only helpful for develop
 
 ### Agent selector
 
-The `agent_selector` class steps through agents in a cycle
+The `AgentSelector` class steps through agents in a cycle.
 
 It can be used as follows to cycle through the list of agents:
 
 ```python
-from pettingzoo.utils import agent_selector
+from pettingzoo.utils import AgentSelector
 agents = ["agent_1", "agent_2", "agent_3"]
-selector = agent_selector(agents)
+selector = AgentSelector(agents)
 agent_selection = selector.reset()
 # agent_selection will be "agent_1"
 for i in range(100):

diff --git a/docs/environments/third_party_envs.md b/docs/environments/third_party_envs.md
@@ -12,6 +12,18 @@ lastpage:
 ## Environments using the latest versions of PettingZoo
 *Due to a very recent major release of PettingZoo, there are currently few contributed third-party environments. If you'd like to contribute one, please reach out on [Discord](https://discord.gg/nHg2JRN489).*
 
+### [gfootball-gymnasium-pettingzoo](https://github.com/xihuai18/gfootball-gymnasium-pettingzoo)
+[![PettingZoo version dependency](https://img.shields.io/badge/PettingZoo-v1.24.3-blue)]()
+[![GitHub stars](https://img.shields.io/github/stars/xihuai18/gfootball-gymnasium-pettingzoo)]()
+
+Google Research Football ([GRF](https://github.com/google-research/football)) with Gymnasium and PettingZoo Compatibility.
+
+### [SMAC and SMACv2 with latest PettingZoo APIs](https://github.com/xihuai18/SMAC-PettingZoo)
+[![PettingZoo version dependency](https://img.shields.io/badge/PettingZoo-v1.24.3-blue)]()
+[![GitHub stars](https://img.shields.io/github/stars/xihuai18/SMAC-PettingZoo)]()
+
+[SMAC](https://github.com/oxwhirl/smac) and [SMACv2](https://github.com/oxwhirl/smacv2) with the latest PettingZoo Parallel APIs.
+
 ### [Sumo-RL](https://github.com/LucasAlegre/sumo-rl)
 
 [![PettingZoo version dependency](https://img.shields.io/badge/PettingZoo-v1.22.2-blue)]()
@@ -57,6 +69,12 @@ CookingZoo: a gym-cooking derivative to simulate a complex cooking environment.
 
 A library for doing reinforcement learning using [Crazyflie](https://www.bitcraze.io/products/crazyflie-2-1/) drones.
 
+### [DSSE: Drone Swarm Search Environment](https://github.com/pfeinsper/drone-swarm-search)
+[![PettingZoo version dependency](https://img.shields.io/badge/PettingZoo-v1.22.3-blue)]()
+![GitHub stars](https://img.shields.io/github/stars/pfeinsper/drone-swarm-search)
+
+A single and multi-agent environment to train swarms of drones for maritime search.
+
 
 ### [PettingZoo Dilemma Envs](https://github.com/tianyu-z/pettingzoo_dilemma_envs)
 

diff --git a/docs/tutorials/sb3/connect_four.md b/docs/tutorials/sb3/connect_four.md
@@ -4,6 +4,13 @@ title: "SB3: Action Masked PPO for Connect Four"
 
 # SB3: Action Masked PPO for Connect Four
 
+```{eval-rst}
+.. warning::
+
+   Currently, this tutorial doesn't work with versions of gymnasium>0.29.1. We are looking into fixing it but it might take some time.
+
+```
+
 This tutorial shows how to train agents using Maskable [Proximal Policy Optimization](https://sb3-contrib.readthedocs.io/en/master/modules/ppo_mask.html) (PPO) on the [Connect Four](/environments/classic/connect_four/) environment ([AEC](/api/aec/)).
 
 It creates a custom Wrapper to convert to a [Gymnasium](https://gymnasium.farama.org/)-like environment which is compatible with [SB3 action masking](https://sb3-contrib.readthedocs.io/en/master/modules/ppo_mask.html).

diff --git a/docs/tutorials/tianshou/index.md b/docs/tutorials/tianshou/index.md
@@ -21,7 +21,7 @@ It boasts a large number of algorithms and high quality software engineering sta
 
 ## Examples using PettingZoo
 
-* [Multi-Agent RL](https://tianshou.readthedocs.io/en/master/tutorials/tictactoe.html)
+* [Multi-Agent RL](https://tianshou.org/en/master/01_tutorials/04_tictactoe.html)
 
 ## Architecture
 

diff --git a/pettingzoo/__init__.py b/pettingzoo/__init__.py
@@ -12,7 +12,7 @@
 
 os.environ["PYGAME_HIDE_SUPPORT_PROMPT"] = "hide"
 
-__version__ = "1.24.3"
+__version__ = "1.24.4"
 
 try:
     import sys

diff --git a/pettingzoo/butterfly/cooperative_pong/cooperative_pong.py b/pettingzoo/butterfly/cooperative_pong/cooperative_pong.py
@@ -79,7 +79,7 @@
 from pettingzoo.butterfly.cooperative_pong.manual_policy import ManualPolicy
 from pettingzoo.butterfly.cooperative_pong.paddle import Paddle
 from pettingzoo.utils import wrappers
-from pettingzoo.utils.agent_selector import agent_selector
+from pettingzoo.utils.agent_selector import AgentSelector
 from pettingzoo.utils.conversions import parallel_wrapper_fn
 
 FPS = 15
@@ -370,7 +370,7 @@ def __init__(self, **kwargs):
 
         self.agents = self.env.agents[:]
         self.possible_agents = self.agents[:]
-        self._agent_selector = agent_selector(self.agents)
+        self._agent_selector = AgentSelector(self.agents)
         self.agent_selection = self._agent_selector.reset()
         # spaces
         self.action_spaces = dict(zip(self.agents, self.env.action_space))

diff --git a/pettingzoo/butterfly/knights_archers_zombies/knights_archers_zombies.py b/pettingzoo/butterfly/knights_archers_zombies/knights_archers_zombies.py
@@ -194,7 +194,7 @@
 from pettingzoo.butterfly.knights_archers_zombies.src.players import Archer, Knight
 from pettingzoo.butterfly.knights_archers_zombies.src.weapons import Arrow, Sword
 from pettingzoo.butterfly.knights_archers_zombies.src.zombie import Zombie
-from pettingzoo.utils import agent_selector, wrappers
+from pettingzoo.utils import AgentSelector, wrappers
 from pettingzoo.utils.conversions import parallel_wrapper_fn
 
 sys.dont_write_bytecode = True
@@ -370,7 +370,7 @@ def __init__(
         self.floor_patch3 = get_image(os.path.join("img", "patch3.png"))
         self.floor_patch4 = get_image(os.path.join("img", "patch4.png"))
 
-        self._agent_selector = agent_selector(self.agents)
+        self._agent_selector = AgentSelector(self.agents)
         self.reinit()
 
     def observation_space(self, agent):

diff --git a/pettingzoo/butterfly/pistonball/pistonball.py b/pettingzoo/butterfly/pistonball/pistonball.py
@@ -89,7 +89,7 @@
 
 from pettingzoo import AECEnv
 from pettingzoo.butterfly.pistonball.manual_policy import ManualPolicy
-from pettingzoo.utils import agent_selector, wrappers
+from pettingzoo.utils import AgentSelector, wrappers
 from pettingzoo.utils.conversions import parallel_wrapper_fn
 
 _image_library = {}
@@ -180,7 +180,7 @@ def __init__(
         self.agents = ["piston_" + str(r) for r in range(self.n_pistons)]
         self.possible_agents = self.agents[:]
         self.agent_name_mapping = dict(zip(self.agents, list(range(self.n_pistons))))
-        self._agent_selector = agent_selector(self.agents)
+        self._agent_selector = AgentSelector(self.agents)
 
         self.observation_spaces = dict(
             zip(

diff --git a/pettingzoo/classic/chess/chess.py b/pettingzoo/classic/chess/chess.py
@@ -116,7 +116,7 @@
 from pettingzoo import AECEnv
 from pettingzoo.classic.chess import chess_utils
 from pettingzoo.utils import wrappers
-from pettingzoo.utils.agent_selector import agent_selector
+from pettingzoo.utils.agent_selector import AgentSelector
 
 
 def env(**kwargs):
@@ -144,7 +144,7 @@ def __init__(self, render_mode: str | None = None, screen_height: int | None = 8
         self.agents = [f"player_{i}" for i in range(2)]
         self.possible_agents = self.agents[:]
 
-        self._agent_selector = agent_selector(self.agents)
+        self._agent_selector = AgentSelector(self.agents)
 
         self.action_spaces = {name: spaces.Discrete(8 * 8 * 73) for name in self.agents}
         self.observation_spaces = {
@@ -238,7 +238,7 @@ def reset(self, seed=None, options=None):
 
         self.board = chess.Board()
 
-        self._agent_selector = agent_selector(self.agents)
+        self._agent_selector = AgentSelector(self.agents)
         self.agent_selection = self._agent_selector.reset()
 
         self.rewards = {name: 0 for name in self.agents}

diff --git a/pettingzoo/classic/connect_four/connect_four.py b/pettingzoo/classic/connect_four/connect_four.py
@@ -69,7 +69,7 @@
 
 from pettingzoo import AECEnv
 from pettingzoo.utils import wrappers
-from pettingzoo.utils.agent_selector import agent_selector
+from pettingzoo.utils.agent_selector import AgentSelector
 
 
 def get_image(path):
@@ -220,7 +220,7 @@ def reset(self, seed=None, options=None):
         self.truncations = {i: False for i in self.agents}
         self.infos = {i: {} for i in self.agents}
 
-        self._agent_selector = agent_selector(self.agents)
+        self._agent_selector = AgentSelector(self.agents)
 
         self.agent_selection = self._agent_selector.reset()
 

diff --git a/pettingzoo/classic/go/go.py b/pettingzoo/classic/go/go.py
@@ -81,14 +81,14 @@
 
 |                          Action ID                           | Description                                                  |
 | :----------------------------------------------------------: | ------------------------------------------------------------ |
-| <img src="https://render.githubusercontent.com/render/math?math=0 \ldots (N-1)"> | Place a stone on the 1st row of the board.<br>_`0`: (0,0), `1`: (0,1), ..., `N-1`: (0,N-1)_ |
-| <img src="https://render.githubusercontent.com/render/math?math=N \ldots (2N- 1)"> | Place a stone on the 2nd row of the board.<br>_`N`: (1,0), `N+1`: (1,1), ..., `2N-1`: (1,N-1)_ |
+| $0 \ldots (N-1)$ | Place a stone on the 1st row of the board.<br>_`0`: (0,0), `1`: (0,1), ..., `N-1`: (0,N-1)_ |
+| $N \ldots (2N- 1)$ | Place a stone on the 2nd row of the board.<br>_`N`: (1,0), `N+1`: (1,1), ..., `2N-1`: (1,N-1)_ |
 |                             ...                              | ...                                                          |
-| <img src="https://render.githubusercontent.com/render/math?math=N^2-N \ldots N^2-1"> | Place a stone on the Nth row of the board.<br>_`N^2-N`: (N-1,0), `N^2-N+1`: (N-1,1), ..., `N^2-1`: (N-1,N-1)_ |
-| <img src="https://render.githubusercontent.com/render/math?math=N^2"> | Pass                                                         |
+| $(N^2-N) \ldots (N^2-1)$ | Place a stone on the Nth row of the board.<br>_`N^2-N`: (N-1,0), `N^2-N+1`: (N-1,1), ..., `N^2-1`: (N-1,N-1)_ |
+| $N^2$ | Pass                                                         |
 
-For example, you would use action `4` to place a stone on the board at the (0,3) location or action `N^2` to pass. You can transform a non-pass action `a` back into its 2D (x,y) coordinate by computing `(a//N, a%N)` The total action space is
-<img src="https://render.githubusercontent.com/render/math?math=N^2 %2B 1">.
+For example, you would use action `4` to place a stone on the board at the (0,3) location or action `N^2` to pass. You can transform a non-pass action `a` back into its 2D (x,y) coordinate by computing `(a//N, a%N)`. The total action space is
+$N^2+1$.
 
 ### Rewards
 
@@ -119,7 +119,7 @@
 from pettingzoo import AECEnv
 from pettingzoo.classic.go import coords, go_base
 from pettingzoo.utils import wrappers
-from pettingzoo.utils.agent_selector import agent_selector
+from pettingzoo.utils.agent_selector import AgentSelector
 
 
 def get_image(path):
@@ -191,7 +191,7 @@ def __init__(
             [spaces.Discrete(self._N * self._N + 1) for _ in range(self.num_agents)]
         )
 
-        self._agent_selector = agent_selector(self.agents)
+        self._agent_selector = AgentSelector(self.agents)
 
         self.board_history = np.zeros((self._N, self._N, 16), dtype=bool)
 

diff --git a/pettingzoo/classic/hanabi/hanabi.py b/pettingzoo/classic/hanabi/hanabi.py
@@ -171,7 +171,7 @@
 
 from pettingzoo import AECEnv
 from pettingzoo.utils import wrappers
-from pettingzoo.utils.agent_selector import agent_selector
+from pettingzoo.utils.agent_selector import AgentSelector
 
 
 def env(**kwargs):
@@ -441,7 +441,7 @@ def reset(self, seed=None, options=None):
         self.truncations = self.hanabi_env.truncations
         self.infos = self.hanabi_env.infos
 
-        self._agent_selector = agent_selector(self.agents)
+        self._agent_selector = AgentSelector(self.agents)
         self.agent_selection = self._agent_selector.reset()
 
     def step(

diff --git a/pettingzoo/classic/rps/rps.py b/pettingzoo/classic/rps/rps.py
@@ -121,7 +121,7 @@
 from gymnasium.utils import EzPickle
 
 from pettingzoo import AECEnv
-from pettingzoo.utils import agent_selector, wrappers
+from pettingzoo.utils import AgentSelector, wrappers
 from pettingzoo.utils.conversions import parallel_wrapper_fn
 
 
@@ -419,7 +419,7 @@ def close(self):
 
     def reset(self, seed=None, options=None):
         self.agents = self.possible_agents[:]
-        self._agent_selector = agent_selector(self.agents)
+        self._agent_selector = AgentSelector(self.agents)
         self.agent_selection = self._agent_selector.next()
         self.rewards = {agent: 0 for agent in self.agents}
         self._cumulative_rewards = {agent: 0 for agent in self.agents}