diff --git a/.github/workflows/build-publish.yml b/.github/workflows/build-publish.yml index 88079fe4e..a4b69870a 100644 --- a/.github/workflows/build-publish.yml +++ b/.github/workflows/build-publish.yml @@ -31,6 +31,9 @@ jobs: - os: ubuntu-latest python: 311 platform: manylinux_x86_64 + - os: ubuntu-latest + python: 312 + platform: manylinux_x86_64 steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/linux-test.yml b/.github/workflows/linux-test.yml index 7a7139b6b..536cd2e72 100644 --- a/.github/workflows/linux-test.yml +++ b/.github/workflows/linux-test.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/linux-tutorials-test.yml b/.github/workflows/linux-tutorials-test.yml index 858203d13..d559302eb 100644 --- a/.github/workflows/linux-tutorials-test.yml +++ b/.github/workflows/linux-tutorials-test.yml @@ -15,9 +15,35 @@ jobs: runs-on: ubuntu-latest strategy: fail-fast: false + matrix: python-version: ['3.8', '3.9', '3.10', '3.11'] - tutorial: [Tianshou, CustomEnvironment, CleanRL, SB3/kaz, SB3/waterworld, SB3/connect_four, SB3/test, AgileRL] # TODO: fix tutorials and add back Ray + tutorial: [Tianshou, CustomEnvironment, CleanRL, SB3/kaz, SB3/waterworld, SB3/test] # TODO: fix tutorials and add back Ray, fix SB3/connect_four tutorial + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies and run tutorials + run: | + sudo apt-get install python3-opengl xvfb parallel + export PATH=/path/to/parallel:$PATH + export root_dir=$(pwd) + cd tutorials/${{ matrix.tutorial }} + pip install -r requirements.txt + pip uninstall -y pettingzoo + pip install -e $root_dir[testing] + AutoROM -v + for f in *.py; do xvfb-run -a -s "-screen 0 1024x768x24" python "$f"; done + + agilerl-tutorial-test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ['3.9', '3.10', '3.11'] + tutorial: [AgileRL] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/macos-test.yml b/.github/workflows/macos-test.yml index 82e7a3d15..83d91809a 100644 --- a/.github/workflows/macos-test.yml +++ b/.github/workflows/macos-test.yml @@ -15,7 +15,7 @@ jobs: matrix: # Big Sur, Monterey os: [macos-11, macos-12] - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/README.md b/README.md index 395c9cbe9..adb30215b 100644 --- a/README.md +++ b/README.md @@ -73,11 +73,6 @@ SuperSuit is a library that includes all commonly used wrappers in RL (frame sta PettingZoo keeps strict versioning for reproducibility reasons. All environments end in a suffix like "\_v0". When changes are made to environments that might impact learning results, the number is increased by one to prevent potential confusion. -## Project Maintainers -Project Manager: [Elliot Tower](https://github.com/elliottower/) - -Maintenance for this project is also contributed by the broader Farama team: [farama.org/team](https://farama.org/team). 
- ## Citation To cite this project in publication, please use @@ -92,3 +87,6 @@ To cite this project in publication, please use year={2021} } ``` +## Project Maintainers +- Project Manager: [David Gerard](https://github.com/David-GERARD) - `david.gerard.23@ucl.ac.uk`. +- Maintenance for this project is also contributed by the broader Farama team: [farama.org/team](https://farama.org/team). diff --git a/docs/api/aec.md b/docs/api/aec.md index 8396c71c9..9248adccc 100644 --- a/docs/api/aec.md +++ b/docs/api/aec.md @@ -94,8 +94,8 @@ The [_Agent Environment Cycle_](https://arxiv.org/abs/2009.13051) (AEC) model wa In an AEC environment, agents act sequentially, receiving updated observations and rewards before taking an action. The environment updates after each agent's step, making it a natural way of representing sequential games such as Chess. The AEC model is flexible enough to handle any type of game that multi-agent RL can consider. -with the underlying environment updating after each agent's step. Agents receive updated observations and rewards at the beginning of their . The environment is updated after every step, -This is a natural way of representing sequential games such as Chess, and +with the underlying environment updating after each agent's step. Agents receive updated observations and rewards at the beginning of their turn. The environment is updated after every step, +This is a natural way of representing sequential games such as Chess and Go. ```{figure} /_static/img/aec_cycle_figure.png :width: 480px diff --git a/docs/api/utils.md b/docs/api/utils.md index abc9d01fc..0b0e319cb 100644 --- a/docs/api/utils.md +++ b/docs/api/utils.md @@ -165,7 +165,7 @@ Base class which is used by [CaptureStdoutWrapper](https://pettingzoo.farama.org The agent selector utility allows for easy cycling of agents in an AEC environment. At any time it can be reset or reinitialized with a new order, allowing for changes in turn order or handling a dynamic number of agents (see [Knights-Archers-Zombies](https://pettingzoo.farama.org/environments/butterfly/knights_archers_zombies/) for an example of spawning/killing agents) -Note: while many PettingZoo environments use agent_selector to manage agent cycling internally, it is not intended to be used externally when interacting with an environment. Instead, use `for agent in env.agent_iter()` (see [AEC API Usage](https://pettingzoo.farama.org/api/aec/#usage)). +Note: while many PettingZoo environments use AgentSelector to manage agent cycling internally, it is not intended to be used externally when interacting with an environment. Instead, use `for agent in env.agent_iter()` (see [AEC API Usage](https://pettingzoo.farama.org/api/aec/#usage)). ```{eval-rst} .. currentmodule:: pettingzoo.utils diff --git a/docs/code_examples/aec_rps.py b/docs/code_examples/aec_rps.py index 7ae982167..7272f75bd 100644 --- a/docs/code_examples/aec_rps.py +++ b/docs/code_examples/aec_rps.py @@ -5,7 +5,7 @@ from gymnasium.spaces import Discrete from pettingzoo import AECEnv -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers ROCK = 0 PAPER = 1 @@ -156,9 +156,9 @@ def reset(self, seed=None, options=None): self.observations = {agent: NONE for agent in self.agents} self.num_moves = 0 """ - Our agent_selector utility allows easy cyclic stepping through the agents list. + Our AgentSelector utility allows easy cyclic stepping through the agents list. 
""" - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.next() def step(self, action): diff --git a/docs/code_examples/aec_rps_usage.py b/docs/code_examples/aec_rps_usage.py index 71edc4e73..da7d2111d 100644 --- a/docs/code_examples/aec_rps_usage.py +++ b/docs/code_examples/aec_rps_usage.py @@ -1,4 +1,4 @@ -import aec_rps +from . import aec_rps env = aec_rps.env(render_mode="human") env.reset(seed=42) diff --git a/docs/code_examples/parallel_rps_usage.py b/docs/code_examples/parallel_rps_usage.py index 38949eb78..a75aa153d 100644 --- a/docs/code_examples/parallel_rps_usage.py +++ b/docs/code_examples/parallel_rps_usage.py @@ -1,4 +1,4 @@ -import parallel_rps +from . import parallel_rps env = parallel_rps.parallel_env(render_mode="human") observations, infos = env.reset() diff --git a/docs/content/environment_creation.md b/docs/content/environment_creation.md index 8b4332872..4347c49c7 100644 --- a/docs/content/environment_creation.md +++ b/docs/content/environment_creation.md @@ -62,14 +62,14 @@ The utils directory also contain some classes which are only helpful for develop ### Agent selector -The `agent_selector` class steps through agents in a cycle +The `AgentSelector` class steps through agents in a cycle It can be used as follows to cycle through the list of agents: ```python -from pettingzoo.utils import agent_selector +from pettingzoo.utils import AgentSelector agents = ["agent_1", "agent_2", "agent_3"] -selector = agent_selector(agents) +selector = AgentSelector(agents) agent_selection = selector.reset() # agent_selection will be "agent_1" for i in range(100): diff --git a/docs/environments/third_party_envs.md b/docs/environments/third_party_envs.md index aeca31fb9..167f14de9 100644 --- a/docs/environments/third_party_envs.md +++ b/docs/environments/third_party_envs.md @@ -12,6 +12,18 @@ lastpage: ## Environments using the latest versions of PettingZoo *Due to a very recent major release of PettingZoo, there are currently few contributed third-party environments. If you'd like to contribute one, please reach out on [Discord](https://discord.gg/nHg2JRN489).* +### [gfootball-gymnasium-pettingzoo](https://github.com/xihuai18/gfootball-gymnasium-pettingzoo) +[![PettingZoo version dependency](https://img.shields.io/badge/PettingZoo-v1.24.3-blue)]() +[![GitHub stars](https://img.shields.io/github/stars/xihuai18/gfootball-gymnasium-pettingzoo)]() + +Google Research Football ([GRF](https://github.com/google-research/football)) with Gymnasium and PettingZoo Compatibility. + +### [SMAC and SMACv2 with latest PettingZoo APIs](https://github.com/xihuai18/SMAC-PettingZoo) +[![PettingZoo version dependency](https://img.shields.io/badge/PettingZoo-v1.24.3-blue)]() +[![GitHub stars](https://img.shields.io/github/stars/xihuai18/gfootball-gymnasium-pettingzoo)]() + +[SMAC](https://github.com/oxwhirl/smac) and [SMACv2](https://github.com/oxwhirl/smacv2) with the latest PettingZoo Parallel APIs. + ### [Sumo-RL](https://github.com/LucasAlegre/sumo-rl) [![PettingZoo version dependency](https://img.shields.io/badge/PettingZoo-v1.22.2-blue)]() @@ -57,6 +69,12 @@ CookingZoo: a gym-cooking derivative to simulate a complex cooking environment. A library for doing reinforcement learning using [Crazyflie](https://www.bitcraze.io/products/crazyflie-2-1/) drones. 
+### [DSSE: Drone Swarm Search Environment](https://github.com/pfeinsper/drone-swarm-search) +[![PettingZoo version dependency](https://img.shields.io/badge/PettingZoo-v1.22.3-blue)]() +![GitHub stars](https://img.shields.io/github/stars/pfeinsper/drone-swarm-search) + +A single and multi-agent environment to train swarms of drones for maritime search. + ### [PettingZoo Dilemma Envs](https://github.com/tianyu-z/pettingzoo_dilemma_envs) diff --git a/docs/tutorials/sb3/connect_four.md b/docs/tutorials/sb3/connect_four.md index 8b85f8cca..eef34deac 100644 --- a/docs/tutorials/sb3/connect_four.md +++ b/docs/tutorials/sb3/connect_four.md @@ -4,6 +4,13 @@ title: "SB3: Action Masked PPO for Connect Four" # SB3: Action Masked PPO for Connect Four +```{eval-rst} +.. warning:: + + Currently, this tutorial doesn't work with versions of gymnasium>0.29.1. We are looking into fixing it but it might take some time. + +``` + This tutorial shows how to train a agents using Maskable [Proximal Policy Optimization](https://sb3-contrib.readthedocs.io/en/master/modules/ppo_mask.html) (PPO) on the [Connect Four](/environments/classic/chess/) environment ([AEC](/api/aec/)). It creates a custom Wrapper to convert to a [Gymnasium](https://gymnasium.farama.org/)-like environment which is compatible with [SB3 action masking](https://sb3-contrib.readthedocs.io/en/master/modules/ppo_mask.html). diff --git a/docs/tutorials/tianshou/index.md b/docs/tutorials/tianshou/index.md index eef3a7d0c..1a879f12c 100644 --- a/docs/tutorials/tianshou/index.md +++ b/docs/tutorials/tianshou/index.md @@ -21,7 +21,7 @@ It boasts a large number of algorithms and high quality software engineering sta ## Examples using PettingZoo -* [Multi-Agent RL](https://tianshou.readthedocs.io/en/master/tutorials/tictactoe.html) +* [Multi-Agent RL](https://tianshou.org/en/master/01_tutorials/04_tictactoe.html) ## Architecture diff --git a/pettingzoo/__init__.py b/pettingzoo/__init__.py index bef7e58da..4000bdbdc 100644 --- a/pettingzoo/__init__.py +++ b/pettingzoo/__init__.py @@ -12,7 +12,7 @@ os.environ["PYGAME_HIDE_SUPPORT_PROMPT"] = "hide" -__version__ = "1.24.3" +__version__ = "1.24.4" try: import sys diff --git a/pettingzoo/butterfly/cooperative_pong/cooperative_pong.py b/pettingzoo/butterfly/cooperative_pong/cooperative_pong.py index 0751a12e7..4573769fc 100644 --- a/pettingzoo/butterfly/cooperative_pong/cooperative_pong.py +++ b/pettingzoo/butterfly/cooperative_pong/cooperative_pong.py @@ -79,7 +79,7 @@ from pettingzoo.butterfly.cooperative_pong.manual_policy import ManualPolicy from pettingzoo.butterfly.cooperative_pong.paddle import Paddle from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector from pettingzoo.utils.conversions import parallel_wrapper_fn FPS = 15 @@ -370,7 +370,7 @@ def __init__(self, **kwargs): self.agents = self.env.agents[:] self.possible_agents = self.agents[:] - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() # spaces self.action_spaces = dict(zip(self.agents, self.env.action_space)) diff --git a/pettingzoo/butterfly/knights_archers_zombies/knights_archers_zombies.py b/pettingzoo/butterfly/knights_archers_zombies/knights_archers_zombies.py index 0f21753e6..68a9bdfdc 100644 --- a/pettingzoo/butterfly/knights_archers_zombies/knights_archers_zombies.py +++ 
b/pettingzoo/butterfly/knights_archers_zombies/knights_archers_zombies.py @@ -194,7 +194,7 @@ from pettingzoo.butterfly.knights_archers_zombies.src.players import Archer, Knight from pettingzoo.butterfly.knights_archers_zombies.src.weapons import Arrow, Sword from pettingzoo.butterfly.knights_archers_zombies.src.zombie import Zombie -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers from pettingzoo.utils.conversions import parallel_wrapper_fn sys.dont_write_bytecode = True @@ -370,7 +370,7 @@ def __init__( self.floor_patch3 = get_image(os.path.join("img", "patch3.png")) self.floor_patch4 = get_image(os.path.join("img", "patch4.png")) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.reinit() def observation_space(self, agent): diff --git a/pettingzoo/butterfly/pistonball/pistonball.py b/pettingzoo/butterfly/pistonball/pistonball.py index b15ea2872..65415593b 100644 --- a/pettingzoo/butterfly/pistonball/pistonball.py +++ b/pettingzoo/butterfly/pistonball/pistonball.py @@ -89,7 +89,7 @@ from pettingzoo import AECEnv from pettingzoo.butterfly.pistonball.manual_policy import ManualPolicy -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers from pettingzoo.utils.conversions import parallel_wrapper_fn _image_library = {} @@ -180,7 +180,7 @@ def __init__( self.agents = ["piston_" + str(r) for r in range(self.n_pistons)] self.possible_agents = self.agents[:] self.agent_name_mapping = dict(zip(self.agents, list(range(self.n_pistons)))) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.observation_spaces = dict( zip( diff --git a/pettingzoo/classic/chess/chess.py b/pettingzoo/classic/chess/chess.py index 5100f8fc3..81b2ccb31 100644 --- a/pettingzoo/classic/chess/chess.py +++ b/pettingzoo/classic/chess/chess.py @@ -116,7 +116,7 @@ from pettingzoo import AECEnv from pettingzoo.classic.chess import chess_utils from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def env(**kwargs): @@ -144,7 +144,7 @@ def __init__(self, render_mode: str | None = None, screen_height: int | None = 8 self.agents = [f"player_{i}" for i in range(2)] self.possible_agents = self.agents[:] - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.action_spaces = {name: spaces.Discrete(8 * 8 * 73) for name in self.agents} self.observation_spaces = { @@ -238,7 +238,7 @@ def reset(self, seed=None, options=None): self.board = chess.Board() - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() self.rewards = {name: 0 for name in self.agents} diff --git a/pettingzoo/classic/connect_four/connect_four.py b/pettingzoo/classic/connect_four/connect_four.py index e2a2390e9..48ce61ce1 100644 --- a/pettingzoo/classic/connect_four/connect_four.py +++ b/pettingzoo/classic/connect_four/connect_four.py @@ -69,7 +69,7 @@ from pettingzoo import AECEnv from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def get_image(path): @@ -220,7 +220,7 @@ def reset(self, seed=None, options=None): self.truncations = {i: False for i in self.agents} self.infos = {i: {} for i 
in self.agents} - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() diff --git a/pettingzoo/classic/go/go.py b/pettingzoo/classic/go/go.py index 3360f520e..d9a865c67 100644 --- a/pettingzoo/classic/go/go.py +++ b/pettingzoo/classic/go/go.py @@ -81,14 +81,14 @@ | Action ID | Description | | :----------------------------------------------------------: | ------------------------------------------------------------ | -| | Place a stone on the 1st row of the board.
_`0`: (0,0), `1`: (0,1), ..., `N-1`: (0,N-1)_ | -| | Place a stone on the 2nd row of the board.
_`N`: (1,0), `N+1`: (1,1), ..., `2N-1`: (1,N-1)_ | +| $0 \ldots (N-1)$ | Place a stone on the 1st row of the board.
_`0`: (0,0), `1`: (0,1), ..., `N-1`: (0,N-1)_ | +| $N \ldots (2N-1)$ | Place a stone on the 2nd row of the board.<br>
_`N`: (1,0), `N+1`: (1,1), ..., `2N-1`: (1,N-1)_ | | ... | ... | -| | Place a stone on the Nth row of the board.
_`N^2-N`: (N-1,0), `N^2-N+1`: (N-1,1), ..., `N^2-1`: (N-1,N-1)_ | -| | Pass | +| $(N^2-N) \ldots (N^2-1)$ | Place a stone on the Nth row of the board.
_`N^2-N`: (N-1,0), `N^2-N+1`: (N-1,1), ..., `N^2-1`: (N-1,N-1)_ | +| $N^2$ | Pass | -For example, you would use action `4` to place a stone on the board at the (0,3) location or action `N^2` to pass. You can transform a non-pass action `a` back into its 2D (x,y) coordinate by computing `(a//N, a%N)` The total action space is -. +For example, you would use action `4` to place a stone on the board at the (0,3) location or action `N^2` to pass. You can transform a non-pass action `a` back into its 2D (x,y) coordinate by computing `(a//N, a%N)`. The total action space is +$N^2+1$. ### Rewards @@ -119,7 +119,7 @@ from pettingzoo import AECEnv from pettingzoo.classic.go import coords, go_base from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def get_image(path): @@ -191,7 +191,7 @@ def __init__( [spaces.Discrete(self._N * self._N + 1) for _ in range(self.num_agents)] ) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.board_history = np.zeros((self._N, self._N, 16), dtype=bool) diff --git a/pettingzoo/classic/hanabi/hanabi.py b/pettingzoo/classic/hanabi/hanabi.py index bd2f7480f..bd4441401 100644 --- a/pettingzoo/classic/hanabi/hanabi.py +++ b/pettingzoo/classic/hanabi/hanabi.py @@ -171,7 +171,7 @@ from pettingzoo import AECEnv from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def env(**kwargs): @@ -441,7 +441,7 @@ def reset(self, seed=None, options=None): self.truncations = self.hanabi_env.truncations self.infos = self.hanabi_env.infos - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() def step( diff --git a/pettingzoo/classic/rps/rps.py b/pettingzoo/classic/rps/rps.py index 1b9eb6ad6..83c5abb3f 100644 --- a/pettingzoo/classic/rps/rps.py +++ b/pettingzoo/classic/rps/rps.py @@ -121,7 +121,7 @@ from gymnasium.utils import EzPickle from pettingzoo import AECEnv -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers from pettingzoo.utils.conversions import parallel_wrapper_fn @@ -419,7 +419,7 @@ def close(self): def reset(self, seed=None, options=None): self.agents = self.possible_agents[:] - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.next() self.rewards = {agent: 0 for agent in self.agents} self._cumulative_rewards = {agent: 0 for agent in self.agents} diff --git a/pettingzoo/classic/tictactoe/board.py b/pettingzoo/classic/tictactoe/board.py index 35186a57a..e6fee6853 100644 --- a/pettingzoo/classic/tictactoe/board.py +++ b/pettingzoo/classic/tictactoe/board.py @@ -1,79 +1,102 @@ +TTT_PLAYER1_WIN = 0 +TTT_PLAYER2_WIN = 1 +TTT_TIE = -1 +TTT_GAME_NOT_OVER = -2 + + class Board: + """Board for a TicTacToe Game. + + This tracks the position and identity of marks on the game board + and allows checking for a winner. 
+ + Example of usage: + + import random + board = Board() + + # random legal moves - for example purposes + def choose_move(board_obj: Board) -> int: + legal_moves = [i for i, mark in enumerate(board_obj.squares) if mark == 0] + return random.choice(legal_moves) + + player = 0 + while True: + move = choose_move(board) + board.play_turn(player, move) + status = board.game_status() + if status != TTT_GAME_NOT_OVER: + if status in [TTT_PLAYER1_WIN, TTT_PLAYER2_WIN]: + print(f"player {status} won") + else: # status == TTT_TIE + print("Tie Game") + break + player = player ^ 1 # swaps between players 0 and 1 + """ + + # indices of the winning lines: vertical(x3), horizontal(x3), diagonal(x2) + winning_combinations = [ + (0, 1, 2), + (3, 4, 5), + (6, 7, 8), + (0, 3, 6), + (1, 4, 7), + (2, 5, 8), + (0, 4, 8), + (2, 4, 6), + ] + def __init__(self): - # internally self.board.squares holds a flat representation of tic tac toe board - # where an empty board is [0, 0, 0, 0, 0, 0, 0, 0, 0] - # where indexes are column wise order + # self.squares holds a flat representation of the tic tac toe board. + # an empty board is [0, 0, 0, 0, 0, 0, 0, 0, 0]. + # player 1's squares are marked 1, while player 2's are marked 2. + # mapping of the flat indices to the 3x3 grid is as follows: # 0 3 6 # 1 4 7 # 2 5 8 - - # empty -- 0 - # player 0 -- 1 - # player 1 -- 2 self.squares = [0] * 9 - # precommute possible winning combinations - self.calculate_winners() + @property + def _n_empty_squares(self): + """The current number of empty squares on the board.""" + return self.squares.count(0) - def setup(self): - self.calculate_winners() + def reset(self): + """Remove all marks from the board.""" + self.squares = [0] * 9 def play_turn(self, agent, pos): - # if spot is empty - if self.squares[pos] != 0: - return - if agent == 0: - self.squares[pos] = 1 - elif agent == 1: - self.squares[pos] = 2 - return - - def calculate_winners(self): - winning_combinations = [] - indices = [x for x in range(0, 9)] - - # Vertical combinations - winning_combinations += [ - tuple(indices[i : (i + 3)]) for i in range(0, len(indices), 3) - ] - - # Horizontal combinations - winning_combinations += [ - tuple(indices[x] for x in range(y, len(indices), 3)) for y in range(0, 3) - ] - - # Diagonal combinations - winning_combinations.append(tuple(x for x in range(0, len(indices), 4))) - winning_combinations.append(tuple(x for x in range(2, len(indices) - 1, 2))) - - self.winning_combinations = winning_combinations - - # returns: - # -1 for no winner - # 1 -- agent 0 wins - # 2 -- agent 1 wins - def check_for_winner(self): - winner = -1 - for combination in self.winning_combinations: - states = [] - for index in combination: - states.append(self.squares[index]) - if all(x == 1 for x in states): - winner = 1 - if all(x == 2 for x in states): - winner = 2 - return winner - - def check_game_over(self): - winner = self.check_for_winner() - - if winner == -1 and all(square in [1, 2] for square in self.squares): - # tie - return True - elif winner in [1, 2]: - return True - else: - return False + """Place a mark by the agent in the spot given. + + The following are required for a move to be valid: + * The agent must be a known agent ID (either 0 or 1). + * The spot must be be empty. + * The spot must be in the board (integer: 0 <= spot <= 8) + + If any of those are not true, an assertion will fail. 
+ """ + assert pos >= 0 and pos <= 8, "Invalid move location" + assert agent in [0, 1], "Invalid agent" + assert self.squares[pos] == 0, "Location is not empty" + + # agent is [0, 1]. board values are stored as [1, 2]. + self.squares[pos] = agent + 1 + + def game_status(self): + """Return status (winner, TTT_TIE if no winner, or TTT_GAME_NOT_OVER).""" + for indices in self.winning_combinations: + states = [self.squares[idx] for idx in indices] + if states == [1, 1, 1]: + return TTT_PLAYER1_WIN + if states == [2, 2, 2]: + return TTT_PLAYER2_WIN + if self._n_empty_squares == 0: + return TTT_TIE + return TTT_GAME_NOT_OVER def __str__(self): return str(self.squares) + + def legal_moves(self): + """Return list of legal moves (as flat indices for spaces on the board).""" + return [i for i, mark in enumerate(self.squares) if mark == 0] diff --git a/pettingzoo/classic/tictactoe/test_board.py b/pettingzoo/classic/tictactoe/test_board.py new file mode 100644 index 000000000..b8f7e9248 --- /dev/null +++ b/pettingzoo/classic/tictactoe/test_board.py @@ -0,0 +1,127 @@ +"""Test cases for TicTacToe board.""" + +from __future__ import annotations + +from typing import Any + +import pytest + +from pettingzoo.classic.tictactoe.board import ( # type: ignore + TTT_GAME_NOT_OVER, + TTT_PLAYER1_WIN, + TTT_PLAYER2_WIN, + TTT_TIE, + Board, +) + +# Note: mapping of moves to board positions are: +# 0 3 6 +# 1 4 7 +# 2 5 8 + +agent2_win = { + "moves": [ + # agent_id, position, board after move + (0, 4, [0, 0, 0, 0, 1, 0, 0, 0, 0]), + (1, 0, [2, 0, 0, 0, 1, 0, 0, 0, 0]), + (0, 2, [2, 0, 1, 0, 1, 0, 0, 0, 0]), + (1, 6, [2, 0, 1, 0, 1, 0, 2, 0, 0]), + (0, 3, [2, 0, 1, 1, 1, 0, 2, 0, 0]), + (1, 7, [2, 0, 1, 1, 1, 0, 2, 2, 0]), + (0, 1, [2, 1, 1, 1, 1, 0, 2, 2, 0]), + (1, 8, [2, 1, 1, 1, 1, 0, 2, 2, 2]), # agent 2 wins here + (0, 5, [2, 1, 1, 1, 1, 1, 2, 2, 2]), + ], + "max_step": 7, # should not get past here + "winner": TTT_PLAYER2_WIN, +} + +tie = { + "moves": [ # should be tie + (0, 0, [1, 0, 0, 0, 0, 0, 0, 0, 0]), + (1, 3, [1, 0, 0, 2, 0, 0, 0, 0, 0]), + (0, 1, [1, 1, 0, 2, 0, 0, 0, 0, 0]), + (1, 4, [1, 1, 0, 2, 2, 0, 0, 0, 0]), + (0, 5, [1, 1, 0, 2, 2, 1, 0, 0, 0]), + (1, 2, [1, 1, 2, 2, 2, 1, 0, 0, 0]), + (0, 6, [1, 1, 2, 2, 2, 1, 1, 0, 0]), + (1, 7, [1, 1, 2, 2, 2, 1, 1, 2, 0]), + (0, 8, [1, 1, 2, 2, 2, 1, 1, 2, 1]), + ], + "max_step": 8, + "winner": TTT_TIE, +} + +agent1_win = { + "moves": [ + (0, 0, [1, 0, 0, 0, 0, 0, 0, 0, 0]), + (1, 3, [1, 0, 0, 2, 0, 0, 0, 0, 0]), + (0, 1, [1, 1, 0, 2, 0, 0, 0, 0, 0]), + (1, 4, [1, 1, 0, 2, 2, 0, 0, 0, 0]), + (0, 2, [1, 1, 1, 2, 2, 0, 0, 0, 0]), # agent 1 should win here + (1, 5, [1, 1, 1, 2, 2, 2, 0, 0, 0]), + (0, 6, [1, 1, 1, 2, 2, 2, 1, 0, 0]), + (1, 7, [1, 1, 1, 2, 2, 2, 1, 2, 0]), + (0, 8, [1, 1, 1, 2, 2, 2, 1, 2, 1]), + ], + "max_step": 4, + "winner": TTT_PLAYER1_WIN, +} + + +@pytest.mark.parametrize("values", [agent1_win, agent2_win, tie]) +def test_tictactoe_board_games(values: dict[str, Any]) -> None: + """Test that TicTacToe games go as expected.""" + expected_winner = values["winner"] + max_step = values["max_step"] + + board = Board() + for i, (agent, pos, board_layout) in enumerate(values["moves"]): + assert i <= max_step, "max step exceed in tictactoe game" + board.play_turn(agent, pos) + assert board_layout == board.squares, "wrong tictactoe layout after move" + status = board.game_status() + if status != TTT_GAME_NOT_OVER: + assert i == max_step, "tictactoe game ended on wrong step" + assert status == expected_winner, "wrong winner in tictactoe board test" + 
break + + +def test_tictactoe_winning_boards() -> None: + """Test that winning board configurations actually win.""" + # these are the winning lines for player 1. Note that moves + # for player 2 are included to make it a legal board. + winning_lines = [ # vertical(x3), horizontal(x3), diagonal(x2) + [1, 1, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 1, 1], + [1, 0, 0, 1, 0, 0, 1, 0, 0], + [0, 1, 0, 0, 1, 0, 0, 1, 0], + [0, 0, 1, 0, 0, 1, 0, 0, 1], + [1, 0, 0, 0, 1, 0, 0, 0, 1], + [0, 0, 1, 0, 1, 0, 1, 0, 0], + ] + for line in winning_lines: + board = Board() + board.squares = line + assert board.game_status() == TTT_PLAYER1_WIN, "Bad win check in TicTacToe" + + +def test_tictactoe_bad_move() -> None: + """Test that illegal TicTacToe moves are rejected.""" + board = Board() + # 1) move out of bounds should be rejected + for outside_space in [-1, 9]: + with pytest.raises(AssertionError, match="Invalid move location"): + board.play_turn(0, outside_space) + + # 2) move by unknown agent should be rejected + for unknown_agent in [-1, 2]: + with pytest.raises(AssertionError, match="Invalid agent"): + board.play_turn(unknown_agent, 0) + + # 3) move in occupied space by either agent should be rejected + board.play_turn(0, 4) # this is fine + for agent in [0, 1]: + with pytest.raises(AssertionError, match="Location is not empty"): + board.play_turn(agent, 4) # repeating move is not valid diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py index 45d357b6f..e3c219c5a 100644 --- a/pettingzoo/classic/tictactoe/tictactoe.py +++ b/pettingzoo/classic/tictactoe/tictactoe.py @@ -79,11 +79,12 @@ from gymnasium.utils import EzPickle from pettingzoo import AECEnv -from pettingzoo.classic.tictactoe.board import Board -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.classic.tictactoe.board import TTT_GAME_NOT_OVER, TTT_TIE, Board +from pettingzoo.utils import AgentSelector, wrappers def get_image(path): + """Return a pygame image loaded from the given path.""" from os import path as os_path cwd = os_path.dirname(__file__) @@ -92,6 +93,7 @@ def get_image(path): def get_font(path, size): + """Return a pygame font loaded from the given path.""" from os import path as os_path cwd = os_path.dirname(__file__) @@ -141,9 +143,9 @@ def __init__( self.rewards = {i: 0 for i in self.agents} self.terminations = {i: False for i in self.agents} self.truncations = {i: False for i in self.agents} - self.infos = {i: {"legal_moves": list(range(0, 9))} for i in self.agents} + self.infos = {i: {} for i in self.agents} - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() self.render_mode = render_mode @@ -153,42 +155,38 @@ def __init__( if self.render_mode == "human": self.clock = pygame.time.Clock() - # Key - # ---- - # blank space = 0 - # agent 0 = 1 - # agent 1 = 2 - # An observation is list of lists, where each list represents a row - # - # [[0,0,2] - # [1,2,1] - # [2,1,0]] def observe(self, agent): board_vals = np.array(self.board.squares).reshape(3, 3) cur_player = self.possible_agents.index(agent) opp_player = (cur_player + 1) % 2 - cur_p_board = np.equal(board_vals, cur_player + 1) - opp_p_board = np.equal(board_vals, opp_player + 1) - - observation = np.stack([cur_p_board, opp_p_board], axis=2).astype(np.int8) - legal_moves = self._legal_moves() if agent == self.agent_selection else [] + observation = np.empty((3, 3, 2), 
dtype=np.int8) + # this will give a copy of the board that is 1 for player 1's + # marks and zero for every other square, whether empty or not. + observation[:, :, 0] = np.equal(board_vals, cur_player + 1) + observation[:, :, 1] = np.equal(board_vals, opp_player + 1) - action_mask = np.zeros(9, "int8") - for i in legal_moves: - action_mask[i] = 1 + action_mask = self._get_mask(agent) return {"observation": observation, "action_mask": action_mask} + def _get_mask(self, agent): + action_mask = np.zeros(9, dtype=np.int8) + + # Per the documentation, the mask of any agent other than the + # currently selected one is all zeros. + if agent == self.agent_selection: + for i in self.board.legal_moves(): + action_mask[i] = 1 + + return action_mask + def observation_space(self, agent): return self.observation_spaces[agent] def action_space(self, agent): return self.action_spaces[agent] - def _legal_moves(self): - return [i for i in range(len(self.board.squares)) if self.board.squares[i] == 0] - # action in this case is a value from 0 to 8 indicating position to move on tictactoe board def step(self, action): if ( @@ -196,45 +194,30 @@ def step(self, action): or self.truncations[self.agent_selection] ): return self._was_dead_step(action) - # check if input action is a valid move (0 == empty spot) - assert self.board.squares[action] == 0, "played illegal move" - # play turn - self.board.play_turn(self.agents.index(self.agent_selection), action) - - # update infos - # list of valid actions (indexes in board) - # next_agent = self.agents[(self.agents.index(self.agent_selection) + 1) % len(self.agents)] - next_agent = self._agent_selector.next() - if self.board.check_game_over(): - winner = self.board.check_for_winner() + self.board.play_turn(self.agents.index(self.agent_selection), action) - if winner == -1: - # tie + status = self.board.game_status() + if status != TTT_GAME_NOT_OVER: + if status == TTT_TIE: pass - elif winner == 1: - # agent 0 won - self.rewards[self.agents[0]] += 1 - self.rewards[self.agents[1]] -= 1 else: - # agent 1 won - self.rewards[self.agents[1]] += 1 - self.rewards[self.agents[0]] -= 1 + winner = status # either TTT_PLAYER1_WIN or TTT_PLAYER2_WIN + loser = winner ^ 1 # 0 -> 1; 1 -> 0 + self.rewards[self.agents[winner]] += 1 + self.rewards[self.agents[loser]] -= 1 # once either play wins or there is a draw, game over, both players are done self.terminations = {i: True for i in self.agents} + self._accumulate_rewards() - # Switch selection to next agents - self._cumulative_rewards[self.agent_selection] = 0 - self.agent_selection = next_agent + self.agent_selection = self._agent_selector.next() - self._accumulate_rewards() if self.render_mode == "human": self.render() def reset(self, seed=None, options=None): - # reset environment - self.board = Board() + self.board.reset() self.agents = self.possible_agents[:] self.rewards = {i: 0 for i in self.agents} @@ -244,10 +227,9 @@ def reset(self, seed=None, options=None): self.infos = {i: {} for i in self.agents} # selects the first agent self._agent_selector.reinit(self.agents) - self._agent_selector.reset() self.agent_selection = self._agent_selector.reset() - if self.screen is None: + if self.render_mode is not None and self.screen is None: pygame.init() if self.render_mode == "human": @@ -255,7 +237,7 @@ def reset(self, seed=None, options=None): (self.screen_height, self.screen_height) ) pygame.display.set_caption("Tic-Tac-Toe") - else: + elif self.render_mode == "rgb_array": self.screen = pygame.Surface((self.screen_height, 
self.screen_height)) def close(self): diff --git a/pettingzoo/mpe/_mpe_utils/simple_env.py b/pettingzoo/mpe/_mpe_utils/simple_env.py index 6d420fe76..6cc9bb3d2 100644 --- a/pettingzoo/mpe/_mpe_utils/simple_env.py +++ b/pettingzoo/mpe/_mpe_utils/simple_env.py @@ -9,7 +9,7 @@ from pettingzoo import AECEnv from pettingzoo.mpe._mpe_utils.core import Agent from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" @@ -42,6 +42,7 @@ def __init__( render_mode=None, continuous_actions=False, local_ratio=None, + dynamic_rescaling=False, ): super().__init__() @@ -66,6 +67,7 @@ def __init__( self.world = world self.continuous_actions = continuous_actions self.local_ratio = local_ratio + self.dynamic_rescaling = dynamic_rescaling self.scenario.reset_world(self.world, self.np_random) @@ -75,7 +77,7 @@ def __init__( agent.name: idx for idx, agent in enumerate(self.world.agents) } - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) # set spaces self.action_spaces = dict() @@ -116,6 +118,11 @@ def __init__( dtype=np.float32, ) + # Get the original cam_range + # This will be used to scale the rendering + all_poses = [entity.state.p_pos for entity in self.world.entities] + self.original_cam_range = np.max(np.abs(np.array(all_poses))) + self.steps = 0 self.current_actions = [None] * self.num_agents @@ -295,6 +302,10 @@ def draw(self): all_poses = [entity.state.p_pos for entity in self.world.entities] cam_range = np.max(np.abs(np.array(all_poses))) + # The scaling factor is used for dynamic rescaling of the rendering - a.k.a Zoom In/Zoom Out effect + # The 0.9 is a factor to keep the entities from appearing "too" out-of-bounds + scaling_factor = 0.9 * self.original_cam_range / cam_range + # update geometry and text positions text_line = 0 for e, entity in enumerate(self.world.entities): @@ -309,12 +320,15 @@ def draw(self): y = (y / cam_range) * self.height // 2 * 0.9 x += self.width // 2 y += self.height // 2 - pygame.draw.circle( - self.screen, entity.color * 200, (x, y), entity.size * 350 - ) # 350 is an arbitrary scale factor to get pygame to render similar sizes as pyglet - pygame.draw.circle( - self.screen, (0, 0, 0), (x, y), entity.size * 350, 1 - ) # borders + + # 350 is an arbitrary scale factor to get pygame to render similar sizes as pyglet + if self.dynamic_rescaling: + radius = entity.size * 350 * scaling_factor + else: + radius = entity.size * 350 + + pygame.draw.circle(self.screen, entity.color * 200, (x, y), radius) + pygame.draw.circle(self.screen, (0, 0, 0), (x, y), radius, 1) # borders assert ( 0 < x < self.width and 0 < y < self.height ), f"Coordinates {(x, y)} are out of bounds." 
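The hunk above adds a `dynamic_rescaling` option to the MPE `SimpleEnv` renderer, and the hunks below thread it through every scenario constructor. As a minimal sketch of how the new flag might be exercised — assuming `pettingzoo[mpe]` is installed and using `simple_v3` purely as an example scenario:

```python
# Minimal sketch (not part of this diff): exercising the new dynamic_rescaling flag.
from pettingzoo.mpe import simple_v3

# When dynamic_rescaling=True, entity radii are rescaled each frame by
# 0.9 * original_cam_range / cam_range, giving a zoom-in/out effect as agents spread out.
env = simple_v3.parallel_env(
    render_mode="rgb_array", max_cycles=25, dynamic_rescaling=True
)
observations, infos = env.reset(seed=42)

while env.agents:
    actions = {agent: env.action_space(agent).sample() for agent in env.agents}
    observations, rewards, terminations, truncations, infos = env.step(actions)
    frame = env.render()  # rendered frame reflects the rescaled entity sizes
env.close()
```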
diff --git a/pettingzoo/mpe/simple/simple.py b/pettingzoo/mpe/simple/simple.py index b9d6f255a..7431c4fb1 100644 --- a/pettingzoo/mpe/simple/simple.py +++ b/pettingzoo/mpe/simple/simple.py @@ -31,7 +31,7 @@ ### Arguments ``` python -simple_v3.env(max_cycles=25, continuous_actions=False) +simple_v3.env(max_cycles=25, continuous_actions=False, dynamic_rescaling=False) ``` @@ -40,6 +40,8 @@ `continuous_actions`: Whether agent action spaces are discrete(default) or continuous +`dynamic_rescaling`: Whether to rescale the size of agents and landmarks based on the screen size + """ import numpy as np @@ -52,7 +54,13 @@ class raw_env(SimpleEnv, EzPickle): - def __init__(self, max_cycles=25, continuous_actions=False, render_mode=None): + def __init__( + self, + max_cycles=25, + continuous_actions=False, + render_mode=None, + dynamic_rescaling=False, + ): EzPickle.__init__( self, max_cycles=max_cycles, @@ -68,6 +76,7 @@ def __init__(self, max_cycles=25, continuous_actions=False, render_mode=None): render_mode=render_mode, max_cycles=max_cycles, continuous_actions=continuous_actions, + dynamic_rescaling=dynamic_rescaling, ) self.metadata["name"] = "simple_v3" diff --git a/pettingzoo/mpe/simple_adversary/simple_adversary.py b/pettingzoo/mpe/simple_adversary/simple_adversary.py index 674790c38..cf7a38499 100644 --- a/pettingzoo/mpe/simple_adversary/simple_adversary.py +++ b/pettingzoo/mpe/simple_adversary/simple_adversary.py @@ -39,7 +39,7 @@ ### Arguments ``` python -simple_adversary_v3.env(N=2, max_cycles=25, continuous_actions=False) +simple_adversary_v3.env(N=2, max_cycles=25, continuous_actions=False, dynamic_rescaling=False) ``` @@ -50,6 +50,8 @@ `continuous_actions`: Whether agent action spaces are discrete(default) or continuous +`dynamic_rescaling`: Whether to rescale the size of agents and landmarks based on the screen size + """ import numpy as np @@ -62,7 +64,14 @@ class raw_env(SimpleEnv, EzPickle): - def __init__(self, N=2, max_cycles=25, continuous_actions=False, render_mode=None): + def __init__( + self, + N=2, + max_cycles=25, + continuous_actions=False, + render_mode=None, + dynamic_rescaling=False, + ): EzPickle.__init__( self, N=N, @@ -79,6 +88,7 @@ def __init__(self, N=2, max_cycles=25, continuous_actions=False, render_mode=Non render_mode=render_mode, max_cycles=max_cycles, continuous_actions=continuous_actions, + dynamic_rescaling=dynamic_rescaling, ) self.metadata["name"] = "simple_adversary_v3" diff --git a/pettingzoo/mpe/simple_crypto/simple_crypto.py b/pettingzoo/mpe/simple_crypto/simple_crypto.py index 66a8d2ad1..f74b5f0d1 100644 --- a/pettingzoo/mpe/simple_crypto/simple_crypto.py +++ b/pettingzoo/mpe/simple_crypto/simple_crypto.py @@ -45,7 +45,7 @@ ### Arguments ``` python -simple_crypto_v3.env(max_cycles=25, continuous_actions=False) +simple_crypto_v3.env(max_cycles=25, continuous_actions=False, dynamic_rescaling=False) ``` @@ -54,6 +54,8 @@ `continuous_actions`: Whether agent action spaces are discrete(default) or continuous +`dynamic_rescaling`: Whether to rescale the size of agents and landmarks based on the screen size + """ import numpy as np @@ -73,7 +75,13 @@ class raw_env(SimpleEnv, EzPickle): - def __init__(self, max_cycles=25, continuous_actions=False, render_mode=None): + def __init__( + self, + max_cycles=25, + continuous_actions=False, + render_mode=None, + dynamic_rescaling=False, + ): EzPickle.__init__( self, max_cycles=max_cycles, @@ -89,6 +97,7 @@ def __init__(self, max_cycles=25, continuous_actions=False, render_mode=None): render_mode=render_mode, 
max_cycles=max_cycles, continuous_actions=continuous_actions, + dynamic_rescaling=dynamic_rescaling, ) self.metadata["name"] = "simple_crypto_v3" diff --git a/pettingzoo/mpe/simple_push/simple_push.py b/pettingzoo/mpe/simple_push/simple_push.py index 1a11a98d8..46b352803 100644 --- a/pettingzoo/mpe/simple_push/simple_push.py +++ b/pettingzoo/mpe/simple_push/simple_push.py @@ -38,13 +38,16 @@ ### Arguments ``` python -simple_push_v3.env(max_cycles=25, continuous_actions=False) +simple_push_v3.env(max_cycles=25, continuous_actions=False, dynamic_rescaling=False) ``` `max_cycles`: number of frames (a step for each agent) until game terminates +`dynamic_rescaling`: Whether to rescale the size of agents and landmarks based on the screen size + + """ import numpy as np @@ -57,7 +60,13 @@ class raw_env(SimpleEnv, EzPickle): - def __init__(self, max_cycles=25, continuous_actions=False, render_mode=None): + def __init__( + self, + max_cycles=25, + continuous_actions=False, + render_mode=None, + dynamic_rescaling=False, + ): EzPickle.__init__( self, max_cycles=max_cycles, @@ -73,6 +82,7 @@ def __init__(self, max_cycles=25, continuous_actions=False, render_mode=None): render_mode=render_mode, max_cycles=max_cycles, continuous_actions=continuous_actions, + dynamic_rescaling=dynamic_rescaling, ) self.metadata["name"] = "simple_push_v3" diff --git a/pettingzoo/mpe/simple_reference/simple_reference.py b/pettingzoo/mpe/simple_reference/simple_reference.py index a934b9014..d058e7d21 100644 --- a/pettingzoo/mpe/simple_reference/simple_reference.py +++ b/pettingzoo/mpe/simple_reference/simple_reference.py @@ -40,7 +40,7 @@ ``` python -simple_reference_v3.env(local_ratio=0.5, max_cycles=25, continuous_actions=False) +simple_reference_v3.env(local_ratio=0.5, max_cycles=25, continuous_actions=False, dynamic_rescaling=False) ``` @@ -51,6 +51,8 @@ `continuous_actions`: Whether agent action spaces are discrete(default) or continuous +`dynamic_rescaling`: Whether to rescale the size of agents and landmarks based on the screen size + """ import numpy as np @@ -64,7 +66,12 @@ class raw_env(SimpleEnv, EzPickle): def __init__( - self, local_ratio=0.5, max_cycles=25, continuous_actions=False, render_mode=None + self, + local_ratio=0.5, + max_cycles=25, + continuous_actions=False, + render_mode=None, + dynamic_rescaling=False, ): EzPickle.__init__( self, @@ -86,6 +93,7 @@ def __init__( max_cycles=max_cycles, continuous_actions=continuous_actions, local_ratio=local_ratio, + dynamic_rescaling=dynamic_rescaling, ) self.metadata["name"] = "simple_reference_v3" diff --git a/pettingzoo/mpe/simple_speaker_listener/simple_speaker_listener.py b/pettingzoo/mpe/simple_speaker_listener/simple_speaker_listener.py index fbfbe9c85..4fc09e6a3 100644 --- a/pettingzoo/mpe/simple_speaker_listener/simple_speaker_listener.py +++ b/pettingzoo/mpe/simple_speaker_listener/simple_speaker_listener.py @@ -37,7 +37,7 @@ ### Arguments ``` python -simple_speaker_listener_v4.env(max_cycles=25, continuous_actions=False) +simple_speaker_listener_v4.env(max_cycles=25, continuous_actions=False, dynamic_rescaling=False) ``` @@ -46,6 +46,8 @@ `continuous_actions`: Whether agent action spaces are discrete(default) or continuous +`dynamic_rescaling`: Whether to rescale the size of agents and landmarks based on the screen size + """ import numpy as np @@ -58,7 +60,13 @@ class raw_env(SimpleEnv, EzPickle): - def __init__(self, max_cycles=25, continuous_actions=False, render_mode=None): + def __init__( + self, + max_cycles=25, + continuous_actions=False, + 
render_mode=None, + dynamic_rescaling=False, + ): EzPickle.__init__( self, max_cycles=max_cycles, @@ -74,6 +82,7 @@ def __init__(self, max_cycles=25, continuous_actions=False, render_mode=None): render_mode=render_mode, max_cycles=max_cycles, continuous_actions=continuous_actions, + dynamic_rescaling=dynamic_rescaling, ) self.metadata["name"] = "simple_speaker_listener_v4" diff --git a/pettingzoo/mpe/simple_spread/simple_spread.py b/pettingzoo/mpe/simple_spread/simple_spread.py index 83e79e53e..4313780ae 100644 --- a/pettingzoo/mpe/simple_spread/simple_spread.py +++ b/pettingzoo/mpe/simple_spread/simple_spread.py @@ -36,7 +36,7 @@ ### Arguments ``` python -simple_spread_v3.env(N=3, local_ratio=0.5, max_cycles=25, continuous_actions=False) +simple_spread_v3.env(N=3, local_ratio=0.5, max_cycles=25, continuous_actions=False, dynamic_rescaling=False) ``` @@ -49,6 +49,8 @@ `continuous_actions`: Whether agent action spaces are discrete(default) or continuous +`dynamic_rescaling`: Whether to rescale the size of agents and landmarks based on the screen size + """ import numpy as np @@ -68,6 +70,7 @@ def __init__( max_cycles=25, continuous_actions=False, render_mode=None, + dynamic_rescaling=False, ): EzPickle.__init__( self, @@ -90,6 +93,7 @@ def __init__( max_cycles=max_cycles, continuous_actions=continuous_actions, local_ratio=local_ratio, + dynamic_rescaling=dynamic_rescaling, ) self.metadata["name"] = "simple_spread_v3" diff --git a/pettingzoo/mpe/simple_tag/simple_tag.py b/pettingzoo/mpe/simple_tag/simple_tag.py index 7727eb425..1f6c3b48f 100644 --- a/pettingzoo/mpe/simple_tag/simple_tag.py +++ b/pettingzoo/mpe/simple_tag/simple_tag.py @@ -45,7 +45,7 @@ def bound(x): ### Arguments ``` python -simple_tag_v3.env(num_good=1, num_adversaries=3, num_obstacles=2, max_cycles=25, continuous_actions=False) +simple_tag_v3.env(num_good=1, num_adversaries=3, num_obstacles=2, max_cycles=25, continuous_actions=False, dynamic_rescaling=False) ``` @@ -60,6 +60,8 @@ def bound(x): `continuous_actions`: Whether agent action spaces are discrete(default) or continuous +`dynamic_rescaling`: Whether to rescale the size of agents and landmarks based on the screen size + """ import numpy as np @@ -80,6 +82,7 @@ def __init__( max_cycles=25, continuous_actions=False, render_mode=None, + dynamic_rescaling=False, ): EzPickle.__init__( self, @@ -99,6 +102,7 @@ def __init__( render_mode=render_mode, max_cycles=max_cycles, continuous_actions=continuous_actions, + dynamic_rescaling=dynamic_rescaling, ) self.metadata["name"] = "simple_tag_v3" diff --git a/pettingzoo/mpe/simple_world_comm/simple_world_comm.py b/pettingzoo/mpe/simple_world_comm/simple_world_comm.py index 598c0d23e..0f2932743 100644 --- a/pettingzoo/mpe/simple_world_comm/simple_world_comm.py +++ b/pettingzoo/mpe/simple_world_comm/simple_world_comm.py @@ -30,11 +30,11 @@ In particular, the good agents reward, is -5 for every collision with an adversary, -2 x bound by the `bound` function described in simple_tag, +2 for every collision with a food, and -0.05 x minimum distance to any food. The adversarial agents are rewarded +5 for collisions and -0.1 x minimum distance to a good agent. 
s -Good agent observations: `[self_vel, self_pos, landmark_rel_positions, other_agent_rel_positions, other_agent_velocities, self_in_forest]` +Good agent observations: `[self_vel, self_pos, landmark_rel_positions, other_agent_rel_positions, self_in_forest, other_agent_velocities]` Normal adversary observations:`[self_vel, self_pos, landmark_rel_positions, other_agent_rel_positions, other_agent_velocities, self_in_forest, leader_comm]` -Adversary leader observations: `[self_vel, self_pos, landmark_rel_positions, other_agent_rel_positions, other_agent_velocities, leader_comm]` +Adversary leader observations: `[self_vel, self_pos, landmark_rel_positions, other_agent_rel_positions, other_agent_velocities, self_in_forest, leader_comm]` *Note that when the forests prevent an agent from being seen, the observation of that agents relative position is set to (0,0).* @@ -52,7 +52,7 @@ ``` python simple_world_comm_v3.env(num_good=2, num_adversaries=4, num_obstacles=1, - num_food=2, max_cycles=25, num_forests=2, continuous_actions=False) + num_food=2, max_cycles=25, num_forests=2, continuous_actions=False, dynamic_rescaling=False) ``` @@ -71,6 +71,8 @@ `continuous_actions`: Whether agent action spaces are discrete(default) or continuous +`dynamic_rescaling`: Whether to rescale the size of agents and landmarks based on the screen size + """ import numpy as np @@ -93,6 +95,7 @@ def __init__( num_forests=2, continuous_actions=False, render_mode=None, + dynamic_rescaling=False, ): EzPickle.__init__( self, @@ -116,6 +119,7 @@ def __init__( render_mode=render_mode, max_cycles=max_cycles, continuous_actions=continuous_actions, + dynamic_rescaling=dynamic_rescaling, ) self.metadata["name"] = "simple_world_comm_v3" diff --git a/pettingzoo/sisl/multiwalker/multiwalker.py b/pettingzoo/sisl/multiwalker/multiwalker.py index 8edf250d1..30adb9fe0 100644 --- a/pettingzoo/sisl/multiwalker/multiwalker.py +++ b/pettingzoo/sisl/multiwalker/multiwalker.py @@ -125,7 +125,7 @@ from pettingzoo import AECEnv from pettingzoo.sisl.multiwalker.multiwalker_base import FPS from pettingzoo.sisl.multiwalker.multiwalker_base import MultiWalkerEnv as _env -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers from pettingzoo.utils.conversions import parallel_wrapper_fn @@ -156,7 +156,7 @@ def __init__(self, *args, **kwargs): self.agent_name_mapping = dict( zip(self.agents, list(range(self.env.n_walkers))) ) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) # spaces self.action_spaces = dict(zip(self.agents, self.env.action_space)) self.observation_spaces = dict(zip(self.agents, self.env.observation_space)) diff --git a/pettingzoo/sisl/pursuit/pursuit.py b/pettingzoo/sisl/pursuit/pursuit.py index c75728d31..c68f189bb 100644 --- a/pettingzoo/sisl/pursuit/pursuit.py +++ b/pettingzoo/sisl/pursuit/pursuit.py @@ -85,7 +85,7 @@ from pettingzoo import AECEnv from pettingzoo.sisl.pursuit.manual_policy import ManualPolicy from pettingzoo.sisl.pursuit.pursuit_base import Pursuit as _env -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers from pettingzoo.utils.conversions import parallel_wrapper_fn __all__ = ["ManualPolicy", "env", "parallel_env", "raw_env"] @@ -118,7 +118,7 @@ def __init__(self, *args, **kwargs): self.agents = ["pursuer_" + str(a) for a in range(self.env.num_agents)] self.possible_agents = self.agents[:] self.agent_name_mapping = dict(zip(self.agents, 
list(range(self.num_agents)))) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) # spaces self.n_act_agents = self.env.act_dims[0] self.action_spaces = dict(zip(self.agents, self.env.action_space)) diff --git a/pettingzoo/sisl/waterworld/waterworld.py b/pettingzoo/sisl/waterworld/waterworld.py index d2de2eb21..df1e31549 100644 --- a/pettingzoo/sisl/waterworld/waterworld.py +++ b/pettingzoo/sisl/waterworld/waterworld.py @@ -33,7 +33,7 @@ poison respectively. The number of features per sensor is 8 by default with `speed_features` enabled, or 5 if `speed_features` is turned off. Therefore with `speed_features` enabled, the observation shape takes the full form of `(8 × n_sensors) + 2`. Elements of the observation vector take on values in the range [-1, 1]. -For example, by default there are 5 agents (purple), 5 food targets (red) and 10 poison targets (green). Each agent has 30 range-limited sensors, depicted by the black lines, to detect neighboring entities (food and poison targets) resulting in 242 element vector of computed values about the +For example, by default there are 5 agents (purple), 5 food targets (green) and 10 poison targets (red). Each agent has 30 range-limited sensors, depicted by the black lines, to detect neighboring entities (food and poison targets) resulting in 242 element vector of computed values about the environment for the observation space. These values represent the distances and speeds sensed by each sensor on the archea. Sensors that do not sense any objects within their range report 0 for speed and 1 for distance. This has been fixed from the reference environments to keep items floating off screen and being lost forever. @@ -141,7 +141,7 @@ from pettingzoo import AECEnv from pettingzoo.sisl.waterworld.waterworld_base import FPS from pettingzoo.sisl.waterworld.waterworld_base import WaterworldBase as _env -from pettingzoo.utils import agent_selector, wrappers +from pettingzoo.utils import AgentSelector, wrappers from pettingzoo.utils.conversions import parallel_wrapper_fn @@ -171,7 +171,7 @@ def __init__(self, *args, **kwargs): self.agents = ["pursuer_" + str(r) for r in range(self.env.num_agents)] self.possible_agents = self.agents[:] self.agent_name_mapping = dict(zip(self.agents, list(range(self.num_agents)))) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) # spaces self.action_spaces = dict(zip(self.agents, self.env.action_space)) diff --git a/pettingzoo/sisl/waterworld/waterworld_base.py b/pettingzoo/sisl/waterworld/waterworld_base.py index b6705b76b..7c82da4e4 100644 --- a/pettingzoo/sisl/waterworld/waterworld_base.py +++ b/pettingzoo/sisl/waterworld/waterworld_base.py @@ -313,6 +313,8 @@ def draw(self): def add_handlers(self): # Collision handlers for pursuers v.s. 
evaders & poisons + self.handlers = [] + for pursuer in self.pursuers: for obj in self.evaders: self.handlers.append( diff --git a/pettingzoo/test/example_envs/generated_agents_env_action_mask_info_v0.py b/pettingzoo/test/example_envs/generated_agents_env_action_mask_info_v0.py index 2985a07c6..1c48d6083 100644 --- a/pettingzoo/test/example_envs/generated_agents_env_action_mask_info_v0.py +++ b/pettingzoo/test/example_envs/generated_agents_env_action_mask_info_v0.py @@ -5,7 +5,7 @@ from pettingzoo import AECEnv from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def env(): @@ -105,7 +105,7 @@ def reset(self, seed=None, options=None): for i in range(5): self.add_agent(self.np_random.choice(self.types)) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() # seed observation and action spaces diff --git a/pettingzoo/test/example_envs/generated_agents_env_action_mask_obs_v0.py b/pettingzoo/test/example_envs/generated_agents_env_action_mask_obs_v0.py index b7cbf2b30..726afa6a9 100644 --- a/pettingzoo/test/example_envs/generated_agents_env_action_mask_obs_v0.py +++ b/pettingzoo/test/example_envs/generated_agents_env_action_mask_obs_v0.py @@ -5,7 +5,7 @@ from pettingzoo import AECEnv from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def env(): @@ -107,7 +107,7 @@ def reset(self, seed=None, options=None): for i in range(5): self.add_agent(self.np_random.choice(self.types)) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() # seed observation and action spaces diff --git a/pettingzoo/test/example_envs/generated_agents_env_cust_agentid_v0.py b/pettingzoo/test/example_envs/generated_agents_env_cust_agentid_v0.py index 7f307d5e8..5b966b174 100644 --- a/pettingzoo/test/example_envs/generated_agents_env_cust_agentid_v0.py +++ b/pettingzoo/test/example_envs/generated_agents_env_cust_agentid_v0.py @@ -5,7 +5,7 @@ from pettingzoo import AECEnv from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def env(): @@ -99,7 +99,7 @@ def reset(self, seed=None, options=None): for i in range(5): self.add_agent(self.np_random.choice(self.types)) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = self._agent_selector.reset() # seed observation and action spaces diff --git a/pettingzoo/test/example_envs/generated_agents_env_v0.py b/pettingzoo/test/example_envs/generated_agents_env_v0.py index 28f11469b..827465382 100644 --- a/pettingzoo/test/example_envs/generated_agents_env_v0.py +++ b/pettingzoo/test/example_envs/generated_agents_env_v0.py @@ -5,7 +5,7 @@ from pettingzoo import AECEnv from pettingzoo.utils import wrappers -from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.agent_selector import AgentSelector def env(): @@ -99,7 +99,7 @@ def reset(self, seed=None, options=None): for i in range(5): self.add_agent(self.np_random.choice(self.types)) - self._agent_selector = agent_selector(self.agents) + self._agent_selector = AgentSelector(self.agents) self.agent_selection = 
diff --git a/pettingzoo/utils/__init__.py b/pettingzoo/utils/__init__.py
index af9445539..1d16fe76b 100644
--- a/pettingzoo/utils/__init__.py
+++ b/pettingzoo/utils/__init__.py
@@ -1,4 +1,4 @@
-from pettingzoo.utils.agent_selector import agent_selector
+from pettingzoo.utils.agent_selector import AgentSelector
 from pettingzoo.utils.average_total_reward import average_total_reward
 from pettingzoo.utils.conversions import (
     aec_to_parallel,
diff --git a/pettingzoo/utils/agent_selector.py b/pettingzoo/utils/agent_selector.py
index 0b6222990..2643b1c9a 100644
--- a/pettingzoo/utils/agent_selector.py
+++ b/pettingzoo/utils/agent_selector.py
@@ -1,16 +1,17 @@
 from __future__ import annotations
 
 from typing import Any
+from warnings import warn
 
 
-class agent_selector:
+class AgentSelector:
     """Outputs an agent in the given order whenever agent_select is called.
 
     Can reinitialize to a new order.
 
     Example:
-        >>> from pettingzoo.utils import agent_selector
-        >>> agent_selector = agent_selector(agent_order=["player1", "player2"])
+        >>> from pettingzoo.utils import AgentSelector
+        >>> agent_selector = AgentSelector(agent_order=["player1", "player2"])
         >>> agent_selector.reset()
         'player1'
         >>> agent_selector.next()
@@ -52,8 +53,8 @@ def is_first(self) -> bool:
         """Check if the current agent is the first agent in the cycle."""
         return self.selected_agent == self.agent_order[0]
 
-    def __eq__(self, other: agent_selector) -> bool:
-        if not isinstance(other, agent_selector):
+    def __eq__(self, other: AgentSelector) -> bool:
+        if not isinstance(other, AgentSelector):
             return NotImplemented
 
         return (
@@ -61,3 +62,14 @@ def __eq__(self, other: agent_selector) -> bool:
             and self._current_agent == other._current_agent
             and self.selected_agent == other.selected_agent
         )
+
+
+class agent_selector(AgentSelector):
+    """Deprecated version of AgentSelector. Use that instead."""
+
+    def __init__(self, *args, **kwargs):
+        warn(
+            "agent_selector is deprecated, please use AgentSelector",
+            DeprecationWarning,
+        )
+        super().__init__(*args, **kwargs)
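
Reviewer note, not part of the patch: the rename above keeps the old lower-case name importable from `pettingzoo.utils.agent_selector`, so existing code keeps working but now emits a `DeprecationWarning`. A minimal sketch of both spellings, mirroring the docstring example:

```python
import warnings

from pettingzoo.utils import AgentSelector
from pettingzoo.utils.agent_selector import agent_selector  # legacy name, kept as a shim

order = ["player1", "player2"]

# New spelling, behaving as in the docstring example above.
selector = AgentSelector(agent_order=order)
assert selector.reset() == "player1"
assert selector.next() == "player2"

# Old spelling still constructs a working selector but now warns.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    legacy = agent_selector(agent_order=order)
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
assert legacy.reset() == "player1"
```
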
diff --git a/pettingzoo/utils/conversions.py b/pettingzoo/utils/conversions.py
index 601a1fb06..7cf99f6d9 100644
--- a/pettingzoo/utils/conversions.py
+++ b/pettingzoo/utils/conversions.py
@@ -4,7 +4,7 @@
 from collections import defaultdict
 from typing import Callable, Dict, Optional
 
-from pettingzoo.utils import agent_selector
+from pettingzoo.utils import AgentSelector
 from pettingzoo.utils.env import ActionType, AECEnv, AgentID, ObsType, ParallelEnv
 from pettingzoo.utils.wrappers import OrderEnforcingWrapper
 
@@ -309,7 +309,7 @@ def reset(self, seed=None, options=None):
         self._actions: Dict[AgentID, Optional[ActionType]] = {
             agent: None for agent in self.agents
         }
-        self._agent_selector = agent_selector(self._live_agents)
+        self._agent_selector = AgentSelector(self._live_agents)
         self.agent_selection = self._agent_selector.reset()
         self.terminations = {agent: False for agent in self.agents}
         self.truncations = {agent: False for agent in self.agents}
@@ -377,7 +377,7 @@ def step(self, action: Optional[ActionType]):
         ]
 
         if len(self.env.agents):
-            self._agent_selector = agent_selector(self.env.agents)
+            self._agent_selector = AgentSelector(self.env.agents)
             self.agent_selection = self._agent_selector.reset()
 
             self._deads_step_first()
diff --git a/pettingzoo/utils/env_logger.py b/pettingzoo/utils/env_logger.py
index c5e640e47..bd505e2e3 100644
--- a/pettingzoo/utils/env_logger.py
+++ b/pettingzoo/utils/env_logger.py
@@ -61,20 +61,6 @@ def warn_action_out_of_bound(
             f"[WARNING]: Received an action {action} that was outside action space {action_space}. Environment is {backup_policy}"
         )
 
-    @staticmethod
-    def warn_close_unrendered_env() -> None:
-        """Warns: ``[WARNING]: Called close on an unrendered environment.``."""
-        EnvLogger._generic_warning(
-            "[WARNING]: Called close on an unrendered environment."
-        )
-
-    @staticmethod
-    def warn_close_before_reset() -> None:
-        """Warns: ``[WARNING]: reset() needs to be called before close.``."""
-        EnvLogger._generic_warning(
-            "[WARNING]: reset() needs to be called before close."
-        )
-
     @staticmethod
     def warn_on_illegal_move() -> None:
         """Warns: ``[WARNING]: Illegal move made, game terminating with current player losing.``."""
""" def __init__(self, env: AECEnv[AgentID, ObsType, ActionType]): @@ -31,37 +33,12 @@ def __init__(self, env: AECEnv[AgentID, ObsType, ActionType]): env, AECEnv ), "OrderEnforcingWrapper is only compatible with AEC environments" self._has_reset = False - self._has_rendered = False self._has_updated = False super().__init__(env) def __getattr__(self, value: str) -> Any: - """Raises an error message when data is gotten from the env. - - Should only be gotten after reset - """ - if value == "unwrapped": - return self.env.unwrapped - elif value == "render_mode" and hasattr(self.env, "render_mode"): - return self.env.render_mode # pyright: ignore[reportGeneralTypeIssues] - elif value == "possible_agents": - try: - return self.env.possible_agents - except AttributeError: - EnvLogger.error_possible_agents_attribute_missing("possible_agents") - elif value == "observation_spaces": - raise AttributeError( - "The base environment does not have an possible_agents attribute. Use the environments `observation_space` method instead" - ) - elif value == "action_spaces": - raise AttributeError( - "The base environment does not have an possible_agents attribute. Use the environments `action_space` method instead" - ) - elif value == "agent_order": - raise AttributeError( - "agent_order has been removed from the API. Please consider using agent_iter instead." - ) - elif ( + """Raises an error if certain data is accessed before reset.""" + if ( value in { "rewards", @@ -75,13 +52,11 @@ def __getattr__(self, value: str) -> Any: and not self._has_reset ): raise AttributeError(f"{value} cannot be accessed before reset") - else: - return super().__getattr__(value) + return super().__getattr__(value) def render(self) -> None | np.ndarray | str | list: if not self._has_reset: EnvLogger.error_render_before_reset() - self._has_rendered = True return super().render() def step(self, action: ActionType) -> None: @@ -90,7 +65,6 @@ def step(self, action: ActionType) -> None: elif not self.agents: self._has_updated = True EnvLogger.warn_step_after_terminated_truncated() - return None else: self._has_updated = True super().step(action) @@ -124,8 +98,7 @@ def __str__(self) -> str: if self.__class__ is OrderEnforcingWrapper else f"{type(self).__name__}<{str(self.env)}>" ) - else: - return repr(self) + return repr(self) class AECOrderEnforcingIterable(AECIterable[AgentID, ObsType, ActionType]): @@ -134,11 +107,16 @@ def __iter__(self) -> AECOrderEnforcingIterator[AgentID, ObsType, ActionType]: class AECOrderEnforcingIterator(AECIterator[AgentID, ObsType, ActionType]): + def __init__( + self, env: OrderEnforcingWrapper[AgentID, ObsType, ActionType], max_iter: int + ): + assert isinstance( + env, OrderEnforcingWrapper + ), "env must be wrapped by OrderEnforcingWrapper" + super().__init__(env, max_iter) + def __next__(self) -> AgentID: agent = super().__next__() - assert hasattr( - self.env, "_has_updated" - ), "env must be wrapped by OrderEnforcingWrapper" assert ( self.env._has_updated # pyright: ignore[reportGeneralTypeIssues] ), "need to call step() or reset() in a loop over `agent_iter`" diff --git a/pettingzoo/utils/wrappers/terminate_illegal.py b/pettingzoo/utils/wrappers/terminate_illegal.py index a49d9a0be..79f95504a 100644 --- a/pettingzoo/utils/wrappers/terminate_illegal.py +++ b/pettingzoo/utils/wrappers/terminate_illegal.py @@ -1,4 +1,3 @@ -# pyright reportGeneralTypeIssues=false from __future__ import annotations from pettingzoo.utils.env import ActionType, AECEnv, AgentID, ObsType @@ -20,6 +19,7 @@ def __init__( 
diff --git a/pettingzoo/utils/wrappers/terminate_illegal.py b/pettingzoo/utils/wrappers/terminate_illegal.py
index a49d9a0be..79f95504a 100644
--- a/pettingzoo/utils/wrappers/terminate_illegal.py
+++ b/pettingzoo/utils/wrappers/terminate_illegal.py
@@ -1,4 +1,3 @@
-# pyright reportGeneralTypeIssues=false
 from __future__ import annotations
 
 from pettingzoo.utils.env import ActionType, AECEnv, AgentID, ObsType
@@ -20,6 +19,7 @@ def __init__(
         self._illegal_value = illegal_reward
         self._prev_obs = None
         self._prev_info = None
+        self._terminated = False  # terminated by an illegal move
 
     def reset(self, seed: int | None = None, options: dict | None = None) -> None:
         self._terminated = False
@@ -42,7 +42,6 @@ def step(self, action: ActionType) -> None:
         if self._prev_obs is None:
             self.observe(self.agent_selection)
         if isinstance(self._prev_obs, dict):
-            assert self._prev_obs is not None
             assert (
                 "action_mask" in self._prev_obs
             ), f"`action_mask` not found in dictionary observation: {self._prev_obs}. Action mask must either be in `observation['action_mask']` or `info['action_mask']` to use TerminateIllegalWrapper."
@@ -60,7 +59,7 @@ def step(self, action: ActionType) -> None:
             self.terminations[self.agent_selection]
             or self.truncations[self.agent_selection]
         ):
-            self._was_dead_step(action)  # pyright: ignore[reportGeneralTypeIssues]
+            self.env.unwrapped._was_dead_step(action)
         elif (
             not self.terminations[self.agent_selection]
             and not self.truncations[self.agent_selection]
@@ -70,12 +69,10 @@ def step(self, action: ActionType) -> None:
             self.env.unwrapped._cumulative_rewards[self.agent_selection] = 0
             self.env.unwrapped.terminations = {d: True for d in self.agents}
             self.env.unwrapped.truncations = {d: True for d in self.agents}
-            self._prev_obs = None
-            self._prev_info = None
             self.env.unwrapped.rewards = {d: 0 for d in self.truncations}
             self.env.unwrapped.rewards[current_agent] = float(self._illegal_value)
-            self._accumulate_rewards()
-            self._deads_step_first()
+            self.env.unwrapped._accumulate_rewards()
+            self.env.unwrapped._deads_step_first()
             self._terminated = True
         else:
             super().step(action)
diff --git a/pyproject.toml b/pyproject.toml
index c0160ab17..73c99ea4c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,6 +20,7 @@ classifiers = [
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     'Intended Audience :: Science/Research',
     'Topic :: Scientific/Engineering :: Artificial Intelligence',
 ]
@@ -31,34 +32,34 @@ dynamic = ["version"]
 
 [project.optional-dependencies]
 # Update dependencies in `all` if any are added or removed
-atari = ["multi_agent_ale_py==0.1.11", "pygame==2.3.0"]
+atari = ["multi_agent_ale_py>=0.1.11", "pygame>=2.3.0"]
 classic = [
-    "chess==1.9.4",
-    "rlcard==1.0.5",
-    "pygame==2.3.0",
+    "chess>=1.9.4",
+    "rlcard>=1.0.5",
+    "pygame>=2.3.0",
     "shimmy[openspiel]>=1.2.0"
 ]
-butterfly = ["pygame==2.3.0", "pymunk==6.2.0"]
-mpe = ["pygame==2.3.0"]
-sisl = ["pygame==2.3.0", "pymunk==6.2.0", "box2d-py==2.3.5", "scipy>=1.4.1"]
+butterfly = ["pygame>=2.3.0", "pymunk>=6.2.0"]
+mpe = ["pygame>=2.3.0"]
+sisl = ["pygame>=2.3.0", "pymunk>=6.2.0", "box2d-py>=2.3.5", "scipy>=1.4.1"]
 other = ["pillow>=8.0.1"]
 testing = [
-    "pynput==1.7.6",
-    "pytest==8.0.0",
-    "AutoROM==0.6.1",
-    "pytest-cov==4.1.0",
-    "pytest-xdist==3.5.0",
-    "pre-commit==3.5.0",
-    "pytest-markdown-docs==0.5.0"
+    "pynput>=1.7.6",
+    "pytest>=8.0.0",
+    "AutoROM>=0.6.1",
+    "pytest-cov>=4.1.0",
+    "pytest-xdist>=3.5.0",
+    "pre-commit>=3.5.0",
+    "pytest-markdown-docs>=0.5.0"
 ]
 all = [
-    "multi_agent_ale_py==0.1.11",
-    "pygame==2.3.0",
-    "chess==1.9.4",
-    "rlcard==1.0.5",
+    "multi_agent_ale_py>=0.1.11",
+    "pygame>=2.3.0",
+    "chess>=1.9.4",
+    "rlcard>=1.0.5",
     "shimmy[openspiel]>=1.2.0",
-    "pymunk==6.2.0",
-    "box2d-py==2.3.5",
+    "pymunk>=6.2.0",
+    "box2d-py>=2.3.5",
     "scipy>=1.4.1",
     "pillow>=8.0.1",
 ]
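
Reviewer note, not part of the patch: with the `==` pins relaxed to `>=`, resolved versions can now drift upward between installs. A throwaway snippet like the one below (dependency names taken from the table above) prints what actually got installed:

```python
# Print the versions that actually resolved for the formerly `==`-pinned dependencies.
from importlib.metadata import PackageNotFoundError, version

for dist in ("pygame", "pymunk", "chess", "rlcard", "box2d-py"):
    try:
        print(dist, version(dist))
    except PackageNotFoundError:
        print(dist, "not installed (extra not selected)")
```
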
diff --git a/test/wrapper_test.py b/test/wrapper_test.py
index 650fe328b..a03bd81b3 100644
--- a/test/wrapper_test.py
+++ b/test/wrapper_test.py
@@ -3,8 +3,13 @@
 import pytest
 
 from pettingzoo.butterfly import pistonball_v6
-from pettingzoo.classic import texas_holdem_no_limit_v6
-from pettingzoo.utils.wrappers import MultiEpisodeEnv, MultiEpisodeParallelEnv
+from pettingzoo.classic import texas_holdem_no_limit_v6, tictactoe_v3
+from pettingzoo.utils.wrappers import (
+    BaseWrapper,
+    MultiEpisodeEnv,
+    MultiEpisodeParallelEnv,
+    TerminateIllegalWrapper,
+)
 
 
 @pytest.mark.parametrize(("num_episodes"), [1, 2, 3, 4, 5, 6])
@@ -67,3 +72,65 @@ def test_multi_episode_parallel_env_wrapper(num_episodes) -> None:
     assert (
         steps == num_episodes * 125
     ), f"Expected to have 125 steps per episode, got {steps / num_episodes}."
+
+
+def _do_game(env: TerminateIllegalWrapper, seed: int) -> None:
+    """Run a single game with reproducible random moves."""
+    assert isinstance(
+        env, TerminateIllegalWrapper
+    ), "test_terminate_illegal must use TerminateIllegalWrapper"
+    env.reset(seed)
+    for agent in env.agents:
+        # make the random moves reproducible
+        env.action_space(agent).seed(seed)
+
+    for agent in env.agent_iter():
+        _, _, termination, truncation, _ = env.last()
+
+        if termination or truncation:
+            env.step(None)
+        else:
+            action = env.action_space(agent).sample()
+            env.step(action)
+
+
+def test_terminate_illegal() -> None:
+    """Test for a problem with terminate illegal wrapper.
+
+    The problem is that env variables, including agent_selection, are set by
+    calls from TerminateIllegalWrapper to env functions. However, they are
+    called by the wrapper object, not the env, so they are set in the wrapper
+    object rather than the base env object. When the code later tries to run,
+    the values get updated in the env code, but the wrapper pulls its own
+    values that shadow them.
+
+    The test here confirms that is fixed.
+    """
+    # not using env() because we need to ensure that the env is
+    # wrapped by TerminateIllegalWrapper
+    raw_env = tictactoe_v3.raw_env()
+    env = TerminateIllegalWrapper(raw_env, illegal_reward=-1)
+
+    _do_game(env, 42)
+    # bug is triggered by a corrupted state after a game is terminated
+    # due to an illegal move. So we need to run the game twice to
+    # see the effect.
+    _do_game(env, 42)
+
+    # get a list of all the agent_selection values in the wrapper stack
+    unwrapped = env
+    agent_selections = []
+    while unwrapped != env.unwrapped:
+        # the actual value for this wrapper (or None if no value)
+        agent_selections.append(unwrapped.__dict__.get("agent_selection", None))
+        assert isinstance(unwrapped, BaseWrapper)
+        unwrapped = unwrapped.env
+
+    # last one from the actual env
+    agent_selections.append(unwrapped.__dict__.get("agent_selection", None))
+
+    # remove None from agent_selections
+    agent_selections = [x for x in agent_selections if x is not None]
+
+    # all values must be the same, or else the wrapper and env are mismatched
+    assert len(set(agent_selections)) == 1, "agent_selection mismatch"
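
Reviewer note, not part of the patch: the new test wraps `tictactoe_v3.raw_env()` directly in `TerminateIllegalWrapper` and plays seeded games. The same wrapping pattern can be used interactively; a minimal sketch:

```python
from pettingzoo.classic import tictactoe_v3
from pettingzoo.utils.wrappers import TerminateIllegalWrapper

# Wrap the raw env directly, as test_terminate_illegal does above.
env = TerminateIllegalWrapper(tictactoe_v3.raw_env(), illegal_reward=-1)
env.reset(seed=42)

for agent in env.agent_iter():
    _, _, termination, truncation, _ = env.last()
    if termination or truncation:
        env.step(None)
    else:
        # Random actions may be illegal; an illegal move terminates the game
        # with `illegal_reward` going to the offending agent.
        env.step(env.action_space(agent).sample())
env.close()
```
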
diff --git a/tutorials/AgileRL/requirements.txt b/tutorials/AgileRL/requirements.txt
index 35b6d42a9..1262ee83c 100644
--- a/tutorials/AgileRL/requirements.txt
+++ b/tutorials/AgileRL/requirements.txt
@@ -1,4 +1,4 @@
-agilerl==1.0.16; python_version >= '3.10'
+agilerl==0.1.22; python_version >= '3.9'
 pettingzoo[classic,atari,mpe]>=1.23.1
 SuperSuit>=3.9.0
 torch>=2.0.1
diff --git a/tutorials/CustomEnvironment/tutorial3_action_masking.py b/tutorials/CustomEnvironment/tutorial3_action_masking.py
index 24676373f..c0dfe2170 100644
--- a/tutorials/CustomEnvironment/tutorial3_action_masking.py
+++ b/tutorials/CustomEnvironment/tutorial3_action_masking.py
@@ -193,7 +193,7 @@ def step(self, actions):
 
     def render(self):
         """Renders the environment."""
-        grid = np.zeros((7, 7))
+        grid = np.zeros((8, 8), dtype=object)
         grid[self.prisoner_y, self.prisoner_x] = "P"
         grid[self.guard_y, self.guard_x] = "G"
         grid[self.escape_y, self.escape_x] = "E"
diff --git a/tutorials/SB3/connect_four/requirements.txt b/tutorials/SB3/connect_four/requirements.txt
index bf7c59673..e8ed650ab 100644
--- a/tutorials/SB3/connect_four/requirements.txt
+++ b/tutorials/SB3/connect_four/requirements.txt
@@ -1,3 +1,4 @@
 pettingzoo[classic]>=1.24.0
 stable-baselines3>=2.0.0
 sb3-contrib>=2.0.0
+gymnasium<=0.29.1
diff --git a/tutorials/SB3/connect_four/sb3_connect_four_action_mask.py b/tutorials/SB3/connect_four/sb3_connect_four_action_mask.py
index d8d890362..e3dc63d34 100644
--- a/tutorials/SB3/connect_four/sb3_connect_four_action_mask.py
+++ b/tutorials/SB3/connect_four/sb3_connect_four_action_mask.py
@@ -9,6 +9,7 @@
 import os
 import time
 
+import gymnasium as gym
 from sb3_contrib import MaskablePPO
 from sb3_contrib.common.maskable.policies import MaskableActorCriticPolicy
 from sb3_contrib.common.wrappers import ActionMasker
@@ -37,9 +38,23 @@ def reset(self, seed=None, options=None):
         return self.observe(self.agent_selection), {}
 
     def step(self, action):
-        """Gymnasium-like step function, returning observation, reward, termination, truncation, info."""
+        """Gymnasium-like step function, returning observation, reward, termination, truncation, info.
+
+        The observation is for the next agent (used to determine the next action), while the remaining
+        items are for the agent that just acted (used to understand what just happened).
+ """ + current_agent = self.agent_selection + super().step(action) - return super().last() + + next_agent = self.agent_selection + return ( + self.observe(next_agent), + self._cumulative_rewards[current_agent], + self.terminations[current_agent], + self.truncations[current_agent], + self.infos[current_agent], + ) def observe(self, agent): """Return only raw observation, removing action mask.""" @@ -160,6 +175,11 @@ def eval_action_mask(env_fn, num_games=100, render_mode=None, **env_kwargs): if __name__ == "__main__": + if gym.__version__ > "0.29.1": + raise ImportError( + f"This script requires gymnasium version 0.29.1 or lower, but you have version {gym.__version__}." + ) + env_fn = connect_four_v3 env_kwargs = {} diff --git a/tutorials/SB3/test/test_sb3_action_mask.py b/tutorials/SB3/test/test_sb3_action_mask.py index 3835af393..2be85b1d8 100644 --- a/tutorials/SB3/test/test_sb3_action_mask.py +++ b/tutorials/SB3/test/test_sb3_action_mask.py @@ -23,14 +23,14 @@ EASY_ENVS = [ gin_rummy_v4, texas_holdem_no_limit_v6, # texas holdem human rendered game ends instantly, but with random actions it works fine - texas_holdem_v4, + tictactoe_v3, + leduc_holdem_v4, ] # More difficult environments which will likely take more training time MEDIUM_ENVS = [ - leduc_holdem_v4, # with 10x as many steps it gets higher total rewards (9 vs -9), 0.52 winrate, and 0.92 vs 0.83 total scores hanabi_v5, # even with 10x as many steps, total score seems to always be tied between the two agents - tictactoe_v3, # even with 10x as many steps, agent still loses every time (most likely an error somewhere) + texas_holdem_v4, # this performs poorly with updates to SB3 wrapper chess_v6, # difficult to train because games take so long, performance varies heavily ] @@ -50,8 +50,7 @@ def test_action_mask_easy(env_fn): env_kwargs = {} - # Leduc Hold`em takes slightly longer to outperform random - steps = 8192 if env_fn != leduc_holdem_v4 else 8192 * 4 + steps = 8192 * 4 # Train a model against itself (takes ~2 minutes on GPU) train_action_mask(env_fn, steps=steps, seed=0, **env_kwargs) @@ -92,7 +91,7 @@ def test_action_mask_medium(env_fn): assert ( winrate < 0.75 - ), "Policy should not perform better than 75% winrate" # 30-40% for leduc, 0% for hanabi, 0% for tic-tac-toe + ), "Policy should not perform better than 75% winrate" # 30-40% for leduc, 0% for hanabi # Watch two games (disabled by default) # eval_action_mask(env_fn, num_games=2, render_mode="human", **env_kwargs) diff --git a/tutorials/Tianshou/requirements.txt b/tutorials/Tianshou/requirements.txt index b7b8d4a47..b92064488 100644 --- a/tutorials/Tianshou/requirements.txt +++ b/tutorials/Tianshou/requirements.txt @@ -1,3 +1,4 @@ -pettingzoo[classic]==1.23.0 -packaging==21.3 +numpy<2.0.0 +pettingzoo[classic]>=1.23.0 +packaging>=21.3 tianshou==0.5.0