Skip to content

Commit

Permalink
Change TicTacToe from medium to easy in SB3 test.
Browse files: browse the repository at this point in the history
  • Loading branch information
dm-ackerman committed Mar 22, 2024
1 parent 4f35a22 commit 1f95299
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions tutorials/SB3/test/test_sb3_action_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@
gin_rummy_v4,
texas_holdem_no_limit_v6, # texas holdem human rendered game ends instantly, but with random actions it works fine
texas_holdem_v4,
tictactoe_v4,
]

# More difficult environments which will likely take more training time
MEDIUM_ENVS = [
leduc_holdem_v4, # with 10x as many steps it gets higher total rewards (9 vs -9), 0.52 winrate, and 0.92 vs 0.83 total scores
hanabi_v5, # even with 10x as many steps, total score seems to always be tied between the two agents
tictactoe_v4, # even with 10x as many steps, agent still loses every time (most likely an error somewhere)
chess_v6, # difficult to train because games take so long, performance varies heavily
]

Expand All @@ -50,8 +50,10 @@ def test_action_mask_easy(env_fn):

env_kwargs = {}

# Leduc Hold'em takes slightly longer to outperform random
steps = 8192 if env_fn != leduc_holdem_v4 else 8192 * 4
steps = 8192
# These take slightly longer to outperform random
if env_fn in [leduc_holdem_v4, tictactoe_v4]:
steps *= 4

# Train a model against itself (takes ~2 minutes on GPU)
train_action_mask(env_fn, steps=steps, seed=0, **env_kwargs)
Expand Down Expand Up @@ -92,7 +94,7 @@ def test_action_mask_medium(env_fn):

assert (
winrate < 0.75
), "Policy should not perform better than 75% winrate" # 30-40% for leduc, 0% for hanabi, 0% for tic-tac-toe
), "Policy should not perform better than 75% winrate" # 30-40% for leduc, 0% for hanabi

# Watch two games (disabled by default)
# eval_action_mask(env_fn, num_games=2, render_mode="human", **env_kwargs)
Expand Down

0 comments on commit 1f95299

Please sign in to comment.