Skip to content

Commit

Permalink
Merge pull request #518 from LLNL/abmarl-391-gymnasium-migration
Browse files Browse the repository at this point in the history
Abmarl 391 gymnasium migration
  • Loading branch information
rusu24edward authored Apr 17, 2024
2 parents 2309e73 + 5024ad4 commit 0a41cba
Show file tree
Hide file tree
Showing 56 changed files with 162 additions and 187 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10"]
python-version: ["3.8", "3.9", "3.10", "3.11"]

steps:
- uses: actions/checkout@v3
Expand Down
6 changes: 3 additions & 3 deletions abmarl/algs/monte_carlo.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# 5.1 Monte Carlo prediction p92.
from gym.spaces import Discrete
from gymnasium.spaces import Discrete
import numpy as np

from abmarl.managers import SimulationManager
Expand All @@ -13,13 +13,13 @@ def generate_episode(sim, policy, horizon=200):
Returns: sequence of state, action, reward.
"""
obs = sim.reset()
obs, _ = sim.reset()
policy.reset() # Reset the policy too so that it knows it's the beginning of the episode.
states, actions, rewards = [], [], []
states.append(obs)
for _ in range(horizon):
action = policy.compute_action(obs)
obs, reward, done, _ = sim.step(action)
obs, reward, done, _, _ = sim.step(action)
states.append(obs)
actions.append(action)
rewards.append(reward)
Expand Down
2 changes: 1 addition & 1 deletion abmarl/debug.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def debug(params, episodes=1, steps_per_episode=200, render=False, **kwargs):
trainer = DebugTrainer(
sim=sim.sim,
name=title,
output_dir=params['ray_tune'].get('local_dir')
output_dir=params['ray_tune'].get('storage_path')
)

trainer.train(
Expand Down
2 changes: 1 addition & 1 deletion abmarl/examples/sim/comms_blocking.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

from gym.spaces import Discrete, Dict
from gymnasium.spaces import Discrete, Dict
import numpy as np
from matplotlib import pyplot as plt

Expand Down
2 changes: 1 addition & 1 deletion abmarl/examples/sim/multi_agent_sim.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

from gym.spaces import Discrete, MultiBinary, MultiDiscrete, Tuple, Dict
from gymnasium.spaces import Discrete, MultiBinary, MultiDiscrete, Tuple, Dict

from abmarl.tools import Box
from abmarl.sim import PrincipleAgent, AgentBasedSimulation, is_agent, Agent
Expand Down
2 changes: 1 addition & 1 deletion abmarl/examples/sim/multi_corridor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from enum import IntEnum

from gym.spaces import Discrete, MultiBinary
from gymnasium.spaces import Discrete, MultiBinary
import numpy as np

from abmarl.tools import Box
Expand Down
5 changes: 3 additions & 2 deletions abmarl/external/gym_env_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from gym import Env as GymEnv
from gymnasium import Env as GymEnv

from abmarl.sim import is_agent

Expand Down Expand Up @@ -49,7 +49,7 @@ def reset(self, **kwargs):
Return the observation from the single agent.
"""
obs = self.sim.reset(**kwargs)
return obs[self.agent_id]
return obs[self.agent_id], {}

def step(self, action, **kwargs):
"""
Expand All @@ -61,6 +61,7 @@ def step(self, action, **kwargs):
return obs[self.agent_id], \
reward[self.agent_id], \
done[self.agent_id], \
False, \
info[self.agent_id]

def render(self, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion abmarl/external/open_spiel_env_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

from gym.spaces import Discrete
from gymnasium.spaces import Discrete

from abmarl.sim.agent_based_simulation import is_agent
from abmarl.managers import TurnBasedManager, SimulationManager
Expand Down
11 changes: 6 additions & 5 deletions abmarl/external/rllib_multiagentenv_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

from gym.spaces import Dict
from gymnasium.spaces import Dict

from abmarl.sim.agent_based_simulation import ActingAgent, ObservingAgent, is_agent

Expand Down Expand Up @@ -48,13 +48,14 @@ def unwrapped(self):
except AttributeError:
return self.sim

def reset(self):
def reset(self, *args, **kwargs):
"""See SimulationManager."""
return self.sim.reset()
return self.sim.reset(), {}

def step(self, actions):
def step(self, actions, *args, **kwargs):
"""See SimulationManager."""
return self.sim.step(actions)
obs, rewards, dones, infos = self.sim.step(actions)
return obs, rewards, dones, {"__all__": False}, infos

def render(self, *args, **kwargs):
"""See SimulationManager."""
Expand Down
2 changes: 1 addition & 1 deletion abmarl/make_runnable.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
params['experiment']['title'], time.strftime('%Y-%m-%d_%H-%M')
)
)
params['ray_tune']['local_dir'] = output_dir
params['ray_tune']['storage_path'] = output_dir
if not os.path.exists(output_dir):
os.makedirs(output_dir)
Expand Down
2 changes: 1 addition & 1 deletion abmarl/policies/policy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from abc import ABC, abstractmethod

from gym.spaces import Space
from gymnasium.spaces import Space


class Policy(ABC):
Expand Down
2 changes: 1 addition & 1 deletion abmarl/policies/q_table_policy.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

from abc import ABC, abstractmethod

from gym.spaces import Discrete
from gymnasium.spaces import Discrete
import numpy as np

from .policy import Policy
Expand Down
2 changes: 1 addition & 1 deletion abmarl/sim/gridworld/actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from abc import ABC, abstractmethod

import numpy as np
from gym.spaces import Discrete, MultiDiscrete, Dict
from gymnasium.spaces import Discrete, MultiDiscrete, Dict

from abmarl.tools import Box
from abmarl.sim.gridworld.base import GridWorldBaseComponent
Expand Down
2 changes: 1 addition & 1 deletion abmarl/sim/gridworld/wrapper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

from abc import abstractmethod

from gym.spaces import Discrete, Dict
from gymnasium.spaces import Discrete, Dict

from abmarl.sim.gridworld.actor import ActorBaseComponent
from abmarl.sim.gridworld.observer import ObserverBaseComponent
Expand Down
2 changes: 1 addition & 1 deletion abmarl/sim/wrappers/communication_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .wrapper import Wrapper

from gym.spaces import Discrete, Dict
from gymnasium.spaces import Discrete, Dict

from abmarl.sim import is_agent

Expand Down
4 changes: 2 additions & 2 deletions abmarl/sim/wrappers/flatten_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

from gym.spaces import Discrete, Tuple, Dict, MultiDiscrete, MultiBinary
from gym.spaces import Box as GymBox
from gymnasium.spaces import Discrete, Tuple, Dict, MultiDiscrete, MultiBinary
from gymnasium.spaces import Box as GymBox
import numpy as np

from abmarl.tools import Box
Expand Down
6 changes: 3 additions & 3 deletions abmarl/sim/wrappers/ravel_discrete_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import itertools

import numpy as np
from gym.spaces import Discrete, MultiDiscrete, MultiBinary, Dict, Tuple
from gym.spaces import Box as GymBox
from gym.spaces.box import get_inf
from gymnasium.spaces import Discrete, MultiDiscrete, MultiBinary, Dict, Tuple
from gymnasium.spaces import Box as GymBox
from gymnasium.spaces.box import get_inf

from abmarl.sim import is_agent

Expand Down
2 changes: 1 addition & 1 deletion abmarl/sim/wrappers/super_agent_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

import warnings

from gym.spaces import Dict, MultiBinary
from gymnasium.spaces import Dict, MultiBinary

from abmarl.sim.agent_based_simulation import is_agent, Agent
from abmarl.sim.wrappers import Wrapper
Expand Down
14 changes: 7 additions & 7 deletions abmarl/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def _stage_setup(params, full_trained_directory, seed=None, checkpoint=None):
params['ray_tune']['config']['num_envs_per_worker'] = 1
params['ray_tune']['config']['seed'] = seed

checkpoint_dir, checkpoint_value = adu.checkpoint_from_trained_directory(
checkpoint_dir = adu.checkpoint_from_trained_directory(
full_trained_directory, checkpoint
)
print(checkpoint_dir)
Expand All @@ -27,7 +27,7 @@ def _stage_setup(params, full_trained_directory, seed=None, checkpoint=None):
env=params['ray_tune']['config']['env'],
config=params['ray_tune']['config']
)
trainer.restore(os.path.join(checkpoint_dir, 'checkpoint-' + str(checkpoint_value)))
trainer.restore(checkpoint_dir)

# Get the simulation
sim = params['experiment']['sim_creator'](
Expand Down Expand Up @@ -98,14 +98,14 @@ def _multi_get_action(obs, done=None, sim=None, policy_agent_mapping=None, **kwa
for agent_id, agent_obs in obs.items():
if done[agent_id]: continue # Don't get actions for done agents
policy_id = policy_agent_mapping(agent_id)
action = trainer.compute_action(
action = trainer.compute_single_action(
agent_obs, policy_id=policy_id, explore=explore
)
joint_action[agent_id] = action
return joint_action

def _single_get_action(obs, trainer=None, **kwargs):
return trainer.compute_action(obs, explore=explore)
return trainer.compute_single_action(obs, explore=explore)

def _multi_get_done(done):
return done['__all__']
Expand All @@ -115,7 +115,7 @@ def _single_get_done(done):

policy_agent_mapping = None
if isinstance(sim, MultiAgentEnv):
policy_agent_mapping = trainer.config['multiagent']['policy_mapping_fn']
policy_agent_mapping = trainer.config['policy_mapping_fn']
_get_action = _multi_get_action
_get_done = _multi_get_done
else:
Expand All @@ -124,7 +124,7 @@ def _single_get_done(done):

for episode in range(episodes):
print('Episode: {}'.format(episode))
obs = sim.reset()
obs, _ = sim.reset()
done = None
all_done = False
fig = plt.figure()
Expand All @@ -144,7 +144,7 @@ def animate(i):
action = _get_action(
obs, done=done, sim=sim, trainer=trainer, policy_agent_mapping=policy_agent_mapping
)
obs, _, done, _ = sim.step(action)
obs, _, done, _, _ = sim.step(action)
if _get_done(done) or i >= steps_per_episode:
nonlocal all_done
all_done = True
Expand Down
4 changes: 2 additions & 2 deletions abmarl/tools/gym_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from gym.spaces import Space, Discrete, MultiBinary, MultiDiscrete, Dict, Tuple
from gym.spaces import Box as GymBox
from gymnasium.spaces import Space, Discrete, MultiBinary, MultiDiscrete, Dict, Tuple
from gymnasium.spaces import Box as GymBox
import numpy as np


Expand Down
12 changes: 6 additions & 6 deletions abmarl/tools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def checkpoint_from_trained_directory(full_trained_directory, checkpoint_desired
if checkpoint_desired is not None: # checkpoint specified
for checkpoint in checkpoint_dirs:
if checkpoint_desired == int(checkpoint.split('/')[-1].split('_')[-1]):
return checkpoint, checkpoint_desired
return checkpoint
import warnings
warnings.warn(
f'Could not find checkpoint_{checkpoint_desired}. Attempting to load the last '
Expand All @@ -39,7 +39,7 @@ def checkpoint_from_trained_directory(full_trained_directory, checkpoint_desired

# Load the last checkpoint
max_checkpoint = None
max_checkpoint_value = 0
max_checkpoint_value = -1
for checkpoint in checkpoint_dirs:
checkpoint_value = int(checkpoint.split('/')[-1].split('_')[-1])
if checkpoint_value > max_checkpoint_value:
Expand All @@ -49,7 +49,7 @@ def checkpoint_from_trained_directory(full_trained_directory, checkpoint_desired
if max_checkpoint is None:
raise FileNotFoundError("Did not find a checkpoint file in the given directory.")

return max_checkpoint, max_checkpoint_value
return max_checkpoint


def find_dirs_in_dir(pattern, path):
Expand Down Expand Up @@ -112,11 +112,11 @@ def set_output_directory(params):
import os
import time
title = params['experiment']['title']
base = params['ray_tune'].get('local_dir', os.path.expanduser("~"))
base = params['ray_tune'].get('storage_path', os.path.expanduser("~"))
output_dir = os.path.join(
base, 'abmarl_results/{}_{}'.format(
os.getcwd(), base, 'abmarl_results/{}_{}'.format(
title, time.strftime('%Y-%m-%d_%H-%M')
)
)
params['ray_tune']['local_dir'] = output_dir
params['ray_tune']['storage_path'] = output_dir
return output_dir
2 changes: 1 addition & 1 deletion docs/src/gridworld.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1504,7 +1504,7 @@ force the agent to only attack one encoding per turn, like so:
from abmarl.sim.gridworld.state import PositionState, HealthState
from abmarl.sim.gridworld.actor import EncodingBasedAttackActor
from abmarl.sim.gridworld.wrapper import ExclusiveChannelActionWrapper
from gym.spaces import Dict, Discrete
from gymnasium.spaces import Dict, Discrete
agents = {
'agent0': AttackingAgent(
Expand Down
18 changes: 9 additions & 9 deletions docs/src/overview.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ An agent can be created like so:

.. code-block:: python
from gym.spaces import Discrete
from gymnasium.spaces import Discrete
from abmarl.tools import Box
from abmarl.sim import Agent
agent = Agent(
Expand Down Expand Up @@ -199,7 +199,7 @@ is ravelled to a Discrete space:

.. code-block:: python
from gym.spaces import Dict, MultiBinary, MultiDiscrete, Discrete, Tuple
from gymnasium.spaces import Dict, MultiBinary, MultiDiscrete, Discrete, Tuple
import numpy as np
from abmarl.tools import Box
from abmarl.sim.wrappers.ravel_discrete_wrapper import ravel_space, ravel
Expand Down Expand Up @@ -255,7 +255,7 @@ it will cast up to float. See how the following nested space is flattened:

.. code-block:: python
from gym.spaces import Dict, MultiBinary, MultiDiscrete, Discrete, Tuple
from gymnasium.spaces import Dict, MultiBinary, MultiDiscrete, Discrete, Tuple
import numpy as np
from abmarl.tools import Box
from abmarl.sim.wrappers.flatten_wrapper import flatten_space, flatten
Expand Down Expand Up @@ -507,7 +507,7 @@ simple corridor simulation with multiple agents.
'episodes_total': 2000,
},
'verbose': 2,
'local_dir': 'output_dir',
'storage_path': 'output_dir',
'config': {
# --- simulation ---
'disable_env_checking': False,
Expand Down Expand Up @@ -558,17 +558,17 @@ is the name of our configuration file. This will launch
Abmarl, which will process the file and launch RLlib according to the
specified parameters. This particular example should take 1-10 minutes to
train, depending on your compute capabilities. You can view the performance
in real time in tensorboard with ``tensorboard --logdir <local_dir>/abmarl_results``.
in real time in tensorboard with ``tensorboard --logdir <storage_path>/abmarl_results``.

.. NOTE::

By default, the "base" of the output directory is the home directory, and Abmarl will
create the `abmarl_results` directory there. The base directory can be configured
in the `params` under `ray_tune` using the `local_dir` parameter. This value
can be a full path, like ``'local_dir': '/usr/local/scratch'``, or it can be
a relative path, like ``'local_dir': output_dir``, where the path is relative
in the `params` under `ray_tune` using the `storage_path` parameter. This value
can be a full path, like ``'storage_path': '/usr/local/scratch'``, or it can be
a relative path, like ``'storage_path': 'output_dir'``, where the path is relative
from the directory where Abmarl was launched, not from the configuration file.
If a path is given, the output will be under ``<local_dir>/abmarl_results``.
If a path is given, the output will be under ``<storage_path>/abmarl_results``.


.. _debugging:
Expand Down
4 changes: 2 additions & 2 deletions docs/src/tutorials/gridworld/communication.rst
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ a compatible encoding, and (3) is not blocked.

.. code-block:: python
from gym.spaces import Discrete
from gymnasium.spaces import Discrete
from abmarl.sim.gridworld.actor import ActorBaseComponent
import abmarl.sim.gridworld.utils as gu
Expand Down Expand Up @@ -312,7 +312,7 @@ component, which will have a small impact in how we initialize the simulation.

.. code-block:: python
from gym.spaces import Dict
from gymnasium.spaces import Dict
from abmarl.tools import Box
from abmarl.sim.gridworld.observer import ObserverBaseComponent
Expand Down
Loading

0 comments on commit 0a41cba

Please sign in to comment.