Merge pull request #5 from rradules/multiwalker
SISL/Multiwalker env MO Port
umutucak authored Oct 19, 2023
2 parents 8eef899 + debe0b4 commit 94ecaeb
Showing 9 changed files with 337 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -23,7 +23,7 @@ jobs:
pip install pytest
sudo apt-get update
sudo apt-get install libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb patchelf ffmpeg cmake swig
- pip install pettingzoo
+ pip install pettingzoo[all]
pip install -e .[all]
- name: Full Python tests
run: |
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -2,7 +2,7 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v4.4.0
+ rev: v4.5.0
hooks:
- id: check-symlinks
- id: destroyed-symlinks
@@ -17,13 +17,13 @@ repos:
- id: detect-private-key
- id: debug-statements
- repo: https://github.com/codespell-project/codespell
- rev: v2.2.4
+ rev: v2.2.6
hooks:
- id: codespell
args:
- --ignore-words-list=reacher, mor
- repo: https://github.com/PyCQA/flake8
- rev: 6.0.0
+ rev: 6.1.0
hooks:
- id: flake8
args:
@@ -34,7 +34,7 @@
- --show-source
- --statistics
- repo: https://github.com/asottile/pyupgrade
- rev: v3.3.1
+ rev: v3.15.0
hooks:
- id: pyupgrade
args: ["--py37-plus"]
@@ -43,7 +43,7 @@
hooks:
- id: isort
- repo: https://github.com/python/black
- rev: 23.3.0
+ rev: 23.9.1
hooks:
- id: black
- repo: https://github.com/pycqa/pydocstyle
5 changes: 5 additions & 0 deletions momadm_benchmarks/envs/multiwalker/__init__.py
@@ -0,0 +1,5 @@
"""Adapted from the Multiwalker problem.
From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using
deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems
"""
5 changes: 5 additions & 0 deletions momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py
@@ -0,0 +1,5 @@
"""Multiwalker domain environment for MOMARL."""
from momadm_benchmarks.envs.multiwalker.multiwalker import env, parallel_env, raw_env


__all__ = ["env", "parallel_env", "raw_env"]
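For context, this versioned module is the public entry point for the environment. A minimal import sketch (hypothetical usage, not part of this diff; it assumes the package was installed with `pip install -e .[all]` as in the CI config above):

    from momadm_benchmarks.envs.multiwalker import momultiwalker_v0

    aec_env = momultiwalker_v0.env()           # fully wrapped AEC env
    par_env = momultiwalker_v0.parallel_env()  # parallel env via mo_aec_to_parallel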
96 changes: 96 additions & 0 deletions momadm_benchmarks/envs/multiwalker/multiwalker.py
@@ -0,0 +1,96 @@
"""Adapted form of the Multiwalker problem.
From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using
deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems
"""

from typing_extensions import override

import numpy as np
from pettingzoo.sisl.multiwalker.multiwalker import FPS
from pettingzoo.sisl.multiwalker.multiwalker import raw_env as pz_multiwalker
from pettingzoo.utils import wrappers

from momadm_benchmarks.envs.multiwalker.multiwalker_base import MOMultiWalkerEnv as _env
from momadm_benchmarks.utils.conversions import mo_aec_to_parallel
from momadm_benchmarks.utils.env import MOAECEnv


def env(**kwargs):
"""Returns the wrapped environment in `AEC` format.
Args:
**kwargs: keyword args to forward to the raw_env function.
Returns:
A fully wrapped AEC env.
"""
env = raw_env(**kwargs)
env = wrappers.ClipOutOfBoundsWrapper(env)
return env


def parallel_env(**kwargs):
"""Returns the wrapped env in `parallel` format.
Args:
**kwargs: keyword args to forward to the raw_env function.
Returns:
A fully wrapped parallel env.
"""
env = raw_env(**kwargs)
env = mo_aec_to_parallel(env)
return env


def raw_env(**kwargs):
"""Returns the environment in `AEC` format.
Args:
**kwargs: keyword args to forward to create the `MOMultiwalker` environment.
Returns:
A raw env.
"""
env = MOMultiwalker(**kwargs)
return env


class MOMultiwalker(MOAECEnv, pz_multiwalker):
"""Environment for MO Multiwalker problem domain.
The init method takes in environment arguments and should define the following attributes:
- possible_agents
- action_spaces
- observation_spaces
- reward_spaces
These attributes should not be changed after initialization.
"""

metadata = {
"render_modes": ["human", "rgb_array"],
"name": "momultiwalker_v0",
"is_parallelizable": True,
"render_fps": FPS,
}

@override
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.env = _env(*args, **kwargs) # override engine
# spaces
self.reward_spaces = dict(zip(self.agents, self.env.reward_space))

def reward_space(self, agent):
"""Returns the reward space for the given agent."""
return self.reward_spaces[agent]

@override
def reset(self, seed=None, options=None):
super().reset(seed, options)  # base PettingZoo reset
zero_reward = np.zeros(
self.reward_spaces[self.agents[0]].shape, dtype=np.float32
)  # .copy() below gives each agent an independent array
self._cumulative_rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents]))
self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents]))
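The factory functions above follow the standard PettingZoo pattern, so the usual AEC interaction loop should carry over unchanged, with `reward` now a length-3 numpy vector bounded by `reward_space(agent)`. A sketch under that assumption (the seed and random action sampling are illustrative):

    from momadm_benchmarks.envs.multiwalker import momultiwalker_v0

    env = momultiwalker_v0.env()
    env.reset(seed=42)
    for agent in env.agent_iter():
        # reward is a numpy array of shape (3,), one entry per objective
        observation, reward, termination, truncation, info = env.last()
        action = None if termination or truncation else env.action_space(agent).sample()
        env.step(action)
    env.close()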
221 changes: 221 additions & 0 deletions momadm_benchmarks/envs/multiwalker/multiwalker_base.py
@@ -0,0 +1,221 @@
"""Adapted from the Multiwalker problem.
From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using
deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems
"""

from typing_extensions import override

import numpy as np
from gymnasium import spaces
from pettingzoo.sisl.multiwalker.multiwalker_base import (
FPS,
LEG_H,
SCALE,
TERRAIN_GRASS,
TERRAIN_HEIGHT,
TERRAIN_LENGTH,
TERRAIN_STARTPAD,
TERRAIN_STEP,
VIEWPORT_W,
WALKER_SEPERATION,
)
from pettingzoo.sisl.multiwalker.multiwalker_base import (
BipedalWalker as pz_bipedalwalker,
)
from pettingzoo.sisl.multiwalker.multiwalker_base import (
MultiWalkerEnv as pz_multiwalker_base,
)


class MOBipedalWalker(pz_bipedalwalker):
"""Walker Object with the physics implemented."""

@override
def __init__(
self,
world,
forward_reward,
fall_reward,
terminate_reward,
init_x=TERRAIN_STEP * TERRAIN_STARTPAD / 2,
init_y=TERRAIN_HEIGHT + 2 * LEG_H,
n_walkers=2,
seed=None,
terrain_length=TERRAIN_LENGTH,
terrain_step=TERRAIN_STEP,
):
super().__init__(world, init_x, init_y, n_walkers, seed)
self.forward_reward = forward_reward
self.fall_reward = fall_reward
self.terminate_reward = terminate_reward
self.terrain_length = terrain_length
self.terrain_step = terrain_step

@property
def reward_space(self):
"""Reward space shape = 3 element 1D array, each element representing 1 objective.
1. package moving forward.
2. no walkers falling.
3. package not falling.
"""
return spaces.Box(
low=np.array(
[-(self.terrain_step * self.forward_reward), self.fall_reward + self.terminate_reward, self.terminate_reward]
),
high=np.array([self.terrain_step * self.forward_reward, 0, 0]),
shape=(3,),
dtype=np.float32,
)


class MOMultiWalkerEnv(pz_multiwalker_base):
"""Multiwalker problem domain environment engine.
Deals with the simulation of the environment.
"""

@override
def __init__(
self,
n_walkers=3,
position_noise=1e-3,
angle_noise=1e-3,
forward_reward=1.0,
terminate_reward=-100.0,
fall_reward=-10.0,
shared_reward=True,
terminate_on_fall=True,
remove_on_fall=True,
terrain_length=TERRAIN_LENGTH,
max_cycles=500,
render_mode=None,
):
super().__init__(
n_walkers=n_walkers,
position_noise=position_noise,
angle_noise=angle_noise,
forward_reward=forward_reward,
terminate_reward=terminate_reward,
fall_reward=fall_reward,
shared_reward=shared_reward,
terminate_on_fall=terminate_on_fall,
remove_on_fall=remove_on_fall,
terrain_length=terrain_length,
max_cycles=max_cycles,
render_mode=render_mode,
)
self.setup()
self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]

@override
def setup(self):
"""Continuation of the `__init__`."""
super().setup()
init_y = TERRAIN_HEIGHT + 2 * LEG_H
self.walkers = [
MOBipedalWalker(
self.world,
self.forward_reward,
self.fall_reward,
self.terminate_reward,
init_x=sx,
init_y=init_y,
seed=self.seed_val,
)
for sx in self.start_x
]
self.reward_space = [agent.reward_space for agent in self.walkers]

@override
def reset(self):
obs = super().reset()
self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]
return obs

@override
def step(self, action, agent_id, is_last):
# action is array of size 4
action = action.reshape(4)
assert self.walkers[agent_id].hull is not None, agent_id
self.walkers[agent_id].apply_action(action)
if is_last:
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
rewards, done, mod_obs = self.scroll_subroutine()
self.last_obs = mod_obs
global_reward = np.mean(rewards, axis=0) # modified shared MO rewards
local_reward = rewards * self.local_ratio
self.last_rewards = global_reward * (1.0 - self.local_ratio) + local_reward * self.local_ratio
self.last_dones = done
self.frames = self.frames + 1

if self.render_mode == "human":
self.render()

@override
def scroll_subroutine(self):
"""This is the step engine of the environment.
Here we have vectorized the reward math from the PettingZoo env to be multi-objective.
"""
xpos = np.zeros(self.n_walkers)
obs = []
done = False
rewards = np.array([np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)])

for i in range(self.n_walkers):
if self.walkers[i].hull is None:
obs.append(np.zeros_like(self.observation_space[i].low))
continue
pos = self.walkers[i].hull.position
x, y = pos.x, pos.y
xpos[i] = x

walker_obs = self.walkers[i].get_observation()
neighbor_obs = []
for j in [i - 1, i + 1]:
# if no neighbor (for edge walkers)
if j < 0 or j == self.n_walkers or self.walkers[j].hull is None:
neighbor_obs.append(0.0)
neighbor_obs.append(0.0)
else:
xm = (self.walkers[j].hull.position.x - x) / self.package_length
ym = (self.walkers[j].hull.position.y - y) / self.package_length
neighbor_obs.append(self.np_random.normal(xm, self.position_noise))
neighbor_obs.append(self.np_random.normal(ym, self.position_noise))
xd = (self.package.position.x - x) / self.package_length
yd = (self.package.position.y - y) / self.package_length
neighbor_obs.append(self.np_random.normal(xd, self.position_noise))
neighbor_obs.append(self.np_random.normal(yd, self.position_noise))
neighbor_obs.append(self.np_random.normal(self.package.angle, self.angle_noise))
obs.append(np.array(walker_obs + neighbor_obs))

# Below this point is the MO reward computation. Above this point is the original PZ code.
package_shaping = self.forward_reward * self.package.position.x
rewards[:, 0] = package_shaping - self.prev_package_shaping # obj1: move forward
self.prev_package_shaping = package_shaping

self.scroll = xpos.mean() - VIEWPORT_W / SCALE / 5 - (self.n_walkers - 1) * WALKER_SEPERATION * TERRAIN_STEP

done = [False] * self.n_walkers
for i, (fallen, walker) in enumerate(zip(self.fallen_walkers, self.walkers)):
if fallen: # obj2: agent does not fall
rewards[i, 1] = self.fall_reward # not all, only the one that fell
if self.remove_on_fall:
walker._destroy()
if self.terminate_on_fall:
rewards[:, 1] += self.terminate_reward
done[i] = True

if self.terminate_on_fall and np.sum(self.fallen_walkers) > 0: # terminate_on_fall global termination
done = [True] * self.n_walkers

if self.game_over or self.package.position.x < 0: # obj3: package doesn't fall
done = [True] * self.n_walkers
rewards[:, 2] = self.terminate_reward

elif self.package.position.x > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP:
done = [True] * self.n_walkers

return rewards, done, obs
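Because `scroll_subroutine` returns an `[n_walkers, 3]` reward array (forward progress, walker falls, package falls), single-objective baselines need to scalarize it downstream. A minimal linear-scalarization sketch; the equal weight vector is illustrative and not part of this PR:

    import numpy as np

    def scalarize(reward_vec, weights=(1.0, 1.0, 1.0)):
        """Collapse a length-3 multi-objective reward into a weighted sum."""
        return float(np.dot(np.asarray(weights), reward_vec))

    # e.g. scalar_rewards = [scalarize(r) for r in rewards]  # rewards: shape (n_walkers, 3)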
2 changes: 1 addition & 1 deletion momadm_benchmarks/test/api_test.py
@@ -171,7 +171,7 @@ def play_test(env, observation_0, num_cycles):
}
for agent in env.agent_iter(env.num_agents * num_cycles):
generated_agents.add(agent)
- assert agent not in has_finished, "agents cannot resurect! Generate a new agent with a new name."
+ assert agent not in has_finished, "agents cannot resurrect! Generate a new agent with a new name."
assert isinstance(env.infos[agent], dict), "an environment agent's info must be a dictionary"
prev_observe, reward, terminated, truncated, info = env.last()
if terminated or truncated: