
SISL/Multiwalker env MO Port #5

Merged
merged 23 commits into from
Oct 19, 2023
Changes from 21 commits
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -23,7 +23,7 @@ jobs:
pip install pytest
sudo apt-get update
sudo apt-get install libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb patchelf ffmpeg cmake swig
pip install pettingzoo
pip install pettingzoo[all]
pip install -e .[all]
- name: Full Python tests
run: |
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -2,7 +2,7 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: check-symlinks
- id: destroyed-symlinks
@@ -17,13 +17,13 @@ repos:
- id: detect-private-key
- id: debug-statements
- repo: https://github.com/codespell-project/codespell
rev: v2.2.4
rev: v2.2.6
hooks:
- id: codespell
args:
- --ignore-words-list=reacher, mor
- repo: https://github.com/PyCQA/flake8
rev: 6.0.0
rev: 6.1.0
hooks:
- id: flake8
args:
@@ -34,7 +34,7 @@
- --show-source
- --statistics
- repo: https://github.com/asottile/pyupgrade
rev: v3.3.1
rev: v3.15.0
hooks:
- id: pyupgrade
args: ["--py37-plus"]
@@ -43,7 +43,7 @@
hooks:
- id: isort
- repo: https://github.com/python/black
rev: 23.3.0
rev: 23.9.1
hooks:
- id: black
- repo: https://github.com/pycqa/pydocstyle
5 changes: 5 additions & 0 deletions momadm_benchmarks/envs/multiwalker/__init__.py
@@ -0,0 +1,5 @@
"""Adapted from the Multiwalker problem.

From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using
deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems
"""
5 changes: 5 additions & 0 deletions momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py
@@ -0,0 +1,5 @@
"""Multiwalker domain environment for MOMARL."""
from momadm_benchmarks.envs.multiwalker.multiwalker import env, parallel_env, raw_env


__all__ = ["env", "parallel_env", "raw_env"]
96 changes: 96 additions & 0 deletions momadm_benchmarks/envs/multiwalker/multiwalker.py
@@ -0,0 +1,96 @@
"""Adapted form of the Multiwalker problem.

From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using
deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems
"""

from typing_extensions import override

import numpy as np
from pettingzoo.sisl.multiwalker.multiwalker import FPS
from pettingzoo.sisl.multiwalker.multiwalker import raw_env as pz_multiwalker
from pettingzoo.utils import wrappers

from momadm_benchmarks.envs.multiwalker.multiwalker_base import MOMultiWalkerEnv as _env
from momadm_benchmarks.utils.conversions import mo_aec_to_parallel
from momadm_benchmarks.utils.env import MOAECEnv


def env(**kwargs):
"""Returns the wrapped environment in `AEC` format.

Args:
**kwargs: keyword args to forward to the raw_env function.

Returns:
A fully wrapped AEC env.
"""
env = raw_env(**kwargs)
env = wrappers.ClipOutOfBoundsWrapper(env)
return env


def parallel_env(**kwargs):
"""Returns the wrapped env in `parallel` format.

Args:
**kwargs: keyword args to forward to the raw_env function.

Returns:
A fully wrapped parallel env.
"""
env = raw_env(**kwargs)
env = mo_aec_to_parallel(env)
return env


def raw_env(**kwargs):
"""Returns the environment in `AEC` format.

Args:
**kwargs: keyword args to forward to create the `MOMultiwalker` environment.

Returns:
A raw env.
"""
env = MOMultiwalker(**kwargs)
return env


class MOMultiwalker(MOAECEnv, pz_multiwalker):
"""Environment for MO Multiwalker problem domain.

The init method takes in environment arguments and should define the following attributes:
- possible_agents
- action_spaces
- observation_spaces
- reward_spaces
These attributes should not be changed after initialization.
"""

metadata = {
"render_modes": ["human", "rgb_array"],
"name": "momultiwalker_v0",
"is_parallelizable": True,
"render_fps": FPS,
}

@override
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.env = _env(*args, **kwargs) # override engine
# spaces
self.reward_spaces = dict(zip(self.agents, self.env.reward_space))

def reward_space(self, agent):
"""Returns the reward space for the given agent."""
return self.reward_spaces[agent]

@override
def reset(self, seed=None, options=None):
super().reset(seed, options)  # base PettingZoo reset
zero_reward = np.zeros(
self.reward_spaces[self.agents[0]].shape, dtype=np.float32
)  # copied below so each agent gets an independent reward array
self._cumulative_rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents]))
self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents]))
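
For illustration only (not part of the diff): a minimal usage sketch of the API added above, assuming the package installs as momadm_benchmarks and that the PettingZoo-style wrapper forwards reward_space; rewards come back as length-3 numpy vectors rather than scalars.

from momadm_benchmarks.envs.multiwalker import momultiwalker_v0

env = momultiwalker_v0.env()  # wrapped AEC env; kwargs are forwarded to the underlying multiwalker
env.reset(seed=42)

for agent in env.agent_iter():
    obs, reward, termination, truncation, info = env.last()
    # reward is a (3,) vector: [package forward progress, walker falling, package falling]
    assert reward.shape == env.reward_space(agent).shape
    action = None if termination or truncation else env.action_space(agent).sample()
    env.step(action)
env.close()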
221 changes: 221 additions & 0 deletions momadm_benchmarks/envs/multiwalker/multiwalker_base.py
@@ -0,0 +1,221 @@
"""Adapted from the Multiwalker problem.

From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using
deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems
"""

from typing_extensions import override

import numpy as np
from gymnasium import spaces
from pettingzoo.sisl.multiwalker.multiwalker_base import (
FPS,
LEG_H,
SCALE,
TERRAIN_GRASS,
TERRAIN_HEIGHT,
TERRAIN_LENGTH,
TERRAIN_STARTPAD,
TERRAIN_STEP,
VIEWPORT_W,
WALKER_SEPERATION,
)
from pettingzoo.sisl.multiwalker.multiwalker_base import (
BipedalWalker as pz_bipedalwalker,
)
from pettingzoo.sisl.multiwalker.multiwalker_base import (
MultiWalkerEnv as pz_multiwalker_base,
)


class MOBipedalWalker(pz_bipedalwalker):
"""Walker Object with the physics implemented."""

@override
def __init__(
self,
world,
forward_reward,
fall_reward,
terminate_reward,
init_x=TERRAIN_STEP * TERRAIN_STARTPAD / 2,
init_y=TERRAIN_HEIGHT + 2 * LEG_H,
n_walkers=2,
seed=None,
terrain_length=TERRAIN_LENGTH,
terrain_step=TERRAIN_STEP,
):
super().__init__(world, init_x, init_y, n_walkers, seed)
self.forward_reward = forward_reward
self.fall_reward = fall_reward
self.terminate_reward = terminate_reward
self.terrain_length = terrain_length
self.terrain_step = terrain_step

@property
def reward_space(self):
"""Reward space shape = 3 element 1D array, each element representing 1 objective.

1. package moving forward.
2. no walkers falling.
3. package not falling.
"""
return spaces.Box(
low=np.array([-(self.terrain_step * self.forward_reward), self.fall_reward, self.terminate_reward]),
high=np.array([self.terrain_step * self.forward_reward, 0, 0]),
shape=(3,),
dtype=np.float32,
)


class MOMultiWalkerEnv(pz_multiwalker_base):
"""Multiwalker problem domain environment engine.

Deals with the simulation of the environment.
"""

@override
def __init__(
self,
n_walkers=3,
position_noise=1e-3,
angle_noise=1e-3,
forward_reward=1.0,
terminate_reward=-100.0,
fall_reward=-10.0,
shared_reward=True,
terminate_on_fall=True,
remove_on_fall=True,
terrain_length=TERRAIN_LENGTH,
max_cycles=500,
render_mode=None,
):
super().__init__(
n_walkers=n_walkers,
position_noise=position_noise,
angle_noise=angle_noise,
forward_reward=forward_reward,
terminate_reward=terminate_reward,
fall_reward=fall_reward,
shared_reward=shared_reward,
terminate_on_fall=terminate_on_fall,
remove_on_fall=remove_on_fall,
terrain_length=terrain_length,
max_cycles=max_cycles,
render_mode=render_mode,
)
self.setup()
self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]

@override
def setup(self):
"""Continuation of the `__init__`."""
super().setup()
init_y = TERRAIN_HEIGHT + 2 * LEG_H
self.walkers = [
MOBipedalWalker(
self.world,
self.forward_reward,
self.fall_reward,
self.terminate_reward,
init_x=sx,
init_y=init_y,
seed=self.seed_val,
)
for sx in self.start_x
]
self.reward_space = [agent.reward_space for agent in self.walkers]

@override
def reset(self):
obs = super().reset()
self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]
return obs

@override
def step(self, action, agent_id, is_last):
# action is array of size 4
action = action.reshape(4)
assert self.walkers[agent_id].hull is not None, agent_id
self.walkers[agent_id].apply_action(action)
if is_last:
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
rewards, done, mod_obs = self.scroll_subroutine()
self.last_obs = mod_obs
global_reward = np.mean(rewards, axis=0) # modified shared MO rewards
local_reward = rewards * self.local_ratio
self.last_rewards = global_reward * (1.0 - self.local_ratio) + local_reward * self.local_ratio
self.last_dones = done
self.frames = self.frames + 1

if self.render_mode == "human":
self.render()

@override
def scroll_subroutine(self):
"""This is the step engine of the environment.

Here we have vectorized the reward math from the PettingZoo env to be multi-objective.
"""
xpos = np.zeros(self.n_walkers)
obs = []
done = False
rewards = np.array([np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)])

for i in range(self.n_walkers):
if self.walkers[i].hull is None:
obs.append(np.zeros_like(self.observation_space[i].low))
continue
pos = self.walkers[i].hull.position
x, y = pos.x, pos.y
xpos[i] = x

walker_obs = self.walkers[i].get_observation()
neighbor_obs = []
for j in [i - 1, i + 1]:
# if no neighbor (for edge walkers)
if j < 0 or j == self.n_walkers or self.walkers[j].hull is None:
neighbor_obs.append(0.0)
neighbor_obs.append(0.0)
else:
xm = (self.walkers[j].hull.position.x - x) / self.package_length
ym = (self.walkers[j].hull.position.y - y) / self.package_length
neighbor_obs.append(self.np_random.normal(xm, self.position_noise))
neighbor_obs.append(self.np_random.normal(ym, self.position_noise))
xd = (self.package.position.x - x) / self.package_length
yd = (self.package.position.y - y) / self.package_length
neighbor_obs.append(self.np_random.normal(xd, self.position_noise))
neighbor_obs.append(self.np_random.normal(yd, self.position_noise))
neighbor_obs.append(self.np_random.normal(self.package.angle, self.angle_noise))
obs.append(np.array(walker_obs + neighbor_obs))

# Below this point is the MO reward computation. Above this point is the original PZ code.
package_shaping = self.forward_reward * self.package.position.x
print("before:", rewards)
rewards[:, 0] = package_shaping - self.prev_package_shaping # objective 1: package moving forward
self.prev_package_shaping = package_shaping

self.scroll = xpos.mean() - VIEWPORT_W / SCALE / 5 - (self.n_walkers - 1) * WALKER_SEPERATION * TERRAIN_STEP

done = [False] * self.n_walkers
for i, (fallen, walker) in enumerate(zip(self.fallen_walkers, self.walkers)):
if fallen: # objective 2: a walker fell
rewards[i][1] = self.fall_reward # only the walker that fell, not all walkers
if self.remove_on_fall:
walker._destroy()
if not self.terminate_on_fall:
Collaborator Author: Should this not be if self.terminate_on_fall? This is for giving everyone the termination penalty on top of the falling penalty if the setting is toggled.

Collaborator Author: I changed it to be like this, but if I'm wrong we can revert.

rewards[:, 1] = self.terminate_reward
Collaborator Author: Should this be rewards[:, 2] for the 3rd objective (package not falling)? Which objective should we penalize for terminate_on_fall?

done[i] = True

if self.terminate_on_fall and np.sum(self.fallen_walkers) > 0:
done = [True] * self.n_walkers

if self.game_over or self.package.position.x < 0: # objective 3: the package fell or left the terrain
done = [True] * self.n_walkers
rewards[:, 2] = self.terminate_reward

elif self.package.position.x > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP:
done = [True] * self.n_walkers

print("after:", rewards)
return rewards, done, obs
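
For illustration only (not part of the diff): a standalone numpy check of the reward indexing discussed in the review comments above, with made-up values. rewards has shape (n_walkers, 3), so a column write such as rewards[:, 2] touches one objective for every walker, while rewards[i][1] touches one objective for a single walker.

import numpy as np

n_walkers = 3
# rows = walkers, columns = objectives (forward progress, walker falling, package falling)
rewards = np.zeros((n_walkers, 3), dtype=np.float32)

rewards[:, 0] = 0.25    # objective 1: forward progress credited to every walker
rewards[1][1] = -10.0   # objective 2: fall penalty for walker 1 only
rewards[:, 2] = -100.0  # objective 3: package fell, so every walker is penalized

print(rewards)  # row 1 carries the fall penalty; all rows share the other two entries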
2 changes: 1 addition & 1 deletion momadm_benchmarks/test/api_test.py
@@ -171,7 +171,7 @@ def play_test(env, observation_0, num_cycles):
}
for agent in env.agent_iter(env.num_agents * num_cycles):
generated_agents.add(agent)
assert agent not in has_finished, "agents cannot resurect! Generate a new agent with a new name."
assert agent not in has_finished, "agents cannot resurrect! Generate a new agent with a new name."
assert isinstance(env.infos[agent], dict), "an environment agent's info must be a dictionary"
prev_observe, reward, terminated, truncated, info = env.last()
if terminated or truncated: