SISL/Multiwalker env MO Port #5

Merged · 23 commits · Oct 19, 2023
Changes from 18 commits
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -23,7 +23,7 @@ jobs:
pip install pytest
sudo apt-get update
sudo apt-get install libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb patchelf ffmpeg cmake swig
- pip install pettingzoo
+ pip install pettingzoo[all]
pip install -e .[all]
- name: Full Python tests
run: |
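Note on the install step above: in shells that expand square brackets (zsh, for example), the extras syntax usually needs quoting. A minimal local-setup sketch equivalent to the CI commands, assuming the same extras:

pip install "pettingzoo[all]"
pip install -e ".[all]"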
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -2,7 +2,7 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v4.4.0
+ rev: v4.5.0
hooks:
- id: check-symlinks
- id: destroyed-symlinks
@@ -17,13 +17,13 @@ repos:
- id: detect-private-key
- id: debug-statements
- repo: https://github.com/codespell-project/codespell
- rev: v2.2.4
+ rev: v2.2.6
hooks:
- id: codespell
args:
- --ignore-words-list=reacher, mor
- repo: https://github.com/PyCQA/flake8
- rev: 6.0.0
+ rev: 6.1.0
hooks:
- id: flake8
args:
@@ -34,7 +34,7 @@
- --show-source
- --statistics
- repo: https://github.com/asottile/pyupgrade
- rev: v3.3.1
+ rev: v3.15.0
hooks:
- id: pyupgrade
args: ["--py37-plus"]
@@ -43,7 +43,7 @@
hooks:
- id: isort
- repo: https://github.com/python/black
- rev: 23.3.0
+ rev: 23.9.1
hooks:
- id: black
- repo: https://github.com/pycqa/pydocstyle
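The hook version bumps above can be reproduced and checked locally; a minimal sketch, assuming pre-commit is installed:

pre-commit autoupdate
pre-commit run --all-files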
5 changes: 5 additions & 0 deletions momadm_benchmarks/envs/multiwalker/__init__.py
@@ -0,0 +1,5 @@
"""MO Multiwalker problem.

From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using
deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems
"""
5 changes: 5 additions & 0 deletions momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py
@@ -0,0 +1,5 @@
"""Multiwalker domain environment for multi-objective optimization."""
from momadm_benchmarks.envs.multiwalker.multiwalker import env, parallel_env, raw_env


__all__ = ["env", "parallel_env", "raw_env"]
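For orientation (not part of the diff), a minimal sketch of how this versioned module is expected to be imported, assuming the package name momadm_benchmarks used throughout this PR:

from momadm_benchmarks.envs.multiwalker import momultiwalker_v0

# Any of the three re-exported factories can be used; env() returns the wrapped AEC env.
aec_env = momultiwalker_v0.env()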
119 changes: 119 additions & 0 deletions momadm_benchmarks/envs/multiwalker/multiwalker.py
@@ -0,0 +1,119 @@
"""MO Multiwalker problem.

From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using
deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems
"""

from typing_extensions import override

import numpy as np
from pettingzoo.sisl.multiwalker.multiwalker import FPS
from pettingzoo.sisl.multiwalker.multiwalker import raw_env as pz_multiwalker
from pettingzoo.utils import wrappers

from momadm_benchmarks.envs.multiwalker.multiwalker_base import MOMultiWalkerEnv as _env
from momadm_benchmarks.utils.conversions import mo_aec_to_parallel
from momadm_benchmarks.utils.env import MOAECEnv


def env(**kwargs):
"""Returns the env in `AEC` format.

Args:
**kwargs: keyword args to forward to the raw_env function.

Returns:
A fully wrapped AEC env.
"""
env = raw_env(**kwargs)
env = wrappers.ClipOutOfBoundsWrapper(env)
return env


def parallel_env(**kwargs):
"""Returns the env in `parallel` format.

Args:
**kwargs: keyword args to forward to the raw_env function.

Returns:
A fully wrapped parallel env.
"""
env = raw_env(**kwargs)
env = mo_aec_to_parallel(env)
return env


def raw_env(**kwargs):
"""Returns the wrapped env in `AEC` format.
umutucak marked this conversation as resolved.
Show resolved Hide resolved

Args:
**kwargs: keyword args to forward to create the `MOMultiwalker` environment.

Returns:
A fully wrapped env.
"""
env = MOMultiwalker(**kwargs)
return env


class MOMultiwalker(MOAECEnv, pz_multiwalker):
"""Environment for MO Multiwalker problem domain.

The init method takes in environment arguments and should define the following attributes:
- possible_agents
- action_spaces
- observation_spaces
- reward_spaces
These attributes should not be changed after initialization.
"""

metadata = {
"render_modes": ["human", "rgb_array"],
"name": "momultiwalker_v0",
"is_parallelizable": True,
"render_fps": FPS,
}

    @override
    def __init__(self, *args, **kwargs):
        """Initializes the multiwalker domain.

        Keyword arguments:
            n_walkers: number of bipedal walkers in environment.
            position_noise: noise applied to agent positional sensor observations.
            angle_noise: noise applied to agent rotational sensor observations.
            forward_reward: reward applied for an agent standing, scaled by agent's x coordinate.
            fall_reward: reward applied when an agent falls down.
            shared_reward: whether reward is distributed among all agents or allocated locally.
            terminate_reward: reward applied for each fallen walker in environment.
            terminate_on_fall: toggles whether agent is done if it falls down.
            terrain_length: length of terrain in number of steps.
            max_cycles: after max_cycles steps all agents will return done.
        """
        super().__init__(*args, **kwargs)
        self.env = _env(*args, **kwargs)  # override the engine with the multi-objective base env
        # spaces
        self.reward_spaces = dict(zip(self.agents, self.env.reward_space))

    def reward_space(self, agent):
        """Returns the reward space for the given agent."""
        return self.reward_spaces[agent]

    @override
    def reset(self, seed=None, options=None):
        """Reset needs to initialize the `agents` attribute and must set up the environment so that render() and step() can be called without issues.

        Args:
            seed: random seed for the reset.
            options: additional reset options.

        Returns:
            the observations for each agent
        """
        super().reset(seed)  # reset the underlying PettingZoo multiwalker env
        zero_reward = np.zeros(
            self.reward_spaces["walker_0"].shape, dtype=np.float32
        )  # .copy() below gives each agent its own reward array
        self._cumulative_rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents]))
        self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents]))
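A hedged usage sketch of the new environment (not part of the diff), assuming the wrapper keeps the standard PettingZoo AEC API and that rewards are NumPy vectors with one entry per objective, as set up in reset() above; the sampled actions are placeholders:

import numpy as np

from momadm_benchmarks.envs.multiwalker.multiwalker import env as make_env

env = make_env()
env.reset(seed=42)
for agent in env.agent_iter():
    observation, reward, termination, truncation, info = env.last()
    # reward is a vector (one entry per objective), not a scalar
    action = None if termination or truncation else env.action_space(agent).sample()
    env.step(action)
env.close()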