
Commit

review fixes
umutucak committed Oct 17, 2023
1 parent 95a9da0 commit c7e7637
Showing 4 changed files with 10 additions and 66 deletions.
momadm_benchmarks/envs/multiwalker/__init__.py (2 changes: 1 addition & 1 deletion)
@@ -1,4 +1,4 @@
"""MO Multiwalker problem.
"""Adapted from the Multiwalker problem.
From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using
deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems
momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py (2 changes: 1 addition & 1 deletion)
@@ -1,4 +1,4 @@
"""Multiwalker domain environment for multi-objective optimization."""
"""Multiwalker domain environment for MOMARL."""
from momadm_benchmarks.envs.multiwalker.multiwalker import env, parallel_env, raw_env


momadm_benchmarks/envs/multiwalker/multiwalker.py (33 changes: 5 additions & 28 deletions)
@@ -1,4 +1,4 @@
"""MO Multiwalker problem.
"""Adapted form of the Multiwalker problem.
From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using
deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems
@@ -17,7 +17,7 @@


def env(**kwargs):
"""Returns the env in `AEC` format.
"""Returns the wrapped environment in `AEC` format.
Args:
**kwargs: keyword args to forward to the raw_env function.
@@ -31,7 +31,7 @@ def env(**kwargs):


def parallel_env(**kwargs):
"""Returns the env in `parallel` format.
"""Returns the wrapped env in `parallel` format.
Args:
**kwargs: keyword args to forward to the raw_env function.
@@ -45,13 +45,13 @@ def parallel_env(**kwargs):


def raw_env(**kwargs):
"""Returns the wrapped env in `AEC` format.
"""Returns the environment in `AEC` format.
Args:
**kwargs: keyword args to forward to create the `MOMultiwalker` environment.
Returns:
-A fully wrapped env.
+A raw env.
"""
env = MOMultiwalker(**kwargs)
return env
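
Note: the three entry points differ only in how much wrapping they apply. raw_env constructs the bare MOMultiwalker, while env and parallel_env return wrapped AEC and parallel variants of it. A minimal usage sketch, assuming the standard PettingZoo-style AEC loop (the reset/agent_iter/last/step calls below are assumptions, not taken from this diff):

from momadm_benchmarks.envs.multiwalker import momultiwalker_v0

aec_env = momultiwalker_v0.env()  # fully wrapped AEC environment
aec_env.reset(seed=42)
for agent in aec_env.agent_iter():
    obs, reward, termination, truncation, info = aec_env.last()
    # reward is expected to be a length-3 vector in this multi-objective variant
    action = None if termination or truncation else aec_env.action_space(agent).sample()
    aec_env.step(action)
aec_env.close()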
@@ -77,20 +77,6 @@ class MOMultiwalker(MOAECEnv, pz_multiwalker):

@override
def __init__(self, *args, **kwargs):
"""Initializes the multiwalker domain.
Keyword arguments:
n_walkers: number of bipedal walkers in environment.
position_noise: noise applied to agent positional sensor observations.
angle_noise: noise applied to agent rotational sensor observations.
forward_reward: reward applied for an agent standing, scaled by agent's x coordinate.
fall_reward: reward applied when an agent falls down.
shared_reward: whether reward is distributed among all agents or allocated locally.
terminate_reward: reward applied for each fallen walker in environment.
terminate_on_fall: toggles whether agent is done if it falls down.
terrain_length: length of terrain in number of steps.
max_cycles: after max_cycles steps all agents will return done.
"""
super().__init__(*args, **kwargs)
self.env = _env(*args, **kwargs) # override engine
# spaces
@@ -102,15 +88,6 @@ def reward_space(self, agent):

@override
def reset(self, seed=None, options=None):
"""Reset needs to initialize the `agents` attribute and must set up the environment so that render(), and step() can be called without issues.
Args:
seed
options
Returns:
the observations for each agent
"""
super().reset(seed) # super
zero_reward = np.zeros(
self.reward_spaces["walker_0"].shape, dtype=np.float32
momadm_benchmarks/envs/multiwalker/multiwalker_base.py (39 changes: 3 additions & 36 deletions)
@@ -1,4 +1,4 @@
"""MO Multiwalker problem.
"""Adapted from the Multiwalker problem.
From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using
deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems
@@ -53,6 +53,7 @@ class MOMultiWalkerEnv(pz_multiwalker_base):
Deals with the simulation of the environment.
"""

+@override
def __init__(
self,
n_walkers=3,
@@ -68,20 +69,6 @@ def __init__(
max_cycles=500,
render_mode=None,
):
"""Initializes the `MOMultiWalkerEnv` class.
Keyword Arguments:
n_walkers: number of bipedal walkers in environment.
position_noise: noise applied to agent positional sensor observations.
angle_noise: noise applied to agent rotational sensor observations.
forward_reward: reward applied for an agent standing, scaled by agent's x coordinate.
fall_reward: reward applied when an agent falls down.
shared_reward: whether reward is distributed among all agents or allocated locally.
terminate_reward: reward applied for each fallen walker in environment.
terminate_on_fall: toggles whether agent is done if it falls down.
terrain_length: length of terrain in number of steps.
max_cycles: after max_cycles steps all agents will return done.
"""
super().__init__(
n_walkers=3,
position_noise=1e-3,
@@ -99,14 +86,6 @@ def __init__(
self.setup()
self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]

-def _share_rewards(self, rewards):
-shared_rewards = np.empty((3,))
-# print(rewards)
-for i in range(len(rewards)):
-avg_reward = rewards[:][i].mean() # numpy magic: mean of first elements of all nested arrays
-shared_rewards[i] = avg_reward
-return shared_rewards

@override
def setup(self):
"""Continuation of the `__init__`."""
@@ -117,10 +96,6 @@ def setup(self):

@override
def reset(self):
"""Reset needs to initialize the `agents` attribute and must set up the environment so that render(), and step() can be called without issues.
Returns the observations for each agent.
"""
obs = super().reset()
self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]
return obs
@@ -135,14 +110,9 @@ def step(self, action, agent_id, is_last):
if is_last:
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
rewards, done, mod_obs = self.scroll_subroutine()
# print("step:", agent_id, rewards)
# print("reward type:", type(rewards))
self.last_obs = mod_obs
-global_reward = self._share_rewards(rewards) # modified shared MO rewards
+global_reward = np.mean(rewards, axis=0) # modified shared MO rewards
local_reward = rewards * self.local_ratio
# print("global_reward:", global_reward)
# print("local ratio:", self.local_ratio)
# print("local reward", local_reward)
self.last_rewards = global_reward * (1.0 - self.local_ratio) + local_reward * self.local_ratio
self.last_dones = done
self.frames = self.frames + 1
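
Note: np.mean(rewards, axis=0) replaces the hand-rolled _share_rewards helper removed above; on the (n_walkers, 3) reward array it averages each of the three objectives across all walkers, and the resulting shared vector is then mixed with each walker's local vector reward through local_ratio. A small standalone sketch of what the new line computes (the sample values are illustrative only):

import numpy as np

# rows are walkers, columns are the 3 objectives
rewards = np.array(
    [[1.0, -0.5, 0.0],
     [3.0, -0.5, 0.0],
     [2.0, -0.5, 0.0]],
    dtype=np.float32,
)

shared = np.mean(rewards, axis=0)  # per-objective mean over walkers
print(shared)  # -> [ 2.  -0.5  0. ]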
@@ -160,7 +130,6 @@ def scroll_subroutine(self):
obs = []
done = False
rewards = np.array([np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)])
# print("sub type:", type(rewards))

for i in range(self.n_walkers):
if self.walkers[i].hull is None:
@@ -215,6 +184,4 @@ def scroll_subroutine(self):
elif self.package.position.x > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP:
done = [True] * self.n_walkers

# print("subroutine:", rewards)
# print("sub type:", type(rewards))
return rewards, done, obs
