Commit

ran and fixed precommit
umutucak committed Oct 10, 2023
1 parent d370d5c commit 24ee56d
Showing 2 changed files with 53 additions and 47 deletions.
30 changes: 17 additions & 13 deletions momadm_benchmarks/envs/multiwalker/multiwalker.py
@@ -1,37 +1,41 @@
- import numpy as np

from typing_extensions import override

- from momadm_benchmarks.utils.env import MOAECEnv

+ import numpy as np
from pettingzoo.sisl.multiwalker.multiwalker import raw_env as pz_multiwalker
+ from pettingzoo.utils import wrappers

from momadm_benchmarks.envs.multiwalker.multiwalker_base import MOMultiWalkerEnv as _env
- from pettingzoo.utils import wrappers
+ from momadm_benchmarks.utils.env import MOAECEnv


def env(**kwargs):
env = mo_env(**kwargs)
env = wrappers.ClipOutOfBoundsWrapper(env)
env = wrappers.OrderEnforcingWrapper(env)
return env


class mo_env(MOAECEnv, pz_multiwalker):
@override
def __init__(self, *args, **kwargs):
pz_multiwalker().__init__(self, *args, **kwargs)
- self.env = _env(*args, **kwargs) #override engine
- #spaces
+ self.env = _env(*args, **kwargs)  # override engine
+ # spaces
self.reward_spaces = dict(zip(self.agents, self.env.reward_space))

def reward_space(self, agent):
"""Returns the reward space of the agent"""
return self.reward_spaces[agent]

@override
def reset(self, seed=None, options=None):
- pz_multiwalker.reset() # super
- zero_reward:np.ndarray
+ """Second step of the env initialization (optionally with a seed)"""
+ pz_multiwalker.reset()  # super
+ zero_reward: np.ndarray
for agent in self.agents:
zero_reward = np.zeros(self.reward_space(agent).shape[0], dtype=np.float32)
break
- self._cumulative_rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) # CHECK check copy https://numpy.org/doc/stable/reference/generated/numpy.copy.html
- self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents]))
+ self._cumulative_rewards = dict(
+     zip(self.agents, [zero_reward.copy() for _ in self.agents])
+ )  # CHECK check copy https://numpy.org/doc/stable/reference/generated/numpy.copy.html
+ self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents]))
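For orientation, a minimal usage sketch of the factory and reward-space API shown in this file. Only env() and reward_space() come from the diff; the AEC loop (reset/agent_iter/last/step), the "walker_0" agent name, and the .unwrapped access are standard PettingZoo conventions assumed here, so treat this as illustrative rather than guaranteed to run against this exact commit.

from momadm_benchmarks.envs.multiwalker import multiwalker

mo_walker = multiwalker.env()  # factory above: applies ClipOutOfBounds + OrderEnforcing wrappers
mo_walker.reset(seed=42)
print(mo_walker.unwrapped.reward_space("walker_0"))  # Box(-inf, inf, (3,), float32) per the base env

for agent in mo_walker.agent_iter():
    obs, reward, termination, truncation, info = mo_walker.last()  # reward is a 3-element vector, not a scalar
    action = None if (termination or truncation) else mo_walker.action_space(agent).sample()
    mo_walker.step(action)
mo_walker.close()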
70 changes: 36 additions & 34 deletions momadm_benchmarks/envs/multiwalker/multiwalker_base.py
@@ -1,19 +1,29 @@
from typing_extensions import override
- from pettingzoo.sisl.multiwalker.multiwalker_base import TERRAIN_LENGTH, TERRAIN_STEP, TERRAIN_STARTPAD, TERRAIN_GRASS, TERRAIN_HEIGHT, LEG_H, VIEWPORT_W, SCALE, WALKER_SEPERATION

- from pettingzoo.sisl.multiwalker.multiwalker_base import MultiWalkerEnv as pz_multiwalker_base
- from pettingzoo.sisl.multiwalker.multiwalker_base import BipedalWalker as pz_bipedalwalker

import numpy as np
from gymnasium import spaces
+ from pettingzoo.sisl.multiwalker.multiwalker_base import (
+ LEG_H,
+ SCALE,
+ TERRAIN_GRASS,
+ TERRAIN_HEIGHT,
+ TERRAIN_LENGTH,
+ TERRAIN_STARTPAD,
+ TERRAIN_STEP,
+ VIEWPORT_W,
+ WALKER_SEPERATION,
+ )
+ from pettingzoo.sisl.multiwalker.multiwalker_base import (
+ BipedalWalker as pz_bipedalwalker,
+ )
+ from pettingzoo.sisl.multiwalker.multiwalker_base import (
+ MultiWalkerEnv as pz_multiwalker_base,
+ )


class MOBipedalWalker(pz_bipedalwalker):
- def __init(self,
- world,
- init_x=TERRAIN_STEP * TERRAIN_STARTPAD / 2,
- init_y=TERRAIN_HEIGHT + 2 * LEG_H,
- n_walkers=2,
- seed=None
+ def __init(
+ self, world, init_x=TERRAIN_STEP * TERRAIN_STARTPAD / 2, init_y=TERRAIN_HEIGHT + 2 * LEG_H, n_walkers=2, seed=None
):
super.__init__(world, init_x, init_y, n_walkers, seed)

@@ -27,6 +37,7 @@ def reward_space(self):
"""
return spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32)


class MOMultiWalkerEnv(pz_multiwalker_base):
def __init__(
self,
@@ -43,7 +54,8 @@ def __init__(
max_cycles=500,
render_mode=None,
):
- pz_multiwalker_base.__init__(self,
+ pz_multiwalker_base.__init__(
+ self,
n_walkers=3,
position_noise=1e-3,
angle_noise=1e-3,
@@ -55,24 +67,25 @@ def __init__(
remove_on_fall=True,
terrain_length=TERRAIN_LENGTH,
max_cycles=500,
- render_mode=None
+ render_mode=None,
)
self.setup()
- self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]
+ self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]

@override
def setup(self):
super.setup()
self.reward_space = [agent.reward_space for agent in self.walkers]

@override
- def reset(self): # TODO is this correct?
+ def reset(self):  # TODO is this correct?
obs = super.reset()
self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]
return obs

@override
def scroll_subroutine(self):
"""This is the step engine of the environment. Here we have vectorized the reward math from PZ to be MO"""
xpos = np.zeros(self.n_walkers)
obs = []
done = False
@@ -102,25 +115,19 @@ def scroll_subroutine(self):
yd = (self.package.position.y - y) / self.package_length
neighbor_obs.append(self.np_random.normal(xd, self.position_noise))
neighbor_obs.append(self.np_random.normal(yd, self.position_noise))
- neighbor_obs.append(
- self.np_random.normal(self.package.angle, self.angle_noise)
- )
+ neighbor_obs.append(self.np_random.normal(self.package.angle, self.angle_noise))
obs.append(np.array(walker_obs + neighbor_obs))

package_shaping = self.forward_reward * 130 * self.package.position.x
- for agent in rewards: # move forward
- agent[0] += package_shaping - self.prev_package_shaping
+ for agent in rewards:  # move forward
+ agent[0] += package_shaping - self.prev_package_shaping
self.prev_package_shaping = package_shaping

- self.scroll = (
- xpos.mean()
- - VIEWPORT_W / SCALE / 5
- - (self.n_walkers - 1) * WALKER_SEPERATION * TERRAIN_STEP
- )
+ self.scroll = xpos.mean() - VIEWPORT_W / SCALE / 5 - (self.n_walkers - 1) * WALKER_SEPERATION * TERRAIN_STEP

done = [False] * self.n_walkers
for i, (fallen, walker) in enumerate(zip(self.fallen_walkers, self.walkers)):
- if fallen: # agent doesnt fall
+ if fallen:  # agent does not fall
for agent in rewards:
agent[1] += self.fall_reward
if self.remove_on_fall:
@@ -129,18 +136,13 @@ def scroll_subroutine(self):
for agent in rewards:
agent[1] += self.terminate_reward
done[i] = True
- if ( # package doesnt fall
- (self.terminate_on_fall and np.sum(self.fallen_walkers) > 0)
- or self.game_over
- or self.package.position.x < 0
+ if (  # package does not fall
+ (self.terminate_on_fall and np.sum(self.fallen_walkers) > 0) or self.game_over or self.package.position.x < 0
):
for agent in rewards:
agent[2] += self.terminate_reward
done = [True] * self.n_walkers
- elif (
- self.package.position.x
- > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP
- ):
+ elif self.package.position.x > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP:
done = [True] * self.n_walkers

- return rewards, done, obs
+ return rewards, done, obs
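Taken together, the scroll_subroutine edits keep PettingZoo's scalar reward terms but accumulate them into a separate 3-component vector per walker, matching the Box(shape=(3,)) reward space declared in MOBipedalWalker: index 0 is the shared forward package shaping, index 1 the walker-fall penalty, index 2 the package-fall/termination penalty. A self-contained sketch of that accumulation pattern follows (the shaping formula and loop structure mirror the diff; the position values and reward constants are illustrative stand-ins, not taken from this commit).

import numpy as np

n_walkers, forward_reward, fall_reward, terminate_reward = 3, 1.0, -10.0, -100.0
rewards = [np.zeros(3, dtype=np.float32) for _ in range(n_walkers)]

# objective 0: shared forward progress of the package (shaping difference between steps)
package_shaping = forward_reward * 130 * 0.52  # 130 * package.position.x, with a made-up x
prev_package_shaping = forward_reward * 130 * 0.50
for agent in rewards:
    agent[0] += package_shaping - prev_package_shaping

# objective 1: penalty shared by all walkers when any walker falls
fallen_walkers = [False, True, False]
for fallen in fallen_walkers:
    if fallen:
        for agent in rewards:
            agent[1] += fall_reward

# objective 2: penalty shared by all walkers if the package is dropped (episode failure)
package_fell = False
if package_fell:
    for agent in rewards:
        agent[2] += terminate_reward

print(np.stack(rewards))  # one 3-vector per walker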
