From 24ee56dff304f8f94c7f0ad714f77940a9ab41d3 Mon Sep 17 00:00:00 2001
From: umutucak
Date: Tue, 10 Oct 2023 12:08:40 +0200
Subject: [PATCH] ran and fixed precommit

---
 .../envs/multiwalker/multiwalker.py           | 30 ++++----
 .../envs/multiwalker/multiwalker_base.py      | 70 ++++++++++---------
 2 files changed, 53 insertions(+), 47 deletions(-)

diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker.py b/momadm_benchmarks/envs/multiwalker/multiwalker.py
index 007b5969..d17cd3bc 100644
--- a/momadm_benchmarks/envs/multiwalker/multiwalker.py
+++ b/momadm_benchmarks/envs/multiwalker/multiwalker.py
@@ -1,13 +1,12 @@
-import numpy as np
-
 from typing_extensions import override
 
-from momadm_benchmarks.utils.env import MOAECEnv
-
+import numpy as np
 from pettingzoo.sisl.multiwalker.multiwalker import raw_env as pz_multiwalker
+from pettingzoo.utils import wrappers
 
 from momadm_benchmarks.envs.multiwalker.multiwalker_base import MOMultiWalkerEnv as _env
-from pettingzoo.utils import wrappers
+from momadm_benchmarks.utils.env import MOAECEnv
+
 
 def env(**kwargs):
     env = mo_env(**kwargs)
@@ -15,23 +14,28 @@ def env(**kwargs):
     env = wrappers.OrderEnforcingWrapper(env)
     return env
 
+
 class mo_env(MOAECEnv, pz_multiwalker):
     @override
     def __init__(self, *args, **kwargs):
         pz_multiwalker().__init__(self, *args, **kwargs)
-        self.env = _env(*args, **kwargs) #override engine
-        #spaces
+        self.env = _env(*args, **kwargs)  # override engine
+        # spaces
         self.reward_spaces = dict(zip(self.agents, self.env.reward_space))
-    
+
     def reward_space(self, agent):
+        """Returns the reward space of the given agent."""
         return self.reward_spaces[agent]
-    
+
     @override
     def reset(self, seed=None, options=None):
-        pz_multiwalker.reset() # super
-        zero_reward:np.ndarray
+        """Resets the environment (second step of the env initialization), optionally with a seed."""
+        pz_multiwalker.reset(self, seed=seed, options=options)  # super
+        zero_reward: np.ndarray
         for agent in self.agents:
             zero_reward = np.zeros(self.reward_space(agent).shape[0], dtype=np.float32)
             break
-        self._cumulative_rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) # CHECK check copy https://numpy.org/doc/stable/reference/generated/numpy.copy.html
-        self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents]))
\ No newline at end of file
+        self._cumulative_rewards = dict(
+            zip(self.agents, [zero_reward.copy() for _ in self.agents])
+        )  # TODO: confirm copy semantics, see https://numpy.org/doc/stable/reference/generated/numpy.copy.html
+        self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents]))
diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py
index d560111c..0172c41a 100644
--- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py
+++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py
@@ -1,19 +1,29 @@
 from typing_extensions import override
-from pettingzoo.sisl.multiwalker.multiwalker_base import TERRAIN_LENGTH, TERRAIN_STEP, TERRAIN_STARTPAD, TERRAIN_GRASS, TERRAIN_HEIGHT, LEG_H, VIEWPORT_W, SCALE, WALKER_SEPERATION
-
-from pettingzoo.sisl.multiwalker.multiwalker_base import MultiWalkerEnv as pz_multiwalker_base
-from pettingzoo.sisl.multiwalker.multiwalker_base import BipedalWalker as pz_bipedalwalker
+
 import numpy as np
 from gymnasium import spaces
+from pettingzoo.sisl.multiwalker.multiwalker_base import (
+    LEG_H,
+    SCALE,
+    TERRAIN_GRASS,
+    TERRAIN_HEIGHT,
+    TERRAIN_LENGTH,
+    TERRAIN_STARTPAD,
+    TERRAIN_STEP,
+    VIEWPORT_W,
+    WALKER_SEPERATION,
+)
+from pettingzoo.sisl.multiwalker.multiwalker_base import (
+    BipedalWalker as pz_bipedalwalker,
+)
+from pettingzoo.sisl.multiwalker.multiwalker_base import (
+    MultiWalkerEnv as pz_multiwalker_base,
+)
 
 
 class MOBipedalWalker(pz_bipedalwalker):
-    def __init(self,
-        world,
-        init_x=TERRAIN_STEP * TERRAIN_STARTPAD / 2,
-        init_y=TERRAIN_HEIGHT + 2 * LEG_H,
-        n_walkers=2,
-        seed=None
+    def __init__(
+        self, world, init_x=TERRAIN_STEP * TERRAIN_STARTPAD / 2, init_y=TERRAIN_HEIGHT + 2 * LEG_H, n_walkers=2, seed=None
     ):
         super.__init__(world, init_x, init_y, n_walkers, seed)
 
@@ -27,6 +37,7 @@ def reward_space(self):
         """
         return spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32)
 
+
 class MOMultiWalkerEnv(pz_multiwalker_base):
     def __init__(
         self,
@@ -43,7 +54,8 @@ def __init__(
         max_cycles=500,
         render_mode=None,
     ):
-        pz_multiwalker_base.__init__(self,
+        pz_multiwalker_base.__init__(
+            self,
             n_walkers=3,
             position_noise=1e-3,
             angle_noise=1e-3,
@@ -55,10 +67,10 @@ def __init__(
             remove_on_fall=True,
             terrain_length=TERRAIN_LENGTH,
             max_cycles=500,
-            render_mode=None
+            render_mode=None,
         )
         self.setup()
-        self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] 
+        self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]
 
     @override
     def setup(self):
@@ -66,13 +78,14 @@ def setup(self):
         self.reward_space = [agent.reward_space for agent in self.walkers]
 
     @override
-    def reset(self): # TODO is this correct?
+    def reset(self):  # TODO is this correct?
         obs = super.reset()
         self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]
         return obs
 
     @override
     def scroll_subroutine(self):
+        """The step engine of the environment; the scalar reward math from PZ is vectorized here to be multi-objective."""
         xpos = np.zeros(self.n_walkers)
         obs = []
         done = False
@@ -102,25 +115,19 @@ def scroll_subroutine(self):
             yd = (self.package.position.y - y) / self.package_length
             neighbor_obs.append(self.np_random.normal(xd, self.position_noise))
             neighbor_obs.append(self.np_random.normal(yd, self.position_noise))
-            neighbor_obs.append(
-                self.np_random.normal(self.package.angle, self.angle_noise)
-            )
+            neighbor_obs.append(self.np_random.normal(self.package.angle, self.angle_noise))
             obs.append(np.array(walker_obs + neighbor_obs))
 
         package_shaping = self.forward_reward * 130 * self.package.position.x
-        for agent in rewards: # move forward
-            agent[0] += package_shaping - self.prev_package_shaping
+        for agent in rewards:  # reward forward motion of the package
+            agent[0] += package_shaping - self.prev_package_shaping
         self.prev_package_shaping = package_shaping
 
-        self.scroll = (
-            xpos.mean()
-            - VIEWPORT_W / SCALE / 5
-            - (self.n_walkers - 1) * WALKER_SEPERATION * TERRAIN_STEP
-        )
+        self.scroll = xpos.mean() - VIEWPORT_W / SCALE / 5 - (self.n_walkers - 1) * WALKER_SEPERATION * TERRAIN_STEP
 
         done = [False] * self.n_walkers
         for i, (fallen, walker) in enumerate(zip(self.fallen_walkers, self.walkers)):
-            if fallen: # agent doesnt fall
+            if fallen:  # the walker fell: apply the fall penalty
                 for agent in rewards:
                     agent[1] += self.fall_reward
                 if self.remove_on_fall:
@@ -129,18 +136,13 @@ def scroll_subroutine(self):
                 for agent in rewards:
                     agent[1] += self.terminate_reward
                 done[i] = True
-            if ( # package doesnt fall
-                (self.terminate_on_fall and np.sum(self.fallen_walkers) > 0)
-                or self.game_over
-                or self.package.position.x < 0
+            if (  # the package fell or the episode is over
+                (self.terminate_on_fall and np.sum(self.fallen_walkers) > 0) or self.game_over or self.package.position.x < 0
             ):
                 for agent in rewards:
                     agent[2] += self.terminate_reward
                 done = [True] * self.n_walkers
-            elif (
-                self.package.position.x
-                > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP
-            ):
+            elif self.package.position.x > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP:
                 done = [True] * self.n_walkers
 
-        return rewards, done, obs
\ No newline at end of file
+        return rewards, done, obs
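
Usage note (not part of the patch): a minimal sketch of how the wrapped environment defined above could be driven, assuming the env() factory and the 3-component vector rewards introduced in this patch are available from the momadm_benchmarks package; the loop is the standard PettingZoo AEC pattern and the random action is only a placeholder.

    # Hypothetical usage of the factory defined in multiwalker.py above;
    # the module path is assumed from this patch's layout.
    from momadm_benchmarks.envs.multiwalker.multiwalker import env as mo_multiwalker_env

    walkers = mo_multiwalker_env()
    walkers.reset(seed=42)

    for agent in walkers.agent_iter():
        # reward is a length-3 vector matching reward_space: index 0 tracks package
        # progress, index 1 the fall penalty, index 2 the global termination penalty.
        obs, reward, termination, truncation, info = walkers.last()
        action = None if termination or truncation else walkers.action_space(agent).sample()
        walkers.step(action)

    walkers.close()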