From bc1a81e20bec6820880ba2d3e111460e95f65b17 Mon Sep 17 00:00:00 2001 From: umutucak Date: Mon, 2 Oct 2023 11:40:42 +0200 Subject: [PATCH 01/23] class init for env and engine --- .../envs/multiwalker/__init__.py | 0 .../envs/multiwalker/multiwalker.py | 42 ++++++++++++ .../envs/multiwalker/multiwalker_base.py | 65 +++++++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 momadm_benchmarks/envs/multiwalker/__init__.py create mode 100644 momadm_benchmarks/envs/multiwalker/multiwalker.py create mode 100644 momadm_benchmarks/envs/multiwalker/multiwalker_base.py diff --git a/momadm_benchmarks/envs/multiwalker/__init__.py b/momadm_benchmarks/envs/multiwalker/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker.py b/momadm_benchmarks/envs/multiwalker/multiwalker.py new file mode 100644 index 00000000..cf346df3 --- /dev/null +++ b/momadm_benchmarks/envs/multiwalker/multiwalker.py @@ -0,0 +1,42 @@ +import numpy as np + +from typing_extensions import override + +from momadm_benchmarks.utils.env import MOAECEnv + +from pettingzoo.sisl.multiwalker.multiwalker import raw_env as pz_multiwalker + +from momadm_benchmarks.envs.multiwalker.multiwalker_base import MOMultiWalkerEnv as _env +from pettingzoo.utils import wrappers + +def env(**kwargs): + env = mo_env(**kwargs) + env = wrappers.ClipOutOfBoundsWrapper(env) + env = wrappers.OrderEnforcingWrapper(env) + return env + +class mo_env(MOAECEnv, pz_multiwalker): + @override + def __init__(self, *args, **kwargs): + pz_multiwalker().__init__(self, *args, **kwargs) + self.env = _env(*args, **kwargs) #override engine + #spaces + self.reward_spaces = dict(zip(self.agents, self.env.reward_space)) + + def reward_space(self, agent): + return self.reward_spaces[agent] + + @override + def reset(self, seed=None, options=None): + pz_multiwalker.reset() # super + zero_reward:np.ndarray + for agent in self.agents: + zero_reward = np.zeros(self.reward_space(agent).shape[0], dtype=np.float32) + break + self._cumulative_rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) # CHECK check copy https://numpy.org/doc/stable/reference/generated/numpy.copy.html + self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) + + # TODO + @override + def step(self, action): + pass \ No newline at end of file diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py new file mode 100644 index 00000000..5a577898 --- /dev/null +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -0,0 +1,65 @@ +from typing_extensions import override +from pettingzoo.sisl.multiwalker.multiwalker_base import TERRAIN_LENGTH, TERRAIN_STEP, TERRAIN_STARTPAD, TERRAIN_HEIGHT, LEG_H + +from pettingzoo.sisl.multiwalker.multiwalker_base import MultiWalkerEnv as pz_multiwalker_base +from pettingzoo.sisl.multiwalker.multiwalker_base import BipedalWalker as pz_bipedalwalker + +import numpy as np +from gymnasium import spaces + +class MOBipedalWalker(pz_bipedalwalker): + def __init(self, + world, + init_x=TERRAIN_STEP * TERRAIN_STARTPAD / 2, + init_y=TERRAIN_HEIGHT + 2 * LEG_H, + n_walkers=2, + seed=None + ): + super.__init__(world, init_x, init_y, n_walkers, seed) + + # @property + # def reward_space(self): + # return spaces.Box(low, high, shape, dtype=np.float32) # TODO what is the shape of the reward space + +class MOMultiWalkerEnv(pz_multiwalker_base): + def __init__( + self, + n_walkers=3, + position_noise=1e-3, 
+ angle_noise=1e-3, + forward_reward=1.0, + terminate_reward=-100.0, + fall_reward=-10.0, + shared_reward=True, + terminate_on_fall=True, + remove_on_fall=True, + terrain_length=TERRAIN_LENGTH, + max_cycles=500, + render_mode=None, + ): + pz_multiwalker_base.__init__(self, + n_walkers=3, + position_noise=1e-3, + angle_noise=1e-3, + forward_reward=1.0, + terminate_reward=-100.0, + fall_reward=-10.0, + shared_reward=True, + terminate_on_fall=True, + remove_on_fall=True, + terrain_length=TERRAIN_LENGTH, + max_cycles=500, + render_mode=None + ) + self.setup() + last_rewards = [0 for _ in range(self.n_walkers)] # TODO vectorize the scalar 0 + + @override + def setup(self): + super.setup() + self.reward_space = [agent.reward_space for agent in self.walkers] # TODO implement reward space in MOBipedalWalker + + @override + def reset(self): + super.reset() + # self.lastrewards = [0 for _ in range(self.n_walkers)] # TODO vectorize the scalar 0 value \ No newline at end of file From 1f3b8a4dafbe99a02c7fd54ac98fc3cf281caee1 Mon Sep 17 00:00:00 2001 From: umutucak Date: Mon, 2 Oct 2023 13:34:56 +0200 Subject: [PATCH 02/23] defined reward shape + vectorized rewards WIP --- .../envs/multiwalker/multiwalker_base.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index 5a577898..41ea4f74 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -17,9 +17,9 @@ def __init(self, ): super.__init__(world, init_x, init_y, n_walkers, seed) - # @property - # def reward_space(self): - # return spaces.Box(low, high, shape, dtype=np.float32) # TODO what is the shape of the reward space + @property + def reward_space(self): + return spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32) class MOMultiWalkerEnv(pz_multiwalker_base): def __init__( @@ -52,14 +52,14 @@ def __init__( render_mode=None ) self.setup() - last_rewards = [0 for _ in range(self.n_walkers)] # TODO vectorize the scalar 0 + self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] @override def setup(self): super.setup() - self.reward_space = [agent.reward_space for agent in self.walkers] # TODO implement reward space in MOBipedalWalker + self.reward_space = [agent.reward_space for agent in self.walkers] @override def reset(self): super.reset() - # self.lastrewards = [0 for _ in range(self.n_walkers)] # TODO vectorize the scalar 0 value \ No newline at end of file + self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] \ No newline at end of file From 6bd7727ed6b98518b4aea2300578dcde20a6f529 Mon Sep 17 00:00:00 2001 From: umutucak Date: Mon, 2 Oct 2023 13:39:59 +0200 Subject: [PATCH 03/23] MO reward space doc --- momadm_benchmarks/envs/multiwalker/multiwalker_base.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index 41ea4f74..4b1503c4 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -19,6 +19,12 @@ def __init(self, @property def reward_space(self): + """ + Reward space shape = 3 element 1D array, each element representing 1 objective. + 1. package moving forward + 2. no walkers falling + 3. 
package not folling + """ return spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32) class MOMultiWalkerEnv(pz_multiwalker_base): From b2c9bcfcf02d2d174631277c5014b596405145f1 Mon Sep 17 00:00:00 2001 From: umutucak Date: Tue, 10 Oct 2023 10:23:25 +0200 Subject: [PATCH 04/23] removed step() override, port not needed --- momadm_benchmarks/envs/multiwalker/multiwalker.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker.py b/momadm_benchmarks/envs/multiwalker/multiwalker.py index cf346df3..007b5969 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker.py @@ -34,9 +34,4 @@ def reset(self, seed=None, options=None): zero_reward = np.zeros(self.reward_space(agent).shape[0], dtype=np.float32) break self._cumulative_rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) # CHECK check copy https://numpy.org/doc/stable/reference/generated/numpy.copy.html - self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) - - # TODO - @override - def step(self, action): - pass \ No newline at end of file + self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) \ No newline at end of file From 7ba18648b691150e4b31cdcccbc9ef18121518aa Mon Sep 17 00:00:00 2001 From: umutucak Date: Tue, 10 Oct 2023 10:34:39 +0200 Subject: [PATCH 05/23] base.reset() fix --- momadm_benchmarks/envs/multiwalker/multiwalker_base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index 4b1503c4..96676ecc 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -66,6 +66,7 @@ def setup(self): self.reward_space = [agent.reward_space for agent in self.walkers] @override - def reset(self): - super.reset() - self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] \ No newline at end of file + def reset(self): # TODO is this correct? 
+ obs = super.reset() + self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] + return obs \ No newline at end of file From ca76e94a49c33f94fb96b56ec10b877bb07ed4c2 Mon Sep 17 00:00:00 2001 From: umutucak Date: Tue, 10 Oct 2023 11:43:48 +0200 Subject: [PATCH 06/23] scroll_subroutine default for diff --- .../envs/multiwalker/multiwalker_base.py | 80 ++++++++++++++++++- 1 file changed, 77 insertions(+), 3 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index 96676ecc..01e4e403 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -1,5 +1,5 @@ from typing_extensions import override -from pettingzoo.sisl.multiwalker.multiwalker_base import TERRAIN_LENGTH, TERRAIN_STEP, TERRAIN_STARTPAD, TERRAIN_HEIGHT, LEG_H +from pettingzoo.sisl.multiwalker.multiwalker_base import TERRAIN_LENGTH, TERRAIN_STEP, TERRAIN_STARTPAD, TERRAIN_GRASS, TERRAIN_HEIGHT, LEG_H, VIEWPORT_W, SCALE, WALKER_SEPERATION from pettingzoo.sisl.multiwalker.multiwalker_base import MultiWalkerEnv as pz_multiwalker_base from pettingzoo.sisl.multiwalker.multiwalker_base import BipedalWalker as pz_bipedalwalker @@ -23,7 +23,7 @@ def reward_space(self): Reward space shape = 3 element 1D array, each element representing 1 objective. 1. package moving forward 2. no walkers falling - 3. package not folling + 3. package not falling """ return spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32) @@ -69,4 +69,78 @@ def setup(self): def reset(self): # TODO is this correct? obs = super.reset() self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] - return obs \ No newline at end of file + return obs + + @override + def scroll_subroutine(self): + xpos = np.zeros(self.n_walkers) + obs = [] + done = False + rewards = np.zeros(self.n_walkers) + + for i in range(self.n_walkers): + if self.walkers[i].hull is None: + obs.append(np.zeros_like(self.observation_space[i].low)) + continue + pos = self.walkers[i].hull.position + x, y = pos.x, pos.y + xpos[i] = x + + walker_obs = self.walkers[i].get_observation() + neighbor_obs = [] + for j in [i - 1, i + 1]: + # if no neighbor (for edge walkers) + if j < 0 or j == self.n_walkers or self.walkers[j].hull is None: + neighbor_obs.append(0.0) + neighbor_obs.append(0.0) + else: + xm = (self.walkers[j].hull.position.x - x) / self.package_length + ym = (self.walkers[j].hull.position.y - y) / self.package_length + neighbor_obs.append(self.np_random.normal(xm, self.position_noise)) + neighbor_obs.append(self.np_random.normal(ym, self.position_noise)) + xd = (self.package.position.x - x) / self.package_length + yd = (self.package.position.y - y) / self.package_length + neighbor_obs.append(self.np_random.normal(xd, self.position_noise)) + neighbor_obs.append(self.np_random.normal(yd, self.position_noise)) + neighbor_obs.append( + self.np_random.normal(self.package.angle, self.angle_noise) + ) + obs.append(np.array(walker_obs + neighbor_obs)) + + shaping = -5.0 * abs(walker_obs[0]) + rewards[i] = shaping - self.prev_shaping[i] + self.prev_shaping[i] = shaping + + package_shaping = self.forward_reward * 130 * self.package.position.x / SCALE + rewards += package_shaping - self.prev_package_shaping + self.prev_package_shaping = package_shaping + + self.scroll = ( + xpos.mean() + - VIEWPORT_W / SCALE / 5 + - (self.n_walkers - 1) * WALKER_SEPERATION * TERRAIN_STEP + ) + + 
done = [False] * self.n_walkers + for i, (fallen, walker) in enumerate(zip(self.fallen_walkers, self.walkers)): + if fallen: + rewards[i] += self.fall_reward + if self.remove_on_fall: + walker._destroy() + if not self.terminate_on_fall: + rewards[i] += self.terminate_reward + done[i] = True + if ( + (self.terminate_on_fall and np.sum(self.fallen_walkers) > 0) + or self.game_over + or self.package.position.x < 0 + ): + rewards += self.terminate_reward + done = [True] * self.n_walkers + elif ( + self.package.position.x + > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP + ): + done = [True] * self.n_walkers + + return rewards, done, obs \ No newline at end of file From d370d5cb9869feb49b8ef739c5ef404defd2e273 Mon Sep 17 00:00:00 2001 From: umutucak Date: Tue, 10 Oct 2023 11:44:57 +0200 Subject: [PATCH 07/23] MO reward backend --- .../envs/multiwalker/multiwalker_base.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index 01e4e403..d560111c 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -76,7 +76,7 @@ def scroll_subroutine(self): xpos = np.zeros(self.n_walkers) obs = [] done = False - rewards = np.zeros(self.n_walkers) + rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] for i in range(self.n_walkers): if self.walkers[i].hull is None: @@ -107,12 +107,9 @@ def scroll_subroutine(self): ) obs.append(np.array(walker_obs + neighbor_obs)) - shaping = -5.0 * abs(walker_obs[0]) - rewards[i] = shaping - self.prev_shaping[i] - self.prev_shaping[i] = shaping - - package_shaping = self.forward_reward * 130 * self.package.position.x / SCALE - rewards += package_shaping - self.prev_package_shaping + package_shaping = self.forward_reward * 130 * self.package.position.x + for agent in rewards: # move forward + agent[0] += package_shaping - self.prev_package_shaping self.prev_package_shaping = package_shaping self.scroll = ( @@ -123,19 +120,22 @@ def scroll_subroutine(self): done = [False] * self.n_walkers for i, (fallen, walker) in enumerate(zip(self.fallen_walkers, self.walkers)): - if fallen: - rewards[i] += self.fall_reward + if fallen: # agent doesnt fall + for agent in rewards: + agent[1] += self.fall_reward if self.remove_on_fall: walker._destroy() if not self.terminate_on_fall: - rewards[i] += self.terminate_reward + for agent in rewards: + agent[1] += self.terminate_reward done[i] = True - if ( + if ( # package doesnt fall (self.terminate_on_fall and np.sum(self.fallen_walkers) > 0) or self.game_over or self.package.position.x < 0 ): - rewards += self.terminate_reward + for agent in rewards: + agent[2] += self.terminate_reward done = [True] * self.n_walkers elif ( self.package.position.x @@ -143,4 +143,4 @@ def scroll_subroutine(self): ): done = [True] * self.n_walkers - return rewards, done, obs \ No newline at end of file + return rewards, done, obs \ No newline at end of file From 24ee56dff304f8f94c7f0ad714f77940a9ab41d3 Mon Sep 17 00:00:00 2001 From: umutucak Date: Tue, 10 Oct 2023 12:08:40 +0200 Subject: [PATCH 08/23] ran and fixed precommit --- .../envs/multiwalker/multiwalker.py | 30 ++++---- .../envs/multiwalker/multiwalker_base.py | 70 ++++++++++--------- 2 files changed, 53 insertions(+), 47 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker.py b/momadm_benchmarks/envs/multiwalker/multiwalker.py 
index 007b5969..d17cd3bc 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker.py @@ -1,13 +1,12 @@ -import numpy as np - from typing_extensions import override -from momadm_benchmarks.utils.env import MOAECEnv - +import numpy as np from pettingzoo.sisl.multiwalker.multiwalker import raw_env as pz_multiwalker +from pettingzoo.utils import wrappers from momadm_benchmarks.envs.multiwalker.multiwalker_base import MOMultiWalkerEnv as _env -from pettingzoo.utils import wrappers +from momadm_benchmarks.utils.env import MOAECEnv + def env(**kwargs): env = mo_env(**kwargs) @@ -15,23 +14,28 @@ def env(**kwargs): env = wrappers.OrderEnforcingWrapper(env) return env + class mo_env(MOAECEnv, pz_multiwalker): @override def __init__(self, *args, **kwargs): pz_multiwalker().__init__(self, *args, **kwargs) - self.env = _env(*args, **kwargs) #override engine - #spaces + self.env = _env(*args, **kwargs) # override engine + # spaces self.reward_spaces = dict(zip(self.agents, self.env.reward_space)) - + def reward_space(self, agent): + """Returns the reward space of the agent""" return self.reward_spaces[agent] - + @override def reset(self, seed=None, options=None): - pz_multiwalker.reset() # super - zero_reward:np.ndarray + """Second step of the env initialization (optionally with a seed)""" + pz_multiwalker.reset() # super + zero_reward: np.ndarray for agent in self.agents: zero_reward = np.zeros(self.reward_space(agent).shape[0], dtype=np.float32) break - self._cumulative_rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) # CHECK check copy https://numpy.org/doc/stable/reference/generated/numpy.copy.html - self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) \ No newline at end of file + self._cumulative_rewards = dict( + zip(self.agents, [zero_reward.copy() for _ in self.agents]) + ) # CHECK check copy https://numpy.org/doc/stable/reference/generated/numpy.copy.html + self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index d560111c..0172c41a 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -1,19 +1,29 @@ from typing_extensions import override -from pettingzoo.sisl.multiwalker.multiwalker_base import TERRAIN_LENGTH, TERRAIN_STEP, TERRAIN_STARTPAD, TERRAIN_GRASS, TERRAIN_HEIGHT, LEG_H, VIEWPORT_W, SCALE, WALKER_SEPERATION - -from pettingzoo.sisl.multiwalker.multiwalker_base import MultiWalkerEnv as pz_multiwalker_base -from pettingzoo.sisl.multiwalker.multiwalker_base import BipedalWalker as pz_bipedalwalker import numpy as np from gymnasium import spaces +from pettingzoo.sisl.multiwalker.multiwalker_base import ( + LEG_H, + SCALE, + TERRAIN_GRASS, + TERRAIN_HEIGHT, + TERRAIN_LENGTH, + TERRAIN_STARTPAD, + TERRAIN_STEP, + VIEWPORT_W, + WALKER_SEPERATION, +) +from pettingzoo.sisl.multiwalker.multiwalker_base import ( + BipedalWalker as pz_bipedalwalker, +) +from pettingzoo.sisl.multiwalker.multiwalker_base import ( + MultiWalkerEnv as pz_multiwalker_base, +) + class MOBipedalWalker(pz_bipedalwalker): - def __init(self, - world, - init_x=TERRAIN_STEP * TERRAIN_STARTPAD / 2, - init_y=TERRAIN_HEIGHT + 2 * LEG_H, - n_walkers=2, - seed=None + def __init( + self, world, init_x=TERRAIN_STEP * TERRAIN_STARTPAD / 2, init_y=TERRAIN_HEIGHT + 2 * LEG_H, n_walkers=2, 
seed=None ): super.__init__(world, init_x, init_y, n_walkers, seed) @@ -27,6 +37,7 @@ def reward_space(self): """ return spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32) + class MOMultiWalkerEnv(pz_multiwalker_base): def __init__( self, @@ -43,7 +54,8 @@ def __init__( max_cycles=500, render_mode=None, ): - pz_multiwalker_base.__init__(self, + pz_multiwalker_base.__init__( + self, n_walkers=3, position_noise=1e-3, angle_noise=1e-3, @@ -55,10 +67,10 @@ def __init__( remove_on_fall=True, terrain_length=TERRAIN_LENGTH, max_cycles=500, - render_mode=None + render_mode=None, ) self.setup() - self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] + self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] @override def setup(self): @@ -66,13 +78,14 @@ def setup(self): self.reward_space = [agent.reward_space for agent in self.walkers] @override - def reset(self): # TODO is this correct? + def reset(self): # TODO is this correct? obs = super.reset() self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] return obs @override def scroll_subroutine(self): + """This is the step engine of the environment. Here we have vectorized the reward math from PZ to be MO""" xpos = np.zeros(self.n_walkers) obs = [] done = False @@ -102,25 +115,19 @@ def scroll_subroutine(self): yd = (self.package.position.y - y) / self.package_length neighbor_obs.append(self.np_random.normal(xd, self.position_noise)) neighbor_obs.append(self.np_random.normal(yd, self.position_noise)) - neighbor_obs.append( - self.np_random.normal(self.package.angle, self.angle_noise) - ) + neighbor_obs.append(self.np_random.normal(self.package.angle, self.angle_noise)) obs.append(np.array(walker_obs + neighbor_obs)) package_shaping = self.forward_reward * 130 * self.package.position.x - for agent in rewards: # move forward - agent[0] += package_shaping - self.prev_package_shaping + for agent in rewards: # move forward + agent[0] += package_shaping - self.prev_package_shaping self.prev_package_shaping = package_shaping - self.scroll = ( - xpos.mean() - - VIEWPORT_W / SCALE / 5 - - (self.n_walkers - 1) * WALKER_SEPERATION * TERRAIN_STEP - ) + self.scroll = xpos.mean() - VIEWPORT_W / SCALE / 5 - (self.n_walkers - 1) * WALKER_SEPERATION * TERRAIN_STEP done = [False] * self.n_walkers for i, (fallen, walker) in enumerate(zip(self.fallen_walkers, self.walkers)): - if fallen: # agent doesnt fall + if fallen: # agent does not fall for agent in rewards: agent[1] += self.fall_reward if self.remove_on_fall: @@ -129,18 +136,13 @@ def scroll_subroutine(self): for agent in rewards: agent[1] += self.terminate_reward done[i] = True - if ( # package doesnt fall - (self.terminate_on_fall and np.sum(self.fallen_walkers) > 0) - or self.game_over - or self.package.position.x < 0 + if ( # package does not fall + (self.terminate_on_fall and np.sum(self.fallen_walkers) > 0) or self.game_over or self.package.position.x < 0 ): for agent in rewards: agent[2] += self.terminate_reward done = [True] * self.n_walkers - elif ( - self.package.position.x - > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP - ): + elif self.package.position.x > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP: done = [True] * self.n_walkers - return rewards, done, obs \ No newline at end of file + return rewards, done, obs From 96875604ae2b8a511b61f4b541c6f86f7e24e913 Mon Sep 17 00:00:00 2001 From: umutucak Date: Tue, 10 Oct 2023 18:22:04 +0200 Subject: [PATCH 09/23] 
fixed all precommit error but 1 --- .../envs/multiwalker/__init__.py | 5 ++ .../envs/multiwalker/multiwalker.py | 50 ++++++++++++++++++- .../envs/multiwalker/multiwalker_base.py | 47 ++++++++++++++--- 3 files changed, 94 insertions(+), 8 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/__init__.py b/momadm_benchmarks/envs/multiwalker/__init__.py index e69de29b..a15d89df 100644 --- a/momadm_benchmarks/envs/multiwalker/__init__.py +++ b/momadm_benchmarks/envs/multiwalker/__init__.py @@ -0,0 +1,5 @@ +"""MO Multiwalker problem. + +From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using +deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems +""" diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker.py b/momadm_benchmarks/envs/multiwalker/multiwalker.py index d17cd3bc..5bdcc00c 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker.py @@ -1,3 +1,9 @@ +"""MO Multiwalker problem. + +From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using +deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems +""" + from typing_extensions import override import numpy as np @@ -9,6 +15,14 @@ def env(**kwargs): + """Autowrapper for the multiwalker domain. + + Args: + **kwargs: keyword args to forward to the raw_env function. + + Returns: + A fully wrapped env. + """ env = mo_env(**kwargs) env = wrappers.ClipOutOfBoundsWrapper(env) env = wrappers.OrderEnforcingWrapper(env) @@ -16,20 +30,52 @@ def env(**kwargs): class mo_env(MOAECEnv, pz_multiwalker): + """Environment for MO Multiwalker problem domain. + + The init method takes in environment arguments and should define the following attributes: + - possible_agents + - action_spaces + - observation_spaces + - reward_spaces + These attributes should not be changed after initialization. + """ + @override def __init__(self, *args, **kwargs): + """Initializes the multiwalker domain. + + Keyword arguments: + n_walkers: number of bipedal walkers in environment. + position_noise: noise applied to agent positional sensor observations. + angle_noise: noise applied to agent rotational sensor observations. + forward_reward: reward applied for an agent standing, scaled by agent's x coordinate. + fall_reward: reward applied when an agent falls down. + shared_reward: whether reward is distributed among all agents or allocated locally. + terminate_reward: reward applied for each fallen walker in environment. + terminate_on_fall: toggles whether agent is done if it falls down. + terrain_length: length of terrain in number of steps. + max_cycles: after max_cycles steps all agents will return done. + """ pz_multiwalker().__init__(self, *args, **kwargs) self.env = _env(*args, **kwargs) # override engine # spaces self.reward_spaces = dict(zip(self.agents, self.env.reward_space)) def reward_space(self, agent): - """Returns the reward space of the agent""" + """Returns the reward space for the given agent.""" return self.reward_spaces[agent] @override def reset(self, seed=None, options=None): - """Second step of the env initialization (optionally with a seed)""" + """Reset needs to initialize the `agents` attribute and must set up the environment so that render(), and step() can be called without issues. 
+ + Args: + seed + options + + Returns: + the observations for each agent + """ pz_multiwalker.reset() # super zero_reward: np.ndarray for agent in self.agents: diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index 0172c41a..4a08f371 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -1,3 +1,9 @@ +"""MO Multiwalker problem. + +From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using +deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems +""" + from typing_extensions import override import numpy as np @@ -22,6 +28,8 @@ class MOBipedalWalker(pz_bipedalwalker): + """Walker Object with the physics implemented.""" + def __init( self, world, init_x=TERRAIN_STEP * TERRAIN_STARTPAD / 2, init_y=TERRAIN_HEIGHT + 2 * LEG_H, n_walkers=2, seed=None ): @@ -29,16 +37,21 @@ def __init( @property def reward_space(self): - """ - Reward space shape = 3 element 1D array, each element representing 1 objective. - 1. package moving forward - 2. no walkers falling - 3. package not falling + """Reward space shape = 3 element 1D array, each element representing 1 objective. + + 1. package moving forward. + 2. no walkers falling. + 3. package not falling. """ return spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32) class MOMultiWalkerEnv(pz_multiwalker_base): + """Multiwalker problem domain environment engine. + + Deals with the simulation of the environment. + """ + def __init__( self, n_walkers=3, @@ -54,6 +67,20 @@ def __init__( max_cycles=500, render_mode=None, ): + """Initializes the `MOMultiWalkerEnv` class. + + Keyword Arguments: + n_walkers: number of bipedal walkers in environment. + position_noise: noise applied to agent positional sensor observations. + angle_noise: noise applied to agent rotational sensor observations. + forward_reward: reward applied for an agent standing, scaled by agent's x coordinate. + fall_reward: reward applied when an agent falls down. + shared_reward: whether reward is distributed among all agents or allocated locally. + terminate_reward: reward applied for each fallen walker in environment. + terminate_on_fall: toggles whether agent is done if it falls down. + terrain_length: length of terrain in number of steps. + max_cycles: after max_cycles steps all agents will return done. + """ pz_multiwalker_base.__init__( self, n_walkers=3, @@ -74,18 +101,26 @@ def __init__( @override def setup(self): + """Continuation of the `__init__`.""" super.setup() self.reward_space = [agent.reward_space for agent in self.walkers] @override def reset(self): # TODO is this correct? + """Reset needs to initialize the `agents` attribute and must set up the environment so that render(), and step() can be called without issues. + + Returns the observations for each agent. + """ obs = super.reset() self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] return obs @override def scroll_subroutine(self): - """This is the step engine of the environment. Here we have vectorized the reward math from PZ to be MO""" + """This is the step engine of the environment. + + Here we have vectorized the reward math from the PettingZoo env to be multi-objective. 
+ """ xpos = np.zeros(self.n_walkers) obs = [] done = False From 9b22950acc27535d028dbd426a18bb216ba3f435 Mon Sep 17 00:00:00 2001 From: umutucak Date: Wed, 11 Oct 2023 11:31:40 +0200 Subject: [PATCH 10/23] add momultiwalker_v0 module and AEC/parallel wrappers --- .../envs/multiwalker/momultiwalker_v0.py | 5 +++ .../envs/multiwalker/multiwalker.py | 34 +++++++++++++++++-- tests/all_modules.py | 3 +- 3 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py diff --git a/momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py b/momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py new file mode 100644 index 00000000..c83439c2 --- /dev/null +++ b/momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py @@ -0,0 +1,5 @@ +"""Multiwalker domain environment for multi-objective optimization.""" +from momadm_benchmarks.envs.multiwalker.multiwalker import env, parallel_env, raw_env + + +__all__ = ["env", "parallel_env", "raw_env"] diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker.py b/momadm_benchmarks/envs/multiwalker/multiwalker.py index
5bdcc00c..99803465 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker.py @@ -11,25 +11,53 @@ from pettingzoo.utils import wrappers from momadm_benchmarks.envs.multiwalker.multiwalker_base import MOMultiWalkerEnv as _env +from momadm_benchmarks.utils.conversions import mo_aec_to_parallel from momadm_benchmarks.utils.env import MOAECEnv def env(**kwargs): - """Autowrapper for the multiwalker domain. + """Returns the env in `AEC` format. Args: **kwargs: keyword args to forward to the raw_env function. + Returns: + A fully wrapped AEC env. + """ + env = raw_env(**kwargs) + return env + + +def parallel_env(**kwargs): + """Returns the env in `parallel` format. + + Args: + **kwargs: keyword args to forward to the raw_env function. + + Returns: + A fully wrapped parallel env. + """ + env = raw_env(**kwargs) + env = mo_aec_to_parallel(env) + return env + + +def raw_env(**kwargs): + """Returns the wrapped env in `AEC` format. + + Args: + **kwargs: keyword args to forward to create the `MOMultiwalker` environment. + Returns: A fully wrapped env. """ - env = mo_env(**kwargs) + env = MOMultiwalker(**kwargs) env = wrappers.ClipOutOfBoundsWrapper(env) env = wrappers.OrderEnforcingWrapper(env) return env -class mo_env(MOAECEnv, pz_multiwalker): +class MOMultiwalker(MOAECEnv, pz_multiwalker): """Environment for MO Multiwalker problem domain. The init method takes in environment arguments and should define the following attributes: diff --git a/tests/all_modules.py b/tests/all_modules.py index 89a060a2..50fc4c51 100644 --- a/tests/all_modules.py +++ b/tests/all_modules.py @@ -1,6 +1,7 @@ from momadm_benchmarks.envs.beach_domain import mobeach_v0 - +from momadm_benchmarks.envs.multiwalker import momultiwalker_v0 all_environments = { "mobeach_v0": mobeach_v0, + "momultiwalker_v0": momultiwalker_v0, } From 9b6cc40ca20e2b3f491e5ede78dd1c3a064ee38a Mon Sep 17 00:00:00 2001 From: umutucak Date: Wed, 11 Oct 2023 12:01:11 +0200 Subject: [PATCH 11/23] cleanup + shared MO rewards math --- .../envs/multiwalker/multiwalker_base.py | 49 ++++++++++++++----- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index 4a08f371..07403992 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -9,6 +9,7 @@ import numpy as np from gymnasium import spaces from pettingzoo.sisl.multiwalker.multiwalker_base import ( + FPS, LEG_H, SCALE, TERRAIN_GRASS, @@ -81,7 +82,7 @@ def __init__( terrain_length: length of terrain in number of steps. max_cycles: after max_cycles steps all agents will return done. """ - pz_multiwalker_base.__init__( + super.__init__( self, n_walkers=3, position_noise=1e-3, @@ -99,6 +100,13 @@ def __init__( self.setup() self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] + def _share_rewards(self, rewards): + shared_rewards = np.empty((3,)) + for i in range(len(rewards)): + avg_reward = rewards[:, i].mean(axis=1) # numpy magic: mean of first elements of all nested arrays + np.append(shared_rewards, avg_reward, axis=1) + return shared_rewards + @override def setup(self): """Continuation of the `__init__`.""" @@ -106,7 +114,7 @@ def setup(self): self.reward_space = [agent.reward_space for agent in self.walkers] @override - def reset(self): # TODO is this correct? 
+ def reset(self): """Reset needs to initialize the `agents` attribute and must set up the environment so that render(), and step() can be called without issues. Returns the observations for each agent. @@ -115,6 +123,25 @@ def reset(self): # TODO is this correct? self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] return obs + @override + def step(self, action, agent_id, is_last): + # action is array of size 4 + action = action.reshape(4) + assert self.walkers[agent_id].hull is not None, agent_id + self.walkers[agent_id].apply_action(action) + if is_last: + self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) + rewards, done, mod_obs = self.scroll_subroutine() + self.last_obs = mod_obs + global_reward = self._share_rewards(rewards) # modified shared MO rewards + local_reward = rewards * self.local_ratio + self.last_rewards = global_reward * (1.0 - self.local_ratio) + local_reward * self.local_ratio + self.last_dones = done + self.frames = self.frames + 1 + + if self.render_mode == "human": + self.render() + @override def scroll_subroutine(self): """This is the step engine of the environment. @@ -153,9 +180,10 @@ def scroll_subroutine(self): neighbor_obs.append(self.np_random.normal(self.package.angle, self.angle_noise)) obs.append(np.array(walker_obs + neighbor_obs)) - package_shaping = self.forward_reward * 130 * self.package.position.x + # Below this point is the MO reward computation. Above this point is the original PZ code. + package_shaping = self.forward_reward * self.package.position.x for agent in rewards: # move forward - agent[0] += package_shaping - self.prev_package_shaping + agent[0] = package_shaping - self.prev_package_shaping self.prev_package_shaping = package_shaping self.scroll = xpos.mean() - VIEWPORT_W / SCALE / 5 - (self.n_walkers - 1) * WALKER_SEPERATION * TERRAIN_STEP @@ -163,19 +191,16 @@ def scroll_subroutine(self): done = [False] * self.n_walkers for i, (fallen, walker) in enumerate(zip(self.fallen_walkers, self.walkers)): if fallen: # agent does not fall - for agent in rewards: - agent[1] += self.fall_reward + rewards[i, 1] = self.fall_reward # not all, only the one that fell if self.remove_on_fall: walker._destroy() if not self.terminate_on_fall: for agent in rewards: - agent[1] += self.terminate_reward - done[i] = True - if ( # package does not fall - (self.terminate_on_fall and np.sum(self.fallen_walkers) > 0) or self.game_over or self.package.position.x < 0 - ): + agent[1] = self.terminate_reward + done[i] = True + if self.game_over or self.package.position.x < 0: # package doesn't fall for agent in rewards: - agent[2] += self.terminate_reward + agent[2] = self.terminate_reward done = [True] * self.n_walkers elif self.package.position.x > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP: done = [True] * self.n_walkers From e4a3a5fabea566c16209084ce556e7e9d4c84a29 Mon Sep 17 00:00:00 2001 From: umutucak Date: Thu, 12 Oct 2023 17:45:29 +0200 Subject: [PATCH 12/23] super() fix, vector fix, misc bug fix --- .../envs/multiwalker/multiwalker.py | 16 +++---- .../envs/multiwalker/multiwalker_base.py | 43 ++++++++++++------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker.py b/momadm_benchmarks/envs/multiwalker/multiwalker.py index 99803465..90c2308c 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker.py @@ -8,7 +8,8 @@ import numpy as np from pettingzoo.sisl.multiwalker.multiwalker import raw_env as 
pz_multiwalker -from pettingzoo.utils import wrappers +from pettingzoo.utils import wrappers, agent_selector +from gymnasium.utils import EzPickle from momadm_benchmarks.envs.multiwalker.multiwalker_base import MOMultiWalkerEnv as _env from momadm_benchmarks.utils.conversions import mo_aec_to_parallel @@ -84,7 +85,7 @@ def __init__(self, *args, **kwargs): terrain_length: length of terrain in number of steps. max_cycles: after max_cycles steps all agents will return done. """ - pz_multiwalker().__init__(self, *args, **kwargs) + super().__init__(*args, **kwargs) self.env = _env(*args, **kwargs) # override engine # spaces self.reward_spaces = dict(zip(self.agents, self.env.reward_space)) @@ -104,12 +105,9 @@ def reset(self, seed=None, options=None): Returns: the observations for each agent """ - pz_multiwalker.reset() # super - zero_reward: np.ndarray - for agent in self.agents: - zero_reward = np.zeros(self.reward_space(agent).shape[0], dtype=np.float32) - break + super().reset() # super + zero_reward = np.zeros(self.reward_spaces["walker_0"].shape, dtype=np.float32) # np.copy() makes different copies of this. self._cumulative_rewards = dict( zip(self.agents, [zero_reward.copy() for _ in self.agents]) - ) # CHECK check copy https://numpy.org/doc/stable/reference/generated/numpy.copy.html - self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) + ) + self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) \ No newline at end of file diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index 07403992..c8377761 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -34,7 +34,7 @@ class MOBipedalWalker(pz_bipedalwalker): def __init( self, world, init_x=TERRAIN_STEP * TERRAIN_STARTPAD / 2, init_y=TERRAIN_HEIGHT + 2 * LEG_H, n_walkers=2, seed=None ): - super.__init__(world, init_x, init_y, n_walkers, seed) + super().__init__(world, init_x, init_y, n_walkers, seed) @property def reward_space(self): @@ -82,8 +82,7 @@ def __init__( terrain_length: length of terrain in number of steps. max_cycles: after max_cycles steps all agents will return done. """ - super.__init__( - self, + super().__init__( n_walkers=3, position_noise=1e-3, angle_noise=1e-3, @@ -102,15 +101,21 @@ def __init__( def _share_rewards(self, rewards): shared_rewards = np.empty((3,)) + # print(rewards) for i in range(len(rewards)): - avg_reward = rewards[:, i].mean(axis=1) # numpy magic: mean of first elements of all nested arrays - np.append(shared_rewards, avg_reward, axis=1) + avg_reward = rewards[:][i].mean() # numpy magic: mean of first elements of all nested arrays + shared_rewards[i] = avg_reward return shared_rewards @override def setup(self): """Continuation of the `__init__`.""" - super.setup() + super().setup() + init_y = TERRAIN_HEIGHT + 2 * LEG_H + self.walkers = [ + MOBipedalWalker(self.world, init_x=sx, init_y=init_y, seed=self.seed_val) + for sx in self.start_x + ] self.reward_space = [agent.reward_space for agent in self.walkers] @override @@ -119,7 +124,7 @@ def reset(self): Returns the observations for each agent. 
""" - obs = super.reset() + obs = super().reset() self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] return obs @@ -129,12 +134,18 @@ def step(self, action, agent_id, is_last): action = action.reshape(4) assert self.walkers[agent_id].hull is not None, agent_id self.walkers[agent_id].apply_action(action) + # print("action:", action) if is_last: self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) rewards, done, mod_obs = self.scroll_subroutine() + # print("step:", agent_id, rewards) + # print("reward type:", type(rewards)) self.last_obs = mod_obs global_reward = self._share_rewards(rewards) # modified shared MO rewards local_reward = rewards * self.local_ratio + # print("global_reward:", global_reward) + # print("local ratio:", self.local_ratio) + # print("local reward", local_reward) self.last_rewards = global_reward * (1.0 - self.local_ratio) + local_reward * self.local_ratio self.last_dones = done self.frames = self.frames + 1 @@ -151,7 +162,8 @@ def scroll_subroutine(self): xpos = np.zeros(self.n_walkers) obs = [] done = False - rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] + rewards = np.array([np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]) + # print("sub type:", type(rewards)) for i in range(self.n_walkers): if self.walkers[i].hull is None: @@ -182,8 +194,7 @@ def scroll_subroutine(self): # Below this point is the MO reward computation. Above this point is the original PZ code. package_shaping = self.forward_reward * self.package.position.x - for agent in rewards: # move forward - agent[0] = package_shaping - self.prev_package_shaping + rewards[:][0] = package_shaping - self.prev_package_shaping # move forward self.prev_package_shaping = package_shaping self.scroll = xpos.mean() - VIEWPORT_W / SCALE / 5 - (self.n_walkers - 1) * WALKER_SEPERATION * TERRAIN_STEP @@ -191,18 +202,18 @@ def scroll_subroutine(self): done = [False] * self.n_walkers for i, (fallen, walker) in enumerate(zip(self.fallen_walkers, self.walkers)): if fallen: # agent does not fall - rewards[i, 1] = self.fall_reward # not all, only the one that fell + rewards[i][1] = self.fall_reward # not all, only the one that fell if self.remove_on_fall: walker._destroy() if not self.terminate_on_fall: - for agent in rewards: - agent[1] = self.terminate_reward - done[i] = True + rewards[:][1] = self.terminate_reward + done = [True] * self.n_walkers if self.game_over or self.package.position.x < 0: # package doesn't fall - for agent in rewards: - agent[2] = self.terminate_reward + rewards[:][2] = self.terminate_reward done = [True] * self.n_walkers elif self.package.position.x > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP: done = [True] * self.n_walkers + # print("subroutine:", rewards) + # print("sub type:", type(rewards)) return rewards, done, obs From d03b9703c968cc06ada28751b3bb425c32fdad33 Mon Sep 17 00:00:00 2001 From: umutucak Date: Mon, 16 Oct 2023 11:08:49 +0200 Subject: [PATCH 13/23] metadata + remove OrderEnforcing for now --- .../envs/multiwalker/multiwalker.py | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker.py b/momadm_benchmarks/envs/multiwalker/multiwalker.py index 90c2308c..3777f033 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker.py @@ -7,9 +7,9 @@ from typing_extensions import override import numpy as np +from pettingzoo.sisl.multiwalker.multiwalker import FPS 
from pettingzoo.sisl.multiwalker.multiwalker import raw_env as pz_multiwalker -from pettingzoo.utils import wrappers, agent_selector -from gymnasium.utils import EzPickle +from pettingzoo.utils import wrappers from momadm_benchmarks.envs.multiwalker.multiwalker_base import MOMultiWalkerEnv as _env from momadm_benchmarks.utils.conversions import mo_aec_to_parallel @@ -26,6 +26,7 @@ def env(**kwargs): A fully wrapped AEC env. """ env = raw_env(**kwargs) + env = wrappers.ClipOutOfBoundsWrapper(env) return env @@ -53,8 +54,6 @@ def raw_env(**kwargs): A fully wrapped env. """ env = MOMultiwalker(**kwargs) - env = wrappers.ClipOutOfBoundsWrapper(env) - env = wrappers.OrderEnforcingWrapper(env) return env @@ -69,6 +68,13 @@ class MOMultiwalker(MOAECEnv, pz_multiwalker): These attributes should not be changed after initialization. """ + metadata = { + "render_modes": ["human", "rgb_array"], + "name": "momultiwalker_v0", + "is_parallelizable": True, + "render_fps": FPS, + } + @override def __init__(self, *args, **kwargs): """Initializes the multiwalker domain. @@ -105,9 +111,9 @@ def reset(self, seed=None, options=None): Returns: the observations for each agent """ - super().reset() # super - zero_reward = np.zeros(self.reward_spaces["walker_0"].shape, dtype=np.float32) # np.copy() makes different copies of this. - self._cumulative_rewards = dict( - zip(self.agents, [zero_reward.copy() for _ in self.agents]) - ) - self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) \ No newline at end of file + super().reset() # super + zero_reward = np.zeros( + self.reward_spaces["walker_0"].shape, dtype=np.float32 + ) # np.copy() makes different copies of this. + self._cumulative_rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) + self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) From 61044327b4b095791d4112dd55d44bf27dc87eb8 Mon Sep 17 00:00:00 2001 From: umutucak Date: Mon, 16 Oct 2023 11:09:05 +0200 Subject: [PATCH 14/23] AEC API fix --- momadm_benchmarks/utils/conversions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/momadm_benchmarks/utils/conversions.py b/momadm_benchmarks/utils/conversions.py index 38ca1595..707b8dee 100644 --- a/momadm_benchmarks/utils/conversions.py +++ b/momadm_benchmarks/utils/conversions.py @@ -61,7 +61,7 @@ def reward_spaces(self): @override def reward_space(self, agent): - return self.aec_env.reward_spaces(agent) + return self.aec_env.reward_spaces[agent] @override def step(self, actions): From 236ff4aa6fd1548d97c36998298d6b978a989396 Mon Sep 17 00:00:00 2001 From: umutucak Date: Mon, 16 Oct 2023 11:10:22 +0200 Subject: [PATCH 15/23] step logic fix if conditions --- .../envs/multiwalker/multiwalker_base.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index c8377761..1c1a6a94 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -112,10 +112,7 @@ def setup(self): """Continuation of the `__init__`.""" super().setup() init_y = TERRAIN_HEIGHT + 2 * LEG_H - self.walkers = [ - MOBipedalWalker(self.world, init_x=sx, init_y=init_y, seed=self.seed_val) - for sx in self.start_x - ] + self.walkers = [MOBipedalWalker(self.world, init_x=sx, init_y=init_y, seed=self.seed_val) for sx in self.start_x] self.reward_space = [agent.reward_space for agent in self.walkers] 
@override @@ -194,7 +191,7 @@ def scroll_subroutine(self): # Below this point is the MO reward computation. Above this point is the original PZ code. package_shaping = self.forward_reward * self.package.position.x - rewards[:][0] = package_shaping - self.prev_package_shaping # move forward + rewards[:][0] = package_shaping - self.prev_package_shaping # move forward self.prev_package_shaping = package_shaping self.scroll = xpos.mean() - VIEWPORT_W / SCALE / 5 - (self.n_walkers - 1) * WALKER_SEPERATION * TERRAIN_STEP @@ -207,10 +204,14 @@ def scroll_subroutine(self): walker._destroy() if not self.terminate_on_fall: rewards[:][1] = self.terminate_reward - done = [True] * self.n_walkers + done[i] = True + + if self.terminate_on_fall and np.sum(self.fallen_walkers) > 0: + done = [True] * self.n_walkers + if self.game_over or self.package.position.x < 0: # package doesn't fall rewards[:][2] = self.terminate_reward - done = [True] * self.n_walkers + elif self.package.position.x > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP: done = [True] * self.n_walkers From 3f7dc6c9c0a541e843ed3cd019c00951f61ae5db Mon Sep 17 00:00:00 2001 From: umutucak Date: Mon, 16 Oct 2023 14:54:25 +0200 Subject: [PATCH 16/23] deterministic test pass --- momadm_benchmarks/envs/multiwalker/multiwalker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker.py b/momadm_benchmarks/envs/multiwalker/multiwalker.py index 3777f033..702ecbaa 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker.py @@ -111,7 +111,7 @@ def reset(self, seed=None, options=None): Returns: the observations for each agent """ - super().reset() # super + super().reset(seed) # super zero_reward = np.zeros( self.reward_spaces["walker_0"].shape, dtype=np.float32 ) # np.copy() makes different copies of this. 
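A note on the shared-reward math: the `_share_rewards` helper introduced in PATCH 11 and reworked in PATCH 12 is meant to average each of the three objectives (package moving forward, no walkers falling, package not falling) across all walkers, mirroring the scalar mean used by the upstream PettingZoo base class. As committed, `rewards[:][i]` selects walker i's whole reward vector rather than objective i across walkers, so the sketch below shows the objective-wise mean the surrounding step() code appears to aim for. It is an illustration only; the helper name, the (n_walkers, 3) shape and the sample values are assumptions, not code from these patches.

import numpy as np

def share_rewards(rewards: np.ndarray) -> np.ndarray:
    # rewards has shape (n_walkers, n_objectives); return the per-objective
    # mean over all walkers, i.e. an array of shape (n_objectives,).
    return rewards.mean(axis=0)

# Example with 3 walkers and 3 objectives (forward progress, walker fall, package fall):
rewards = np.array(
    [[0.4, 0.0, 0.0],
     [0.1, -10.0, 0.0],
     [0.1, 0.0, 0.0]],
    dtype=np.float32,
)
shared = share_rewards(rewards)  # approx. [0.2, -3.33, 0.0]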
From b2a36f06c79e33b3ee05f76cd827ffdbf8739cad Mon Sep 17 00:00:00 2001 From: umutucak Date: Mon, 16 Oct 2023 16:52:16 +0200 Subject: [PATCH 17/23] final test fix please --- .pre-commit-config.yaml | 10 +++++----- momadm_benchmarks/test/api_test.py | 2 +- tests/all_modules.py | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 548cbfc9..686bc8c1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-symlinks - id: destroyed-symlinks @@ -17,13 +17,13 @@ repos: - id: detect-private-key - id: debug-statements - repo: https://github.com/codespell-project/codespell - rev: v2.2.4 + rev: v2.2.6 hooks: - id: codespell args: - --ignore-words-list=reacher, mor - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 args: @@ -34,7 +34,7 @@ repos: - --show-source - --statistics - repo: https://github.com/asottile/pyupgrade - rev: v3.3.1 + rev: v3.15.0 hooks: - id: pyupgrade args: ["--py37-plus"] @@ -43,7 +43,7 @@ repos: hooks: - id: isort - repo: https://github.com/python/black - rev: 23.3.0 + rev: 23.9.1 hooks: - id: black - repo: https://github.com/pycqa/pydocstyle diff --git a/momadm_benchmarks/test/api_test.py b/momadm_benchmarks/test/api_test.py index 434a2457..06cbf0d3 100644 --- a/momadm_benchmarks/test/api_test.py +++ b/momadm_benchmarks/test/api_test.py @@ -171,7 +171,7 @@ def play_test(env, observation_0, num_cycles): } for agent in env.agent_iter(env.num_agents * num_cycles): generated_agents.add(agent) - assert agent not in has_finished, "agents cannot resurect! Generate a new agent with a new name." + assert agent not in has_finished, "agents cannot resurrect! Generate a new agent with a new name." 
assert isinstance(env.infos[agent], dict), "an environment agent's info must be a dictionary" prev_observe, reward, terminated, truncated, info = env.last() if terminated or truncated: diff --git a/tests/all_modules.py b/tests/all_modules.py index 50fc4c51..63aa02ab 100644 --- a/tests/all_modules.py +++ b/tests/all_modules.py @@ -1,6 +1,7 @@ from momadm_benchmarks.envs.beach_domain import mobeach_v0 from momadm_benchmarks.envs.multiwalker import momultiwalker_v0 + all_environments = { "mobeach_v0": mobeach_v0, "momultiwalker_v0": momultiwalker_v0, From 072cbe79269a3dda156029ab0e65b622478742a3 Mon Sep 17 00:00:00 2001 From: umutucak Date: Tue, 17 Oct 2023 10:51:35 +0200 Subject: [PATCH 18/23] PZ dependencies installed on runner --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 07dbd951..9674e675 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,7 +23,7 @@ jobs: pip install pytest sudo apt-get update sudo apt-get install libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb patchelf ffmpeg cmake swig - pip install pettingzoo + pip install pettingzoo[all] pip install -e .[all] - name: Full Python tests run: | From 95a9da00357fed91b31b6e76722f80ce2e3524c1 Mon Sep 17 00:00:00 2001 From: umutucak Date: Tue, 17 Oct 2023 11:22:29 +0200 Subject: [PATCH 19/23] rendering mode fix --- momadm_benchmarks/envs/multiwalker/multiwalker_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index 1c1a6a94..ab533041 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -94,7 +94,7 @@ def __init__( remove_on_fall=True, terrain_length=TERRAIN_LENGTH, max_cycles=500, - render_mode=None, + render_mode=render_mode, ) self.setup() self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] From c7e7637bfe26534069dd308f1fed227ba245a649 Mon Sep 17 00:00:00 2001 From: umutucak Date: Tue, 17 Oct 2023 15:34:27 +0200 Subject: [PATCH 20/23] review fixes --- .../envs/multiwalker/__init__.py | 2 +- .../envs/multiwalker/momultiwalker_v0.py | 2 +- .../envs/multiwalker/multiwalker.py | 33 +++------------- .../envs/multiwalker/multiwalker_base.py | 39 ++----------------- 4 files changed, 10 insertions(+), 66 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/__init__.py b/momadm_benchmarks/envs/multiwalker/__init__.py index a15d89df..f6d14c49 100644 --- a/momadm_benchmarks/envs/multiwalker/__init__.py +++ b/momadm_benchmarks/envs/multiwalker/__init__.py @@ -1,4 +1,4 @@ -"""MO Multiwalker problem. +"""Adapted from the Multiwalker problem. From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using deep reinforcement learning. 
International Conference on Autonomous Agents and Multiagent Systems diff --git a/momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py b/momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py index c83439c2..591d8639 100644 --- a/momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py +++ b/momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py @@ -1,4 +1,4 @@ -"""Multiwalker domain environment for multi-objective optimization.""" +"""Multiwalker domain environment for MOMARL.""" from momadm_benchmarks.envs.multiwalker.multiwalker import env, parallel_env, raw_env diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker.py b/momadm_benchmarks/envs/multiwalker/multiwalker.py index 702ecbaa..216b7d45 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker.py @@ -1,4 +1,4 @@ -"""MO Multiwalker problem. +"""Adapted form of the Multiwalker problem. From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems @@ -17,7 +17,7 @@ def env(**kwargs): - """Returns the env in `AEC` format. + """Returns the wrapped environment in `AEC` format. Args: **kwargs: keyword args to forward to the raw_env function. @@ -31,7 +31,7 @@ def env(**kwargs): def parallel_env(**kwargs): - """Returns the env in `parallel` format. + """Returns the wrapped env in `parallel` format. Args: **kwargs: keyword args to forward to the raw_env function. @@ -45,13 +45,13 @@ def parallel_env(**kwargs): def raw_env(**kwargs): - """Returns the wrapped env in `AEC` format. + """Returns the environment in `AEC` format. Args: **kwargs: keyword args to forward to create the `MOMultiwalker` environment. Returns: - A fully wrapped env. + A raw env. """ env = MOMultiwalker(**kwargs) return env @@ -77,20 +77,6 @@ class MOMultiwalker(MOAECEnv, pz_multiwalker): @override def __init__(self, *args, **kwargs): - """Initializes the multiwalker domain. - - Keyword arguments: - n_walkers: number of bipedal walkers in environment. - position_noise: noise applied to agent positional sensor observations. - angle_noise: noise applied to agent rotational sensor observations. - forward_reward: reward applied for an agent standing, scaled by agent's x coordinate. - fall_reward: reward applied when an agent falls down. - shared_reward: whether reward is distributed among all agents or allocated locally. - terminate_reward: reward applied for each fallen walker in environment. - terminate_on_fall: toggles whether agent is done if it falls down. - terrain_length: length of terrain in number of steps. - max_cycles: after max_cycles steps all agents will return done. - """ super().__init__(*args, **kwargs) self.env = _env(*args, **kwargs) # override engine # spaces @@ -102,15 +88,6 @@ def reward_space(self, agent): @override def reset(self, seed=None, options=None): - """Reset needs to initialize the `agents` attribute and must set up the environment so that render(), and step() can be called without issues. 
- - Args: - seed - options - - Returns: - the observations for each agent - """ super().reset(seed) # super zero_reward = np.zeros( self.reward_spaces["walker_0"].shape, dtype=np.float32 diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index ab533041..b936859d 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -1,4 +1,4 @@ -"""MO Multiwalker problem. +"""Adapted from the Multiwalker problem. From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems @@ -53,6 +53,7 @@ class MOMultiWalkerEnv(pz_multiwalker_base): Deals with the simulation of the environment. """ + @override def __init__( self, n_walkers=3, @@ -68,20 +69,6 @@ def __init__( max_cycles=500, render_mode=None, ): - """Initializes the `MOMultiWalkerEnv` class. - - Keyword Arguments: - n_walkers: number of bipedal walkers in environment. - position_noise: noise applied to agent positional sensor observations. - angle_noise: noise applied to agent rotational sensor observations. - forward_reward: reward applied for an agent standing, scaled by agent's x coordinate. - fall_reward: reward applied when an agent falls down. - shared_reward: whether reward is distributed among all agents or allocated locally. - terminate_reward: reward applied for each fallen walker in environment. - terminate_on_fall: toggles whether agent is done if it falls down. - terrain_length: length of terrain in number of steps. - max_cycles: after max_cycles steps all agents will return done. - """ super().__init__( n_walkers=3, position_noise=1e-3, @@ -99,14 +86,6 @@ def __init__( self.setup() self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] - def _share_rewards(self, rewards): - shared_rewards = np.empty((3,)) - # print(rewards) - for i in range(len(rewards)): - avg_reward = rewards[:][i].mean() # numpy magic: mean of first elements of all nested arrays - shared_rewards[i] = avg_reward - return shared_rewards - @override def setup(self): """Continuation of the `__init__`.""" @@ -117,10 +96,6 @@ def setup(self): @override def reset(self): - """Reset needs to initialize the `agents` attribute and must set up the environment so that render(), and step() can be called without issues. - - Returns the observations for each agent. 
- """ obs = super().reset() self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)] return obs @@ -135,14 +110,9 @@ def step(self, action, agent_id, is_last): if is_last: self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) rewards, done, mod_obs = self.scroll_subroutine() - # print("step:", agent_id, rewards) - # print("reward type:", type(rewards)) self.last_obs = mod_obs - global_reward = self._share_rewards(rewards) # modified shared MO rewards + global_reward = np.mean(rewards, axis=0) # modified shared MO rewards local_reward = rewards * self.local_ratio - # print("global_reward:", global_reward) - # print("local ratio:", self.local_ratio) - # print("local reward", local_reward) self.last_rewards = global_reward * (1.0 - self.local_ratio) + local_reward * self.local_ratio self.last_dones = done self.frames = self.frames + 1 @@ -160,7 +130,6 @@ def scroll_subroutine(self): obs = [] done = False rewards = np.array([np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]) - # print("sub type:", type(rewards)) for i in range(self.n_walkers): if self.walkers[i].hull is None: @@ -215,6 +184,4 @@ def scroll_subroutine(self): elif self.package.position.x > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP: done = [True] * self.n_walkers - # print("subroutine:", rewards) - # print("sub type:", type(rewards)) return rewards, done, obs From 1923bdb42634eeadf7533d7497f6962820048dea Mon Sep 17 00:00:00 2001 From: umutucak Date: Thu, 19 Oct 2023 11:18:20 +0200 Subject: [PATCH 21/23] review fix 2 --- .../envs/multiwalker/multiwalker.py | 4 +- .../envs/multiwalker/multiwalker_base.py | 44 ++++++++++++++++--- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker.py b/momadm_benchmarks/envs/multiwalker/multiwalker.py index 216b7d45..63f23916 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker.py @@ -88,9 +88,9 @@ def reward_space(self, agent): @override def reset(self, seed=None, options=None): - super().reset(seed) # super + super().reset(seed, options) # super zero_reward = np.zeros( - self.reward_spaces["walker_0"].shape, dtype=np.float32 + self.reward_spaces[self.agents[0]].shape, dtype=np.float32 ) # np.copy() makes different copies of this. 
self._cumulative_rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents])) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index b936859d..bc5b8ccd 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -31,10 +31,26 @@ class MOBipedalWalker(pz_bipedalwalker): """Walker Object with the physics implemented.""" - def __init( - self, world, init_x=TERRAIN_STEP * TERRAIN_STARTPAD / 2, init_y=TERRAIN_HEIGHT + 2 * LEG_H, n_walkers=2, seed=None + @override + def __init__( + self, + world, + forward_reward, + fall_reward, + terminate_reward, + init_x=TERRAIN_STEP * TERRAIN_STARTPAD / 2, + init_y=TERRAIN_HEIGHT + 2 * LEG_H, + n_walkers=2, + seed=None, + terrain_length=TERRAIN_LENGTH, + terrain_step=TERRAIN_STEP, ): super().__init__(world, init_x, init_y, n_walkers, seed) + self.forward_reward = forward_reward + self.fall_reward = fall_reward + self.terminate_reward = terminate_reward + self.terrain_length = terrain_length + self.terrain_step = terrain_step @property def reward_space(self): @@ -44,7 +60,12 @@ def reward_space(self): 2. no walkers falling. 3. package not falling. """ - return spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32) + return spaces.Box( + low=np.array([-(self.terrain_step * self.forward_reward), self.fall_reward, self.terminate_reward]), + high=np.array([self.terrain_step * self.forward_reward, 0, 0]), + shape=(3,), + dtype=np.float32, + ) class MOMultiWalkerEnv(pz_multiwalker_base): @@ -91,7 +112,18 @@ def setup(self): """Continuation of the `__init__`.""" super().setup() init_y = TERRAIN_HEIGHT + 2 * LEG_H - self.walkers = [MOBipedalWalker(self.world, init_x=sx, init_y=init_y, seed=self.seed_val) for sx in self.start_x] + self.walkers = [ + MOBipedalWalker( + self.world, + self.forward_reward, + self.fall_reward, + self.terminate_reward, + init_x=sx, + init_y=init_y, + seed=self.seed_val, + ) + for sx in self.start_x + ] self.reward_space = [agent.reward_space for agent in self.walkers] @override @@ -106,7 +138,6 @@ def step(self, action, agent_id, is_last): action = action.reshape(4) assert self.walkers[agent_id].hull is not None, agent_id self.walkers[agent_id].apply_action(action) - # print("action:", action) if is_last: self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) rewards, done, mod_obs = self.scroll_subroutine() @@ -160,6 +191,7 @@ def scroll_subroutine(self): # Below this point is the MO reward computation. Above this point is the original PZ code. 
package_shaping = self.forward_reward * self.package.position.x + print("before:", rewards) rewards[:][0] = package_shaping - self.prev_package_shaping # move forward self.prev_package_shaping = package_shaping @@ -179,9 +211,11 @@ def scroll_subroutine(self): done = [True] * self.n_walkers if self.game_over or self.package.position.x < 0: # package doesn't fall + done = [True] * self.n_walkers rewards[:][2] = self.terminate_reward elif self.package.position.x > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP: done = [True] * self.n_walkers + print("after:", rewards) return rewards, done, obs From c932805b32482a9be12ed125a75a3c9b8c2d0858 Mon Sep 17 00:00:00 2001 From: umutucak Date: Thu, 19 Oct 2023 11:39:21 +0200 Subject: [PATCH 22/23] review fix 2.2, rewards fixed, terminate_on_fall check changed --- .../envs/multiwalker/multiwalker_base.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index bc5b8ccd..90d8b999 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -191,31 +191,31 @@ def scroll_subroutine(self): # Below this point is the MO reward computation. Above this point is the original PZ code. package_shaping = self.forward_reward * self.package.position.x - print("before:", rewards) - rewards[:][0] = package_shaping - self.prev_package_shaping # move forward + # print("before:", rewards) + rewards[:, 0] = package_shaping - self.prev_package_shaping # obj1: move forward self.prev_package_shaping = package_shaping self.scroll = xpos.mean() - VIEWPORT_W / SCALE / 5 - (self.n_walkers - 1) * WALKER_SEPERATION * TERRAIN_STEP done = [False] * self.n_walkers for i, (fallen, walker) in enumerate(zip(self.fallen_walkers, self.walkers)): - if fallen: # agent does not fall - rewards[i][1] = self.fall_reward # not all, only the one that fell + if fallen: # obj2: agent does not fall + rewards[i, 1] = self.fall_reward # not all, only the one that fell if self.remove_on_fall: walker._destroy() - if not self.terminate_on_fall: - rewards[:][1] = self.terminate_reward + if self.terminate_on_fall: + rewards[:, 1] = self.terminate_reward done[i] = True if self.terminate_on_fall and np.sum(self.fallen_walkers) > 0: done = [True] * self.n_walkers - if self.game_over or self.package.position.x < 0: # package doesn't fall + if self.game_over or self.package.position.x < 0: # obj3: package doesn't fall done = [True] * self.n_walkers - rewards[:][2] = self.terminate_reward + rewards[:, 2] = self.terminate_reward elif self.package.position.x > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP: done = [True] * self.n_walkers - print("after:", rewards) + # print("after:", rewards) return rewards, done, obs From debe0b4d0890f4c3ab30c53d09a47e7258de09e0 Mon Sep 17 00:00:00 2001 From: umutucak Date: Thu, 19 Oct 2023 14:40:53 +0200 Subject: [PATCH 23/23] review fix final --- .../envs/multiwalker/multiwalker_base.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py index 90d8b999..7acc74cb 100644 --- a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py +++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py @@ -61,7 +61,9 @@ def reward_space(self): 3. package not falling. 
""" return spaces.Box( - low=np.array([-(self.terrain_step * self.forward_reward), self.fall_reward, self.terminate_reward]), + low=np.array( + [-(self.terrain_step * self.forward_reward), self.fall_reward + self.terminate_reward, self.terminate_reward] + ), high=np.array([self.terrain_step * self.forward_reward, 0, 0]), shape=(3,), dtype=np.float32, @@ -91,17 +93,17 @@ def __init__( render_mode=None, ): super().__init__( - n_walkers=3, - position_noise=1e-3, - angle_noise=1e-3, - forward_reward=1.0, - terminate_reward=-100.0, - fall_reward=-10.0, - shared_reward=True, - terminate_on_fall=True, - remove_on_fall=True, - terrain_length=TERRAIN_LENGTH, - max_cycles=500, + n_walkers=n_walkers, + position_noise=position_noise, + angle_noise=angle_noise, + forward_reward=forward_reward, + terminate_reward=terminate_reward, + fall_reward=fall_reward, + shared_reward=shared_reward, + terminate_on_fall=terminate_on_fall, + remove_on_fall=remove_on_fall, + terrain_length=terrain_length, + max_cycles=max_cycles, render_mode=render_mode, ) self.setup() @@ -191,7 +193,6 @@ def scroll_subroutine(self): # Below this point is the MO reward computation. Above this point is the original PZ code. package_shaping = self.forward_reward * self.package.position.x - # print("before:", rewards) rewards[:, 0] = package_shaping - self.prev_package_shaping # obj1: move forward self.prev_package_shaping = package_shaping @@ -204,10 +205,10 @@ def scroll_subroutine(self): if self.remove_on_fall: walker._destroy() if self.terminate_on_fall: - rewards[:, 1] = self.terminate_reward + rewards[:, 1] += self.terminate_reward done[i] = True - if self.terminate_on_fall and np.sum(self.fallen_walkers) > 0: + if self.terminate_on_fall and np.sum(self.fallen_walkers) > 0: # terminate_on_fall global termination done = [True] * self.n_walkers if self.game_over or self.package.position.x < 0: # obj3: package doesn't fall @@ -217,5 +218,4 @@ def scroll_subroutine(self): elif self.package.position.x > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP: done = [True] * self.n_walkers - # print("after:", rewards) return rewards, done, obs