diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 07dbd951..9674e675 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -23,7 +23,7 @@ jobs:
           pip install pytest
           sudo apt-get update
           sudo apt-get install libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb patchelf ffmpeg cmake swig
-          pip install pettingzoo
+          pip install pettingzoo[all]
           pip install -e .[all]
       - name: Full Python tests
         run: |
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 548cbfc9..686bc8c1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
       - id: check-symlinks
       - id: destroyed-symlinks
@@ -17,13 +17,13 @@ repos:
       - id: detect-private-key
       - id: debug-statements
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.4
+    rev: v2.2.6
     hooks:
       - id: codespell
        args:
          - --ignore-words-list=reacher, mor
   - repo: https://github.com/PyCQA/flake8
-    rev: 6.0.0
+    rev: 6.1.0
     hooks:
       - id: flake8
        args:
@@ -34,7 +34,7 @@ repos:
          - --show-source
          - --statistics
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.3.1
+    rev: v3.15.0
     hooks:
       - id: pyupgrade
        args: ["--py37-plus"]
@@ -43,7 +43,7 @@ repos:
     hooks:
       - id: isort
   - repo: https://github.com/python/black
-    rev: 23.3.0
+    rev: 23.9.1
     hooks:
       - id: black
   - repo: https://github.com/pycqa/pydocstyle
diff --git a/momadm_benchmarks/envs/multiwalker/__init__.py b/momadm_benchmarks/envs/multiwalker/__init__.py
new file mode 100644
index 00000000..f6d14c49
--- /dev/null
+++ b/momadm_benchmarks/envs/multiwalker/__init__.py
@@ -0,0 +1,5 @@
+"""Adapted from the Multiwalker problem.
+
+From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using
+deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems
+"""
diff --git a/momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py b/momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py
new file mode 100644
index 00000000..591d8639
--- /dev/null
+++ b/momadm_benchmarks/envs/multiwalker/momultiwalker_v0.py
@@ -0,0 +1,5 @@
+"""Multiwalker domain environment for MOMARL."""
+from momadm_benchmarks.envs.multiwalker.multiwalker import env, parallel_env, raw_env
+
+
+__all__ = ["env", "parallel_env", "raw_env"]
diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker.py b/momadm_benchmarks/envs/multiwalker/multiwalker.py
new file mode 100644
index 00000000..63f23916
--- /dev/null
+++ b/momadm_benchmarks/envs/multiwalker/multiwalker.py
@@ -0,0 +1,96 @@
+"""Adapted from the Multiwalker problem.
+
+From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using
+deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems
+"""
+
+from typing_extensions import override
+
+import numpy as np
+from pettingzoo.sisl.multiwalker.multiwalker import FPS
+from pettingzoo.sisl.multiwalker.multiwalker import raw_env as pz_multiwalker
+from pettingzoo.utils import wrappers
+
+from momadm_benchmarks.envs.multiwalker.multiwalker_base import MOMultiWalkerEnv as _env
+from momadm_benchmarks.utils.conversions import mo_aec_to_parallel
+from momadm_benchmarks.utils.env import MOAECEnv
+
+
+def env(**kwargs):
+    """Returns the wrapped environment in `AEC` format.
+
+    Args:
+        **kwargs: keyword args to forward to the raw_env function.
+
+    Returns:
+        A fully wrapped AEC env.
+    """
+    env = raw_env(**kwargs)
+    env = wrappers.ClipOutOfBoundsWrapper(env)
+    return env
+
+
+def parallel_env(**kwargs):
+    """Returns the wrapped env in `parallel` format.
+
+    Args:
+        **kwargs: keyword args to forward to the raw_env function.
+
+    Returns:
+        A fully wrapped parallel env.
+    """
+    env = raw_env(**kwargs)
+    env = mo_aec_to_parallel(env)
+    return env
+
+
+def raw_env(**kwargs):
+    """Returns the environment in `AEC` format.
+
+    Args:
+        **kwargs: keyword args to forward to create the `MOMultiwalker` environment.
+
+    Returns:
+        A raw env.
+    """
+    env = MOMultiwalker(**kwargs)
+    return env
+
+
+class MOMultiwalker(MOAECEnv, pz_multiwalker):
+    """Environment for MO Multiwalker problem domain.
+
+    The init method takes in environment arguments and should define the following attributes:
+    - possible_agents
+    - action_spaces
+    - observation_spaces
+    - reward_spaces
+    These attributes should not be changed after initialization.
+    """
+
+    metadata = {
+        "render_modes": ["human", "rgb_array"],
+        "name": "momultiwalker_v0",
+        "is_parallelizable": True,
+        "render_fps": FPS,
+    }
+
+    @override
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.env = _env(*args, **kwargs)  # override engine
+        # spaces
+        self.reward_spaces = dict(zip(self.agents, self.env.reward_space))
+
+    def reward_space(self, agent):
+        """Returns the reward space for the given agent."""
+        return self.reward_spaces[agent]
+
+    @override
+    def reset(self, seed=None, options=None):
+        super().reset(seed, options)  # super
+        zero_reward = np.zeros(
+            self.reward_spaces[self.agents[0]].shape, dtype=np.float32
+        )  # np.copy() makes different copies of this.
+        self._cumulative_rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents]))
+        self.rewards = dict(zip(self.agents, [zero_reward.copy() for _ in self.agents]))
diff --git a/momadm_benchmarks/envs/multiwalker/multiwalker_base.py b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py
new file mode 100644
index 00000000..7acc74cb
--- /dev/null
+++ b/momadm_benchmarks/envs/multiwalker/multiwalker_base.py
@@ -0,0 +1,221 @@
+"""Adapted from the Multiwalker problem.
+
+From Gupta, J. K., Egorov, M., and Kochenderfer, M. (2017). Cooperative multi-agent control using
+deep reinforcement learning. International Conference on Autonomous Agents and Multiagent Systems
+"""
+
+from typing_extensions import override
+
+import numpy as np
+from gymnasium import spaces
+from pettingzoo.sisl.multiwalker.multiwalker_base import (
+    FPS,
+    LEG_H,
+    SCALE,
+    TERRAIN_GRASS,
+    TERRAIN_HEIGHT,
+    TERRAIN_LENGTH,
+    TERRAIN_STARTPAD,
+    TERRAIN_STEP,
+    VIEWPORT_W,
+    WALKER_SEPERATION,
+)
+from pettingzoo.sisl.multiwalker.multiwalker_base import (
+    BipedalWalker as pz_bipedalwalker,
+)
+from pettingzoo.sisl.multiwalker.multiwalker_base import (
+    MultiWalkerEnv as pz_multiwalker_base,
+)
+
+
+class MOBipedalWalker(pz_bipedalwalker):
+    """Walker Object with the physics implemented."""
+
+    @override
+    def __init__(
+        self,
+        world,
+        forward_reward,
+        fall_reward,
+        terminate_reward,
+        init_x=TERRAIN_STEP * TERRAIN_STARTPAD / 2,
+        init_y=TERRAIN_HEIGHT + 2 * LEG_H,
+        n_walkers=2,
+        seed=None,
+        terrain_length=TERRAIN_LENGTH,
+        terrain_step=TERRAIN_STEP,
+    ):
+        super().__init__(world, init_x, init_y, n_walkers, seed)
+        self.forward_reward = forward_reward
+        self.fall_reward = fall_reward
+        self.terminate_reward = terminate_reward
+        self.terrain_length = terrain_length
+        self.terrain_step = terrain_step
+
+    @property
+    def reward_space(self):
+        """Reward space shape = 3 element 1D array, each element representing 1 objective.
+
+        1. package moving forward.
+        2. no walkers falling.
+        3. package not falling.
+        """
+        return spaces.Box(
+            low=np.array(
+                [-(self.terrain_step * self.forward_reward), self.fall_reward + self.terminate_reward, self.terminate_reward]
+            ),
+            high=np.array([self.terrain_step * self.forward_reward, 0, 0]),
+            shape=(3,),
+            dtype=np.float32,
+        )
+
+
+class MOMultiWalkerEnv(pz_multiwalker_base):
+    """Multiwalker problem domain environment engine.
+
+    Deals with the simulation of the environment.
+    """
+
+    @override
+    def __init__(
+        self,
+        n_walkers=3,
+        position_noise=1e-3,
+        angle_noise=1e-3,
+        forward_reward=1.0,
+        terminate_reward=-100.0,
+        fall_reward=-10.0,
+        shared_reward=True,
+        terminate_on_fall=True,
+        remove_on_fall=True,
+        terrain_length=TERRAIN_LENGTH,
+        max_cycles=500,
+        render_mode=None,
+    ):
+        super().__init__(
+            n_walkers=n_walkers,
+            position_noise=position_noise,
+            angle_noise=angle_noise,
+            forward_reward=forward_reward,
+            terminate_reward=terminate_reward,
+            fall_reward=fall_reward,
+            shared_reward=shared_reward,
+            terminate_on_fall=terminate_on_fall,
+            remove_on_fall=remove_on_fall,
+            terrain_length=terrain_length,
+            max_cycles=max_cycles,
+            render_mode=render_mode,
+        )
+        self.setup()
+        self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]
+
+    @override
+    def setup(self):
+        """Continuation of the `__init__`."""
+        super().setup()
+        init_y = TERRAIN_HEIGHT + 2 * LEG_H
+        self.walkers = [
+            MOBipedalWalker(
+                self.world,
+                self.forward_reward,
+                self.fall_reward,
+                self.terminate_reward,
+                init_x=sx,
+                init_y=init_y,
+                seed=self.seed_val,
+            )
+            for sx in self.start_x
+        ]
+        self.reward_space = [agent.reward_space for agent in self.walkers]
+
+    @override
+    def reset(self):
+        obs = super().reset()
+        self.last_rewards = [np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)]
+        return obs
+
+    @override
+    def step(self, action, agent_id, is_last):
+        # action is array of size 4
+        action = action.reshape(4)
+        assert self.walkers[agent_id].hull is not None, agent_id
+        self.walkers[agent_id].apply_action(action)
+        if is_last:
+            self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
+            rewards, done, mod_obs = self.scroll_subroutine()
+            self.last_obs = mod_obs
+            global_reward = np.mean(rewards, axis=0)  # modified shared MO rewards
+            local_reward = rewards * self.local_ratio
+            self.last_rewards = global_reward * (1.0 - self.local_ratio) + local_reward * self.local_ratio
+            self.last_dones = done
+            self.frames = self.frames + 1
+
+        if self.render_mode == "human":
+            self.render()
+
+    @override
+    def scroll_subroutine(self):
+        """This is the step engine of the environment.
+
+        Here we have vectorized the reward math from the PettingZoo env to be multi-objective.
+        """
+        xpos = np.zeros(self.n_walkers)
+        obs = []
+        done = False
+        rewards = np.array([np.zeros(shape=(3,), dtype=np.float32) for _ in range(self.n_walkers)])
+
+        for i in range(self.n_walkers):
+            if self.walkers[i].hull is None:
+                obs.append(np.zeros_like(self.observation_space[i].low))
+                continue
+            pos = self.walkers[i].hull.position
+            x, y = pos.x, pos.y
+            xpos[i] = x
+
+            walker_obs = self.walkers[i].get_observation()
+            neighbor_obs = []
+            for j in [i - 1, i + 1]:
+                # if no neighbor (for edge walkers)
+                if j < 0 or j == self.n_walkers or self.walkers[j].hull is None:
+                    neighbor_obs.append(0.0)
+                    neighbor_obs.append(0.0)
+                else:
+                    xm = (self.walkers[j].hull.position.x - x) / self.package_length
+                    ym = (self.walkers[j].hull.position.y - y) / self.package_length
+                    neighbor_obs.append(self.np_random.normal(xm, self.position_noise))
+                    neighbor_obs.append(self.np_random.normal(ym, self.position_noise))
+            xd = (self.package.position.x - x) / self.package_length
+            yd = (self.package.position.y - y) / self.package_length
+            neighbor_obs.append(self.np_random.normal(xd, self.position_noise))
+            neighbor_obs.append(self.np_random.normal(yd, self.position_noise))
+            neighbor_obs.append(self.np_random.normal(self.package.angle, self.angle_noise))
+            obs.append(np.array(walker_obs + neighbor_obs))
+
+        # Below this point is the MO reward computation. Above this point is the original PZ code.
+        package_shaping = self.forward_reward * self.package.position.x
+        rewards[:, 0] = package_shaping - self.prev_package_shaping  # obj1: move forward
+        self.prev_package_shaping = package_shaping
+
+        self.scroll = xpos.mean() - VIEWPORT_W / SCALE / 5 - (self.n_walkers - 1) * WALKER_SEPERATION * TERRAIN_STEP
+
+        done = [False] * self.n_walkers
+        for i, (fallen, walker) in enumerate(zip(self.fallen_walkers, self.walkers)):
+            if fallen:  # obj2: agent does not fall
+                rewards[i, 1] = self.fall_reward  # not all, only the one that fell
+                if self.remove_on_fall:
+                    walker._destroy()
+                if self.terminate_on_fall:
+                    rewards[:, 1] += self.terminate_reward
+                done[i] = True
+
+        if self.terminate_on_fall and np.sum(self.fallen_walkers) > 0:  # terminate_on_fall global termination
+            done = [True] * self.n_walkers
+
+        if self.game_over or self.package.position.x < 0:  # obj3: package doesn't fall
+            done = [True] * self.n_walkers
+            rewards[:, 2] = self.terminate_reward
+
+        elif self.package.position.x > (self.terrain_length - TERRAIN_GRASS) * TERRAIN_STEP:
+            done = [True] * self.n_walkers
+
+        return rewards, done, obs
diff --git a/momadm_benchmarks/test/api_test.py b/momadm_benchmarks/test/api_test.py
index 434a2457..06cbf0d3 100644
--- a/momadm_benchmarks/test/api_test.py
+++ b/momadm_benchmarks/test/api_test.py
@@ -171,7 +171,7 @@ def play_test(env, observation_0, num_cycles):
     }
     for agent in env.agent_iter(env.num_agents * num_cycles):
         generated_agents.add(agent)
-        assert agent not in has_finished, "agents cannot resurect! Generate a new agent with a new name."
+        assert agent not in has_finished, "agents cannot resurrect! Generate a new agent with a new name."
         assert isinstance(env.infos[agent], dict), "an environment agent's info must be a dictionary"
         prev_observe, reward, terminated, truncated, info = env.last()
         if terminated or truncated:
diff --git a/momadm_benchmarks/utils/conversions.py b/momadm_benchmarks/utils/conversions.py
index 38ca1595..707b8dee 100644
--- a/momadm_benchmarks/utils/conversions.py
+++ b/momadm_benchmarks/utils/conversions.py
@@ -61,7 +61,7 @@ def reward_spaces(self):
 
     @override
     def reward_space(self, agent):
-        return self.aec_env.reward_spaces(agent)
+        return self.aec_env.reward_spaces[agent]
 
     @override
     def step(self, actions):
diff --git a/tests/all_modules.py b/tests/all_modules.py
index 89a060a2..63aa02ab 100644
--- a/tests/all_modules.py
+++ b/tests/all_modules.py
@@ -1,6 +1,8 @@
 from momadm_benchmarks.envs.beach_domain import mobeach_v0
+from momadm_benchmarks.envs.multiwalker import momultiwalker_v0
 
 
 all_environments = {
     "mobeach_v0": mobeach_v0,
+    "momultiwalker_v0": momultiwalker_v0,
 }
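Reviewer note (not part of the patch): a minimal usage sketch of the new environment, assuming `MOAECEnv` mirrors the standard PettingZoo AEC agent-iteration API; the random-action loop is illustrative only, not the intended training setup. Per the reward-space docstring in this patch, each agent receives a length-3 reward vector (package forward progress, walkers not falling, package not falling).

    from momadm_benchmarks.envs.multiwalker import momultiwalker_v0

    # Create the wrapped AEC environment added by this patch and seed it.
    env = momultiwalker_v0.env()
    env.reset(seed=42)

    # Standard PettingZoo AEC loop; `reward` is a 3-element numpy array per agent.
    for agent in env.agent_iter():
        observation, reward, termination, truncation, info = env.last()
        if termination or truncation:
            action = None  # PettingZoo convention for finished agents
        else:
            action = env.action_space(agent).sample()  # random policy, for illustration only
        env.step(action)
    env.close()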