diff --git a/docs/_static/videos/mo-ant.gif b/docs/_static/videos/mo-ant.gif new file mode 100644 index 00000000..9397b4ff Binary files /dev/null and b/docs/_static/videos/mo-ant.gif differ diff --git a/docs/_static/videos/mo-humanoid.gif b/docs/_static/videos/mo-humanoid.gif new file mode 100644 index 00000000..625a40f8 Binary files /dev/null and b/docs/_static/videos/mo-humanoid.gif differ diff --git a/docs/_static/videos/mo-swimmer.gif b/docs/_static/videos/mo-swimmer.gif new file mode 100644 index 00000000..f1dffd63 Binary files /dev/null and b/docs/_static/videos/mo-swimmer.gif differ diff --git a/docs/_static/videos/mo-walker2d.gif b/docs/_static/videos/mo-walker2d.gif new file mode 100644 index 00000000..6a2a2e1e Binary files /dev/null and b/docs/_static/videos/mo-walker2d.gif differ diff --git a/docs/environments/mujoco.md b/docs/environments/mujoco.md index aa0d3154..70272d24 100644 --- a/docs/environments/mujoco.md +++ b/docs/environments/mujoco.md @@ -11,6 +11,10 @@ Multi-objective versions of Mujoco environments. | [`mo-reacher-v4`](https://mo-gymnasium.farama.org/environments/mo-reacher/)
| Continuous / Discrete | `[target_1, target_2, target_3, target_4]` | Mujoco version of `mo-reacher-v0`, based on `Reacher-v4` [environment](https://gymnasium.farama.org/environments/mujoco/reacher/). | | [`mo-hopper-v4`](https://mo-gymnasium.farama.org/environments/mo-hopper/)
| Continuous / Continuous | `[velocity, height, energy]` | Multi-objective version of [Hopper-v4](https://gymnasium.farama.org/environments/mujoco/hopper/) env. | | [`mo-halfcheetah-v4`](https://mo-gymnasium.farama.org/environments/mo-halfcheetah/)
| Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [HalfCheetah-v4](https://gymnasium.farama.org/environments/mujoco/half_cheetah/) env. Similar to [Xu et al. 2020](https://github.com/mit-gfx/PGMORL). | +| [`mo-walker2d-v4`](https://mo-gymnasium.farama.org/environments/mo-walker2d/)
| Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Walker2d-v4](https://gymnasium.farama.org/environments/mujoco/walker2d/) env. | +| [`mo-ant-v4`](https://mo-gymnasium.farama.org/environments/mo-ant/)
| Continuous / Continuous | `[x_velocity, y_velocity, energy]` | Multi-objective version of [Ant-v4](https://gymnasium.farama.org/environments/mujoco/ant/) env. | +| [`mo-swimmer-v4`](https://mo-gymnasium.farama.org/environments/mo-swimmer/)
| Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Swimmer-v4](https://gymnasium.farama.org/environments/mujoco/swimmer/) env. | +| [`mo-humanoid-v4`](https://mo-gymnasium.farama.org/environments/mo-humanoid/)
| Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Humanoid-v4](https://gymnasium.farama.org/environments/mujoco/humanoid/) env. | ```{toctree} @@ -21,5 +25,8 @@ Multi-objective versions of Mujoco environments. ./mo-reacher ./mo-hopper ./mo-halfcheetah - +./mo-walker2d +./mo-ant +./mo-swimmer +./mo-humanoid ``` diff --git a/mo_gymnasium/envs/mujoco/__init__.py b/mo_gymnasium/envs/mujoco/__init__.py index 442feae1..12b77130 100644 --- a/mo_gymnasium/envs/mujoco/__init__.py +++ b/mo_gymnasium/envs/mujoco/__init__.py @@ -20,6 +20,37 @@ kwargs={"cost_objective": False}, ) +register( + id="mo-walker2d-v4", + entry_point="mo_gymnasium.envs.mujoco.walker2d:MOWalker2dEnv", + max_episode_steps=1000, +) + +register( + id="mo-ant-v4", + entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv", + max_episode_steps=1000, +) + +register( + id="mo-ant-2d-v4", + entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv", + max_episode_steps=1000, + kwargs={"cost_objective": False}, +) + +register( + id="mo-swimmer-v4", + entry_point="mo_gymnasium.envs.mujoco.swimmer:MOSwimmerEnv", + max_episode_steps=1000, +) + +register( + id="mo-humanoid-v4", + entry_point="mo_gymnasium.envs.mujoco.humanoid:MOHumanoidEnv", + max_episode_steps=1000, +) + register( id="mo-reacher-v4", entry_point="mo_gymnasium.envs.mujoco.reacher:MOReacherEnv", diff --git a/mo_gymnasium/envs/mujoco/ant.py b/mo_gymnasium/envs/mujoco/ant.py new file mode 100644 index 00000000..637edeb2 --- /dev/null +++ b/mo_gymnasium/envs/mujoco/ant.py @@ -0,0 +1,46 @@ +import numpy as np +from gymnasium.envs.mujoco.ant_v4 import AntEnv +from gymnasium.spaces import Box +from gymnasium.utils import EzPickle + + +class MOAntEnv(AntEnv, EzPickle): + """ + ## Description + Multi-objective version of the AntEnv environment. + + See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/ant/) for more information. 
+ + ## Reward Space + The reward is 2- or 3-dimensional: + - 0: x-velocity + - 1: y-velocity + - 2: Control cost of the action + If the cost_objective flag is set to False, the reward is 2-dimensional, and the cost is added to other objectives. + A healthy reward is added to all objectives. + """ + + def __init__(self, cost_objective=True, **kwargs): + super().__init__(**kwargs) + EzPickle.__init__(self, cost_objective, **kwargs) + self.cost_objetive = cost_objective + self.reward_dim = 3 if cost_objective else 2 + self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,)) + + def step(self, action): + observation, reward, terminated, truncated, info = super().step(action) + x_velocity = info["x_velocity"] + y_velocity = info["y_velocity"] + cost = info["reward_ctrl"] + healthy_reward = info["reward_survive"] + + if self.cost_objetive: + cost /= self._ctrl_cost_weight # Ignore the weight in the original AntEnv + vec_reward = np.array([x_velocity, y_velocity, cost], dtype=np.float32) + else: + vec_reward = np.array([x_velocity, y_velocity], dtype=np.float32) + vec_reward += cost + + vec_reward += healthy_reward + + return observation, vec_reward, terminated, truncated, info diff --git a/mo_gymnasium/envs/mujoco/humanoid.py b/mo_gymnasium/envs/mujoco/humanoid.py new file mode 100644 index 00000000..12518cd8 --- /dev/null +++ b/mo_gymnasium/envs/mujoco/humanoid.py @@ -0,0 +1,34 @@ +import numpy as np +from gymnasium.envs.mujoco.humanoid_v4 import HumanoidEnv +from gymnasium.spaces import Box +from gymnasium.utils import EzPickle + + +class MOHumanoidEnv(HumanoidEnv, EzPickle): + """ + ## Description + Multi-objective version of the HumanoidEnv environment. + + See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/humanoid/) for more information. 
+ + ## Reward Space + The reward is 2-dimensional: + - 0: Reward for running forward (x-velocity) + - 1: Control cost of the action + """ + + def __init__(self, **kwargs): + super().__init__(**kwargs) + EzPickle.__init__(self, **kwargs) + self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,)) + self.reward_dim = 2 + + def step(self, action): + observation, reward, terminated, truncated, info = super().step(action) + velocity = info["x_velocity"] + negative_cost = 10 * info["reward_quadctrl"] + vec_reward = np.array([velocity, negative_cost], dtype=np.float32) + + vec_reward += self.healthy_reward # All objectives are penalized when the agent falls + + return observation, vec_reward, terminated, truncated, info diff --git a/mo_gymnasium/envs/mujoco/swimmer.py b/mo_gymnasium/envs/mujoco/swimmer.py new file mode 100644 index 00000000..a3c5082b --- /dev/null +++ b/mo_gymnasium/envs/mujoco/swimmer.py @@ -0,0 +1,33 @@ +import numpy as np +from gymnasium.envs.mujoco.swimmer_v4 import SwimmerEnv +from gymnasium.spaces import Box +from gymnasium.utils import EzPickle + + +class MOSwimmerEnv(SwimmerEnv, EzPickle): + """ + ## Description + Multi-objective version of the SwimmerEnv environment. + + See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/swimmer/) for more information. 
+ + ## Reward Space + The reward is 2-dimensional: + - 0: Reward for moving forward (x-velocity) + - 1: Control cost of the action + """ + + def __init__(self, **kwargs): + super().__init__(**kwargs) + EzPickle.__init__(self, **kwargs) + self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,)) + self.reward_dim = 2 + + def step(self, action): + observation, reward, terminated, truncated, info = super().step(action) + velocity = info["x_velocity"] + energy = -np.sum(np.square(action)) + + vec_reward = np.array([velocity, energy], dtype=np.float32) + + return observation, vec_reward, terminated, truncated, info diff --git a/mo_gymnasium/envs/mujoco/walker2d.py b/mo_gymnasium/envs/mujoco/walker2d.py new file mode 100644 index 00000000..e3806810 --- /dev/null +++ b/mo_gymnasium/envs/mujoco/walker2d.py @@ -0,0 +1,35 @@ +import numpy as np +from gymnasium.envs.mujoco.walker2d_v4 import Walker2dEnv +from gymnasium.spaces import Box +from gymnasium.utils import EzPickle + + +class MOWalker2dEnv(Walker2dEnv, EzPickle): + """ + ## Description + Multi-objective version of the Walker2dEnv environment. + + See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/walker2d/) for more information. + + ## Reward Space + The reward is 2-dimensional: + - 0: Reward for running forward (x-velocity) + - 1: Control cost of the action + """ + + def __init__(self, **kwargs): + super().__init__(**kwargs) + EzPickle.__init__(self, **kwargs) + self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,)) + self.reward_dim = 2 + + def step(self, action): + observation, reward, terminated, truncated, info = super().step(action) + velocity = info["x_velocity"] + energy = -np.sum(np.square(action)) + + vec_reward = np.array([velocity, energy], dtype=np.float32) + + vec_reward += self.healthy_reward # All objectives are penalized when the agent falls + + return observation, vec_reward, terminated, truncated, info