diff --git a/docs/_static/videos/mo-ant.gif b/docs/_static/videos/mo-ant.gif
new file mode 100644
index 00000000..9397b4ff
Binary files /dev/null and b/docs/_static/videos/mo-ant.gif differ
diff --git a/docs/_static/videos/mo-humanoid.gif b/docs/_static/videos/mo-humanoid.gif
new file mode 100644
index 00000000..625a40f8
Binary files /dev/null and b/docs/_static/videos/mo-humanoid.gif differ
diff --git a/docs/_static/videos/mo-swimmer.gif b/docs/_static/videos/mo-swimmer.gif
new file mode 100644
index 00000000..f1dffd63
Binary files /dev/null and b/docs/_static/videos/mo-swimmer.gif differ
diff --git a/docs/_static/videos/mo-walker2d.gif b/docs/_static/videos/mo-walker2d.gif
new file mode 100644
index 00000000..6a2a2e1e
Binary files /dev/null and b/docs/_static/videos/mo-walker2d.gif differ
diff --git a/docs/environments/mujoco.md b/docs/environments/mujoco.md
index aa0d3154..70272d24 100644
--- a/docs/environments/mujoco.md
+++ b/docs/environments/mujoco.md
@@ -11,6 +11,10 @@ Multi-objective versions of Mujoco environments.
| [`mo-reacher-v4`](https://mo-gymnasium.farama.org/environments/mo-reacher/)
| Continuous / Discrete | `[target_1, target_2, target_3, target_4]` | Mujoco version of `mo-reacher-v0`, based on `Reacher-v4` [environment](https://gymnasium.farama.org/environments/mujoco/reacher/). |
| [`mo-hopper-v4`](https://mo-gymnasium.farama.org/environments/mo-hopper/)
| Continuous / Continuous | `[velocity, height, energy]` | Multi-objective version of [Hopper-v4](https://gymnasium.farama.org/environments/mujoco/hopper/) env. |
| [`mo-halfcheetah-v4`](https://mo-gymnasium.farama.org/environments/mo-halfcheetah/)
| Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [HalfCheetah-v4](https://gymnasium.farama.org/environments/mujoco/half_cheetah/) env. Similar to [Xu et al. 2020](https://github.com/mit-gfx/PGMORL). |
+| [`mo-walker2d-v4`](https://mo-gymnasium.farama.org/environments/mo-walker2d/)
| Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Walker2d-v4](https://gymnasium.farama.org/environments/mujoco/walker2d/) env. |
+| [`mo-ant-v4`](https://mo-gymnasium.farama.org/environments/mo-ant/)
| Continuous / Continuous | `[x_velocity, y_velocity, energy]` | Multi-objective version of [Ant-v4](https://gymnasium.farama.org/environments/mujoco/ant/) env. |
+| [`mo-swimmer-v4`](https://mo-gymnasium.farama.org/environments/mo-swimmer/)
| Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Swimmer-v4](https://gymnasium.farama.org/environments/mujoco/swimmer/) env. |
+| [`mo-humanoid-v4`](https://mo-gymnasium.farama.org/environments/mo-humanoid/)
| Continuous / Continuous | `[velocity, energy]` | Multi-objective version of [Humanoid-v4](https://gymnasium.farama.org/environments/mujoco/humanoid/) env. |
```{toctree}
@@ -21,5 +25,8 @@ Multi-objective versions of Mujoco environments.
./mo-reacher
./mo-hopper
./mo-halfcheetah
-
+./mo-walker2d
+./mo-ant
+./mo-swimmer
+./mo-humanoid
```
diff --git a/mo_gymnasium/envs/mujoco/__init__.py b/mo_gymnasium/envs/mujoco/__init__.py
index 442feae1..12b77130 100644
--- a/mo_gymnasium/envs/mujoco/__init__.py
+++ b/mo_gymnasium/envs/mujoco/__init__.py
@@ -20,6 +20,37 @@
kwargs={"cost_objective": False},
)
# Multi-objective Mujoco registrations added by this patch.
# Each uses the conventional 1000-step episode limit; `mo-ant-2d-v4` folds the
# control-cost objective into the velocity objectives (cost_objective=False),
# mirroring the existing hopper registration pattern just above.
register(
    id="mo-walker2d-v4",
    entry_point="mo_gymnasium.envs.mujoco.walker2d:MOWalker2dEnv",
    max_episode_steps=1000,
)

register(
    id="mo-ant-v4",
    entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
    max_episode_steps=1000,
)

register(
    id="mo-ant-2d-v4",
    entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
    max_episode_steps=1000,
    kwargs={"cost_objective": False},
)

register(
    id="mo-swimmer-v4",
    entry_point="mo_gymnasium.envs.mujoco.swimmer:MOSwimmerEnv",
    max_episode_steps=1000,
)

register(
    id="mo-humanoid-v4",
    entry_point="mo_gymnasium.envs.mujoco.humanoid:MOHumanoidEnv",
    max_episode_steps=1000,
)

register(
id="mo-reacher-v4",
entry_point="mo_gymnasium.envs.mujoco.reacher:MOReacherEnv",
diff --git a/mo_gymnasium/envs/mujoco/ant.py b/mo_gymnasium/envs/mujoco/ant.py
new file mode 100644
index 00000000..637edeb2
--- /dev/null
+++ b/mo_gymnasium/envs/mujoco/ant.py
@@ -0,0 +1,46 @@
+import numpy as np
+from gymnasium.envs.mujoco.ant_v4 import AntEnv
+from gymnasium.spaces import Box
+from gymnasium.utils import EzPickle
+
+
class MOAntEnv(AntEnv, EzPickle):
    """
    ## Description
    Multi-objective version of the AntEnv environment.

    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/ant/) for more information.

    ## Reward Space
    The reward is 2- or 3-dimensional:
    - 0: x-velocity
    - 1: y-velocity
    - 2: Control cost of the action (only when ``cost_objective=True``)
    If the cost_objective flag is set to False, the reward is 2-dimensional, and the cost is added to other objectives.
    A healthy reward is added to all objectives.
    """

    def __init__(self, cost_objective=True, **kwargs):
        """Initialize the multi-objective Ant environment.

        Args:
            cost_objective: if True, the control cost is reported as a separate
                third objective; if False it is folded into the two velocity
                objectives.
            **kwargs: forwarded verbatim to ``AntEnv``.
        """
        super().__init__(**kwargs)
        EzPickle.__init__(self, cost_objective, **kwargs)
        # NOTE(review): attribute was misspelled `cost_objetive` in the first
        # version of this patch; fixed before any release depends on it.
        self.cost_objective = cost_objective
        self.reward_dim = 3 if cost_objective else 2
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(self.reward_dim,))

    def step(self, action):
        """Step the env and return the vector reward [x_vel, y_vel(, cost)]."""
        observation, reward, terminated, truncated, info = super().step(action)
        x_velocity = info["x_velocity"]
        y_velocity = info["y_velocity"]
        cost = info["reward_ctrl"]
        healthy_reward = info["reward_survive"]

        if self.cost_objective:
            cost /= self._ctrl_cost_weight  # Ignore the weight in the original AntEnv
            vec_reward = np.array([x_velocity, y_velocity, cost], dtype=np.float32)
        else:
            vec_reward = np.array([x_velocity, y_velocity], dtype=np.float32)
            vec_reward += cost

        vec_reward += healthy_reward

        return observation, vec_reward, terminated, truncated, info
diff --git a/mo_gymnasium/envs/mujoco/humanoid.py b/mo_gymnasium/envs/mujoco/humanoid.py
new file mode 100644
index 00000000..12518cd8
--- /dev/null
+++ b/mo_gymnasium/envs/mujoco/humanoid.py
@@ -0,0 +1,34 @@
+import numpy as np
+from gymnasium.envs.mujoco.humanoid_v4 import HumanoidEnv
+from gymnasium.spaces import Box
+from gymnasium.utils import EzPickle
+
+
class MOHumanoidEnv(HumanoidEnv, EzPickle):
    """
    ## Description
    Multi-objective version of the HumanoidEnv environment.

    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/humanoid/) for more information.

    ## Reward Space
    The reward is 2-dimensional:
    - 0: Reward for running forward (x-velocity)
    - 1: Control cost of the action (``10 * reward_quadctrl``, a non-positive value)
    A healthy (survive) reward is added to both objectives.
    """

    def __init__(self, **kwargs):
        """Initialize the env; **kwargs are forwarded verbatim to ``HumanoidEnv``."""
        super().__init__(**kwargs)
        EzPickle.__init__(self, **kwargs)
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))
        self.reward_dim = 2

    def step(self, action):
        """Step the env and return the vector reward [velocity, control_cost]."""
        observation, reward, terminated, truncated, info = super().step(action)
        velocity = info["x_velocity"]
        # Scale the (negative) quadratic control cost by 10 — presumably to make
        # its magnitude comparable to the velocity objective; TODO confirm tuning.
        negative_cost = 10 * info["reward_quadctrl"]
        vec_reward = np.array([velocity, negative_cost], dtype=np.float32)

        vec_reward += self.healthy_reward  # All objectives are penalized when the agent falls

        return observation, vec_reward, terminated, truncated, info
diff --git a/mo_gymnasium/envs/mujoco/swimmer.py b/mo_gymnasium/envs/mujoco/swimmer.py
new file mode 100644
index 00000000..a3c5082b
--- /dev/null
+++ b/mo_gymnasium/envs/mujoco/swimmer.py
@@ -0,0 +1,33 @@
+import numpy as np
+from gymnasium.envs.mujoco.swimmer_v4 import SwimmerEnv
+from gymnasium.spaces import Box
+from gymnasium.utils import EzPickle
+
+
class MOSwimmerEnv(SwimmerEnv, EzPickle):
    """
    ## Description
    A two-objective variant of Gymnasium's SwimmerEnv.

    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/swimmer/) for more information.

    ## Reward Space
    The vector reward has two components:
    - 0: Reward for moving forward (x-velocity)
    - 1: Control cost of the action
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        EzPickle.__init__(self, **kwargs)
        self.reward_dim = 2
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))

    def step(self, action):
        obs, _, terminated, truncated, info = super().step(action)
        # Objective 0 is forward speed; objective 1 is the negated sum of
        # squared torques (more actuation => more negative reward).
        effort = -np.sum(np.square(action))
        vec_reward = np.array([info["x_velocity"], effort], dtype=np.float32)
        return obs, vec_reward, terminated, truncated, info
diff --git a/mo_gymnasium/envs/mujoco/walker2d.py b/mo_gymnasium/envs/mujoco/walker2d.py
new file mode 100644
index 00000000..e3806810
--- /dev/null
+++ b/mo_gymnasium/envs/mujoco/walker2d.py
@@ -0,0 +1,35 @@
+import numpy as np
+from gymnasium.envs.mujoco.walker2d_v4 import Walker2dEnv
+from gymnasium.spaces import Box
+from gymnasium.utils import EzPickle
+
+
class MOWalker2dEnv(Walker2dEnv, EzPickle):
    """
    ## Description
    Multi-objective version of the Walker2dEnv environment.

    See [Gymnasium's env](https://gymnasium.farama.org/environments/mujoco/walker2d/) for more information.

    ## Reward Space
    The reward is 2-dimensional:
    - 0: Reward for running forward (x-velocity)
    - 1: Control cost of the action
    A healthy (survive) reward is added to both objectives.
    """

    def __init__(self, **kwargs):
        """Initialize the env; **kwargs are forwarded verbatim to ``Walker2dEnv``."""
        super().__init__(**kwargs)
        EzPickle.__init__(self, **kwargs)
        self.reward_space = Box(low=-np.inf, high=np.inf, shape=(2,))
        self.reward_dim = 2

    def step(self, action):
        """Step the env and return the vector reward [velocity, energy]."""
        observation, reward, terminated, truncated, info = super().step(action)
        velocity = info["x_velocity"]
        # Negated sum of squared torques: larger actions => more negative reward.
        energy = -np.sum(np.square(action))

        vec_reward = np.array([velocity, energy], dtype=np.float32)

        vec_reward += self.healthy_reward  # All objectives are penalized when the agent falls

        return observation, vec_reward, terminated, truncated, info