diff --git a/mo_gymnasium/envs/mujoco/__init__.py b/mo_gymnasium/envs/mujoco/__init__.py
index f5d5639e..92245429 100644
--- a/mo_gymnasium/envs/mujoco/__init__.py
+++ b/mo_gymnasium/envs/mujoco/__init__.py
@@ -33,7 +33,7 @@
 )
 
 register(
-    id="mo-hopper-2d-v5",
+    id="mo-hopper-2obj-v5",
     entry_point="mo_gymnasium.envs.mujoco.hopper_v5:MOHopperEnv",
     max_episode_steps=1000,
     kwargs={"cost_objective": False},
@@ -41,7 +41,7 @@
 
 register(
     id="mo-walker2d-v4",
-    entry_point="mo_gymnasium.envs.mujoco.walker2d:MOWalker2dEnv",
+    entry_point="mo_gymnasium.envs.mujoco.walker2d_v4:MOWalker2dEnv",
     max_episode_steps=1000,
 )
 
@@ -53,13 +53,13 @@
 
 register(
     id="mo-ant-v4",
-    entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
+    entry_point="mo_gymnasium.envs.mujoco.ant_v4:MOAntEnv",
     max_episode_steps=1000,
 )
 
 register(
     id="mo-ant-2d-v4",
-    entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
+    entry_point="mo_gymnasium.envs.mujoco.ant_v4:MOAntEnv",
     max_episode_steps=1000,
     kwargs={"cost_objective": False},
 )
@@ -72,7 +72,7 @@
 )
 
 register(
-    id="mo-ant-2d-v5",
+    id="mo-ant-2obj-v5",
     entry_point="mo_gymnasium.envs.mujoco.ant_v5:MOAntEnv",
     max_episode_steps=1000,
     kwargs={"cost_objective": False},
@@ -80,7 +80,7 @@
 
 register(
     id="mo-swimmer-v4",
-    entry_point="mo_gymnasium.envs.mujoco.swimmer:MOSwimmerEnv",
+    entry_point="mo_gymnasium.envs.mujoco.swimmer_v4:MOSwimmerEnv",
     max_episode_steps=1000,
 )
 
@@ -92,7 +92,7 @@
 
 register(
     id="mo-humanoid-v4",
-    entry_point="mo_gymnasium.envs.mujoco.humanoid:MOHumanoidEnv",
+    entry_point="mo_gymnasium.envs.mujoco.humanoid_v4:MOHumanoidEnv",
     max_episode_steps=1000,
 )
 
diff --git a/mo_gymnasium/envs/mujoco/ant.py b/mo_gymnasium/envs/mujoco/ant_v4.py
similarity index 100%
rename from mo_gymnasium/envs/mujoco/ant.py
rename to mo_gymnasium/envs/mujoco/ant_v4.py
diff --git a/mo_gymnasium/envs/mujoco/humanoid.py b/mo_gymnasium/envs/mujoco/humanoid_v4.py
similarity index 100%
rename from mo_gymnasium/envs/mujoco/humanoid.py
rename to mo_gymnasium/envs/mujoco/humanoid_v4.py
diff --git a/mo_gymnasium/envs/mujoco/humanoid_v5.py b/mo_gymnasium/envs/mujoco/humanoid_v5.py
index d5df5200..914a0937 100644
--- a/mo_gymnasium/envs/mujoco/humanoid_v5.py
+++ b/mo_gymnasium/envs/mujoco/humanoid_v5.py
@@ -18,6 +18,7 @@ class MOHumanoidEnv(HumanoidEnv, EzPickle):
 
     ## Version History:
     - v5: Now includes contact forces. See: https://gymnasium.farama.org/environments/mujoco/humanoid/#version-history
+        The scale of the control cost has changed from v4.
     """
 
     def __init__(self, **kwargs):
@@ -29,8 +30,8 @@ def __init__(self, **kwargs):
     def step(self, action):
         observation, reward, terminated, truncated, info = super().step(action)
         velocity = info["x_velocity"]
-        negative_cost = info["reward_ctrl"] / self._ctrl_cost_weight  # Revert the scale applied in the original environment
-        vec_reward = np.array([velocity, negative_cost], dtype=np.float32)
+        neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight  # Revert the scale applied in the original environment
+        vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)
 
         vec_reward += self.healthy_reward  # All objectives are penalyzed when the agent falls
         vec_reward += info["reward_contact"]  # Do not treat contact forces as a separate objective
diff --git a/mo_gymnasium/envs/mujoco/swimmer.py b/mo_gymnasium/envs/mujoco/swimmer_v4.py
similarity index 100%
rename from mo_gymnasium/envs/mujoco/swimmer.py
rename to mo_gymnasium/envs/mujoco/swimmer_v4.py
diff --git a/mo_gymnasium/envs/mujoco/swimmer_v5.py b/mo_gymnasium/envs/mujoco/swimmer_v5.py
index 11a160ca..38d33644 100644
--- a/mo_gymnasium/envs/mujoco/swimmer_v5.py
+++ b/mo_gymnasium/envs/mujoco/swimmer_v5.py
@@ -34,8 +34,8 @@ def __init__(self, **kwargs):
     def step(self, action):
         observation, reward, terminated, truncated, info = super().step(action)
         velocity = info["x_velocity"]
-        energy = -np.sum(np.square(action))
+        neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight  # Revert the scale applied in the original environment
 
-        vec_reward = np.array([velocity, energy], dtype=np.float32)
+        vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)
 
         return observation, vec_reward, terminated, truncated, info
diff --git a/mo_gymnasium/envs/mujoco/walker2d.py b/mo_gymnasium/envs/mujoco/walker2d_v4.py
similarity index 87%
rename from mo_gymnasium/envs/mujoco/walker2d.py
rename to mo_gymnasium/envs/mujoco/walker2d_v4.py
index e3806810..efa87bf6 100644
--- a/mo_gymnasium/envs/mujoco/walker2d.py
+++ b/mo_gymnasium/envs/mujoco/walker2d_v4.py
@@ -26,9 +26,9 @@ def __init__(self, **kwargs):
     def step(self, action):
         observation, reward, terminated, truncated, info = super().step(action)
         velocity = info["x_velocity"]
-        energy = -np.sum(np.square(action))
+        neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight
 
-        vec_reward = np.array([velocity, energy], dtype=np.float32)
+        vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)
 
         vec_reward += self.healthy_reward  # All objectives are penalyzed when the agent falls