
Env ids and variable names refactor
LucasAlegre committed Oct 27, 2024
1 parent 5188672 commit 311f378
Showing 7 changed files with 14 additions and 13 deletions.
14 changes: 7 additions & 7 deletions mo_gymnasium/envs/mujoco/__init__.py
@@ -33,15 +33,15 @@
 )
 
 register(
-    id="mo-hopper-2d-v5",
+    id="mo-hopper-2obj-v5",
     entry_point="mo_gymnasium.envs.mujoco.hopper_v5:MOHopperEnv",
     max_episode_steps=1000,
     kwargs={"cost_objective": False},
 )
 
 register(
     id="mo-walker2d-v4",
-    entry_point="mo_gymnasium.envs.mujoco.walker2d:MOWalker2dEnv",
+    entry_point="mo_gymnasium.envs.mujoco.walker2d_v4:MOWalker2dEnv",
     max_episode_steps=1000,
 )
 
@@ -53,13 +53,13 @@
 
 register(
     id="mo-ant-v4",
-    entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
+    entry_point="mo_gymnasium.envs.mujoco.ant_v4:MOAntEnv",
     max_episode_steps=1000,
 )
 
 register(
     id="mo-ant-2d-v4",
-    entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
+    entry_point="mo_gymnasium.envs.mujoco.ant_v4:MOAntEnv",
     max_episode_steps=1000,
     kwargs={"cost_objective": False},
 )
@@ -72,15 +72,15 @@
 )
 
 register(
-    id="mo-ant-2d-v5",
+    id="mo-ant-2obj-v5",
     entry_point="mo_gymnasium.envs.mujoco.ant_v5:MOAntEnv",
     max_episode_steps=1000,
     kwargs={"cost_objective": False},
 )
 
 register(
     id="mo-swimmer-v4",
-    entry_point="mo_gymnasium.envs.mujoco.swimmer:MOSwimmerEnv",
+    entry_point="mo_gymnasium.envs.mujoco.swimmer_v4:MOSwimmerEnv",
     max_episode_steps=1000,
 )

@@ -92,7 +92,7 @@
 
 register(
     id="mo-humanoid-v4",
-    entry_point="mo_gymnasium.envs.mujoco.humanoid:MOHumanoidEnv",
+    entry_point="mo_gymnasium.envs.mujoco.humanoid_v4:MOHumanoidEnv",
     max_episode_steps=1000,
 )

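For context on the renames above: the two-objective variants of the v5 environments now carry a "2obj" suffix instead of "2d". A minimal usage sketch under the new IDs, assuming MO-Gymnasium's gymnasium-style make() registry lookup (the reset seed and random action are illustrative only):

import mo_gymnasium as mo_gym

# The two-objective hopper is now "mo-hopper-2obj-v5" (previously "mo-hopper-2d-v5").
env = mo_gym.make("mo-hopper-2obj-v5")
obs, info = env.reset(seed=42)
# MO-Gymnasium environments return a vector reward, one entry per objective.
obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())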
File renamed without changes.
File renamed without changes.
5 changes: 3 additions & 2 deletions mo_gymnasium/envs/mujoco/humanoid_v5.py
@@ -18,6 +18,7 @@ class MOHumanoidEnv(HumanoidEnv, EzPickle):
     ## Version History:
     - v5: Now includes contact forces. See: https://gymnasium.farama.org/environments/mujoco/humanoid/#version-history
+          The scale of the control cost has changed from v4.
     """
 
     def __init__(self, **kwargs):
@@ -29,8 +30,8 @@ def __init__(self, **kwargs):
     def step(self, action):
         observation, reward, terminated, truncated, info = super().step(action)
         velocity = info["x_velocity"]
-        negative_cost = info["reward_ctrl"] / self._ctrl_cost_weight  # Revert the scale applied in the original environment
-        vec_reward = np.array([velocity, negative_cost], dtype=np.float32)
+        neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight  # Revert the scale applied in the original environment
+        vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)
 
         vec_reward += self.healthy_reward  # All objectives are penalized when the agent falls
         vec_reward += info["reward_contact"]  # Do not treat contact forces as a separate objective
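The "Revert the scale" comment refers to how Gymnasium's MuJoCo environments report the control penalty: info["reward_ctrl"] is (roughly) -ctrl_cost_weight * sum(action**2), so dividing by the weight recovers the unscaled negative energy term. A minimal sketch of that identity, with an illustrative weight and action:

import numpy as np

ctrl_cost_weight = 0.1                       # illustrative value; each env sets its own
action = np.array([0.5, -0.3])               # hypothetical action
reward_ctrl = -ctrl_cost_weight * np.sum(np.square(action))   # what the base env reports
neg_energy_cost = reward_ctrl / ctrl_cost_weight              # undo the env-specific scaling
assert np.isclose(neg_energy_cost, -np.sum(np.square(action)))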
File renamed without changes.
4 changes: 2 additions & 2 deletions mo_gymnasium/envs/mujoco/swimmer_v5.py
@@ -34,8 +34,8 @@ def __init__(self, **kwargs):
     def step(self, action):
         observation, reward, terminated, truncated, info = super().step(action)
         velocity = info["x_velocity"]
-        energy = -np.sum(np.square(action))
+        neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight  # Revert the scale applied in the original environment
 
-        vec_reward = np.array([velocity, energy], dtype=np.float32)
+        vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)
 
         return observation, vec_reward, terminated, truncated, info
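Under the standard convention above, the swimmer change is behavior-preserving: the deleted -np.sum(np.square(action)) and the new info["reward_ctrl"] / self._ctrl_cost_weight compute the same quantity. A quick sanity-check sketch (the weight value is illustrative):

import numpy as np

weight = 1e-4                                 # illustrative ctrl_cost_weight
action = np.array([0.2, -0.7])                # hypothetical 2-dim swimmer action
old_energy = -np.sum(np.square(action))
new_energy = (-weight * np.sum(np.square(action))) / weight   # info["reward_ctrl"] / weight
assert np.isclose(old_energy, new_energy)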
@@ -26,9 +26,9 @@ def __init__(self, **kwargs):
     def step(self, action):
         observation, reward, terminated, truncated, info = super().step(action)
         velocity = info["x_velocity"]
-        energy = -np.sum(np.square(action))
+        neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight
 
-        vec_reward = np.array([velocity, energy], dtype=np.float32)
+        vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)
 
         vec_reward += self.healthy_reward  # All objectives are penalized when the agent falls
 
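One detail worth noting in the last hunk: vec_reward += self.healthy_reward adds a scalar to a NumPy vector, so the alive bonus is broadcast to every objective at once, which is what the "All objectives are penalized when the agent falls" comment relies on. A tiny sketch with made-up numbers:

import numpy as np

vec_reward = np.array([1.2, -0.05], dtype=np.float32)   # [velocity, neg_energy_cost], made up
healthy_reward = 1.0                                    # illustrative alive bonus
vec_reward += healthy_reward                            # broadcasts: array([2.2, 0.95])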
