
Env ids and variable names refactor
LucasAlegre committed Oct 27, 2024
1 parent 5188672 commit 311f378
Showing 7 changed files with 14 additions and 13 deletions.
14 changes: 7 additions & 7 deletions mo_gymnasium/envs/mujoco/__init__.py
@@ -33,15 +33,15 @@
 )
 
 register(
-    id="mo-hopper-2d-v5",
+    id="mo-hopper-2obj-v5",
     entry_point="mo_gymnasium.envs.mujoco.hopper_v5:MOHopperEnv",
     max_episode_steps=1000,
     kwargs={"cost_objective": False},
 )
 
 register(
     id="mo-walker2d-v4",
-    entry_point="mo_gymnasium.envs.mujoco.walker2d:MOWalker2dEnv",
+    entry_point="mo_gymnasium.envs.mujoco.walker2d_v4:MOWalker2dEnv",
     max_episode_steps=1000,
 )
 
@@ -53,13 +53,13 @@
 
 register(
     id="mo-ant-v4",
-    entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
+    entry_point="mo_gymnasium.envs.mujoco.ant_v4:MOAntEnv",
     max_episode_steps=1000,
 )
 
 register(
     id="mo-ant-2d-v4",
-    entry_point="mo_gymnasium.envs.mujoco.ant:MOAntEnv",
+    entry_point="mo_gymnasium.envs.mujoco.ant_v4:MOAntEnv",
     max_episode_steps=1000,
     kwargs={"cost_objective": False},
 )
@@ -72,15 +72,15 @@
 )
 
 register(
-    id="mo-ant-2d-v5",
+    id="mo-ant-2obj-v5",
     entry_point="mo_gymnasium.envs.mujoco.ant_v5:MOAntEnv",
     max_episode_steps=1000,
     kwargs={"cost_objective": False},
 )
 
 register(
     id="mo-swimmer-v4",
-    entry_point="mo_gymnasium.envs.mujoco.swimmer:MOSwimmerEnv",
+    entry_point="mo_gymnasium.envs.mujoco.swimmer_v4:MOSwimmerEnv",
     max_episode_steps=1000,
 )

@@ -92,7 +92,7 @@
 
 register(
     id="mo-humanoid-v4",
-    entry_point="mo_gymnasium.envs.mujoco.humanoid:MOHumanoidEnv",
+    entry_point="mo_gymnasium.envs.mujoco.humanoid_v4:MOHumanoidEnv",
     max_episode_steps=1000,
 )

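For context on the renames above: the two-objective variants of the v5 environments now carry a "2obj" suffix instead of "2d". A minimal usage sketch under the new IDs, assuming MO-Gymnasium's gymnasium-style make() registry lookup (the reset seed and random action are illustrative only):

import mo_gymnasium as mo_gym

# The two-objective hopper is now "mo-hopper-2obj-v5" (previously "mo-hopper-2d-v5").
env = mo_gym.make("mo-hopper-2obj-v5")
obs, info = env.reset(seed=42)
# MO-Gymnasium environments return a vector reward, one entry per objective.
obs, vec_reward, terminated, truncated, info = env.step(env.action_space.sample())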
File renamed without changes.
File renamed without changes.
5 changes: 3 additions & 2 deletions mo_gymnasium/envs/mujoco/humanoid_v5.py
@@ -18,6 +18,7 @@ class MOHumanoidEnv(HumanoidEnv, EzPickle):
     ## Version History:
     - v5: Now includes contact forces. See: https://gymnasium.farama.org/environments/mujoco/humanoid/#version-history
+          The scale of the control cost has changed from v4.
     """
 
     def __init__(self, **kwargs):
@@ -29,8 +30,8 @@ def __init__(self, **kwargs):
     def step(self, action):
         observation, reward, terminated, truncated, info = super().step(action)
         velocity = info["x_velocity"]
-        negative_cost = info["reward_ctrl"] / self._ctrl_cost_weight  # Revert the scale applied in the original environment
-        vec_reward = np.array([velocity, negative_cost], dtype=np.float32)
+        neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight  # Revert the scale applied in the original environment
+        vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)
 
         vec_reward += self.healthy_reward  # All objectives are penalized when the agent falls
         vec_reward += info["reward_contact"]  # Do not treat contact forces as a separate objective
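The "Revert the scale" comment refers to how Gymnasium's MuJoCo environments report the control penalty: info["reward_ctrl"] is (roughly) -ctrl_cost_weight * sum(action**2), so dividing by the weight recovers the unscaled negative energy term. A minimal sketch of that identity, with an illustrative weight and action:

import numpy as np

ctrl_cost_weight = 0.1                       # illustrative value; each env sets its own
action = np.array([0.5, -0.3])               # hypothetical action
reward_ctrl = -ctrl_cost_weight * np.sum(np.square(action))   # what the base env reports
neg_energy_cost = reward_ctrl / ctrl_cost_weight              # undo the env-specific scaling
assert np.isclose(neg_energy_cost, -np.sum(np.square(action)))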
File renamed without changes.
4 changes: 2 additions & 2 deletions mo_gymnasium/envs/mujoco/swimmer_v5.py
@@ -34,8 +34,8 @@ def __init__(self, **kwargs):
     def step(self, action):
         observation, reward, terminated, truncated, info = super().step(action)
         velocity = info["x_velocity"]
-        energy = -np.sum(np.square(action))
+        neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight  # Revert the scale applied in the original environment
 
-        vec_reward = np.array([velocity, energy], dtype=np.float32)
+        vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)
 
         return observation, vec_reward, terminated, truncated, info
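Under the standard convention above, the swimmer change is behavior-preserving: the deleted -np.sum(np.square(action)) and the new info["reward_ctrl"] / self._ctrl_cost_weight compute the same quantity. A quick sanity-check sketch (the weight value is illustrative):

import numpy as np

weight = 1e-4                                 # illustrative ctrl_cost_weight
action = np.array([0.2, -0.7])                # hypothetical 2-dim swimmer action
old_energy = -np.sum(np.square(action))
new_energy = (-weight * np.sum(np.square(action))) / weight   # info["reward_ctrl"] / weight
assert np.isclose(old_energy, new_energy)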
@@ -26,9 +26,9 @@ def __init__(self, **kwargs):
     def step(self, action):
         observation, reward, terminated, truncated, info = super().step(action)
         velocity = info["x_velocity"]
-        energy = -np.sum(np.square(action))
+        neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight
 
-        vec_reward = np.array([velocity, energy], dtype=np.float32)
+        vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)
 
         vec_reward += self.healthy_reward  # All objectives are penalized when the agent falls
 
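One detail worth noting in the last hunk: vec_reward += self.healthy_reward adds a scalar to a NumPy vector, so the alive bonus is broadcast to every objective at once, which is what the "All objectives are penalized when the agent falls" comment relies on. A tiny sketch with made-up numbers:

import numpy as np

vec_reward = np.array([1.2, -0.05], dtype=np.float32)   # [velocity, neg_energy_cost], made up
healthy_reward = 1.0                                    # illustrative alive bonus
vec_reward += healthy_reward                            # broadcasts: array([2.2, 0.95])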
