diff --git a/mo_gymnasium/envs/mujoco/walker2d_v4.py b/mo_gymnasium/envs/mujoco/walker2d_v4.py
index efa87bf..e4a7493 100644
--- a/mo_gymnasium/envs/mujoco/walker2d_v4.py
+++ b/mo_gymnasium/envs/mujoco/walker2d_v4.py
@@ -26,7 +26,7 @@ def __init__(self, **kwargs):
     def step(self, action):
         observation, reward, terminated, truncated, info = super().step(action)
         velocity = info["x_velocity"]
-        neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight
+        neg_energy_cost = -np.sum(np.square(action))
 
         vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)
 
diff --git a/mo_gymnasium/envs/mujoco/walker2d_v5.py b/mo_gymnasium/envs/mujoco/walker2d_v5.py
index 5b036db..cfe4327 100644
--- a/mo_gymnasium/envs/mujoco/walker2d_v5.py
+++ b/mo_gymnasium/envs/mujoco/walker2d_v5.py
@@ -29,9 +29,9 @@ def __init__(self, **kwargs):
     def step(self, action):
         observation, reward, terminated, truncated, info = super().step(action)
         velocity = info["x_velocity"]
-        energy = -np.sum(np.square(action))
+        neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight
 
-        vec_reward = np.array([velocity, energy], dtype=np.float32)
+        vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)
 
         vec_reward += self.healthy_reward  # All objectives are penalyzed when the agent falls
 