Farama-Foundation · pseudo-rnd-thoughts · Dec 11, 2023 · Dec 9, 2023 · Dec 9, 2023 · Dec 9, 2023
diff --git a/gymnasium/envs/mujoco/pusher_v5.py b/gymnasium/envs/mujoco/pusher_v5.py
@@ -157,6 +157,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
         - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments.
         - Added `frame_skip` argument, used to configure the `dt` (duration of `step()`), default varies by environment check environment documentation pages.
         - Added `xml_file` argument.
+        - Fixed bug: `reward_distance` & `reward_near` were based on the state before the physics step; they are now based on the state after the physics step (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)).
         - Added `reward_near_weight`, `reward_dist_weight`, `reward_control_weight` arguments, to configure the reward function (defaults are effectively the same as in `v4`).
         - Fixed `info["reward_ctrl"]` being not being multiplied by the reward weight.
         - Added `info["reward_near"]` which is equal to the reward term `reward_near`.
@@ -220,13 +221,14 @@ def __init__(
         }
 
     def step(self, action):
-        reward, reward_info = self._get_rew(action)
         self.do_simulation(action, self.frame_skip)
 
         observation = self._get_obs()
+        reward, reward_info = self._get_rew(action)
         info = reward_info
         if self.render_mode == "human":
             self.render()
+
         return observation, reward, False, False, info
 
     def _get_rew(self, action):

diff --git a/gymnasium/envs/mujoco/reacher_v5.py b/gymnasium/envs/mujoco/reacher_v5.py
@@ -135,6 +135,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
         - Minimum `mujoco` version is now 2.3.3.
         - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments.
         - Added `frame_skip` argument, used to configure the `dt` (duration of `step()`), default varies by environment check environment documentation pages.
+        - Fixed bug: `reward_distance` was based on the state before the physics step; it is now based on the state after the physics step (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)).
         - Removed `"z - position_fingertip"` from the observation space since it is always 0, and therefore provides no useful information to the agent, this should result is slightly faster training (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/204)).
         - Added `xml_file` argument.
         - Added `reward_dist_weight`, `reward_control_weight` arguments, to configure the reward function (defaults are effectively the same as in `v4`).
@@ -197,11 +198,12 @@ def __init__(
         }
 
     def step(self, action):
-        reward, reward_info = self._get_rew(action)
         self.do_simulation(action, self.frame_skip)
 
         observation = self._get_obs()
+        reward, reward_info = self._get_rew(action)
         info = reward_info
+
         if self.render_mode == "human":
             self.render()
         return observation, reward, False, False, info

diff --git a/tests/envs/mujoco/test_mujoco_v5.py b/tests/envs/mujoco/test_mujoco_v5.py
@@ -329,8 +329,8 @@ def test_reward_sum(version: str):
         env_conf("HumanoidStandup", True, False, False, "superset"),
         env_conf("InvertedDoublePendulum", True, True, False, "superset"),
         env_conf("InvertedPendulum", False, True, False, "superset"),
-        env_conf("Pusher", False, False, False, "keys-superset"),
-        env_conf("Reacher", True, False, False, "keys-equivalence"),
+        env_conf("Pusher", False, True, False, "keys-superset"),
+        env_conf("Reacher", True, True, False, "keys-equivalence"),
         env_conf("Swimmer", False, False, False, "skip"),
         env_conf("Walker2d", True, True, True, "keys-superset"),
     ],