Update reacher_v5.py

Farama-Foundation · Dec 9, 2023 · d32bde9 · d32bde9
1 parent ede2ed1
commit d32bde9
Showing 1 changed file with 3 additions and 2 deletions.
diff --git a/gymnasium/envs/mujoco/reacher_v5.py b/gymnasium/envs/mujoco/reacher_v5.py
@@ -133,7 +133,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
     ## Version History
     * v5:
         - Minimum `mujoco` version is now 2.3.3.
-        - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments.
+        - Fixed bug: `reward_distance` was based on the state before the physics step, now it is based on the state after the physics step (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)).
+        - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments.
         - Added `frame_skip` argument, used to configure the `dt` (duration of `step()`), default varies by environment check environment documentation pages.
         - Removed `"z - position_fingertip"` from the observation space since it is always 0, and therefore provides no useful information to the agent, this should result in slightly faster training (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/204)).
         - Added `xml_file` argument.
@@ -197,11 +197,12 @@ def __init__(
         }
 
     def step(self, action):
-        reward, reward_info = self._get_rew(action)
         self.do_simulation(action, self.frame_skip)
 
         observation = self._get_obs()
+        reward, reward_info = self._get_rew(action)
         info = reward_info
+
         if self.render_mode == "human":
             self.render()
         return observation, reward, False, False, info