From d32bde93e6b72a8889e2195d0e63816068a3544c Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Sat, 9 Dec 2023 16:12:19 +0200
Subject: [PATCH 1/6] Reacher-v5: compute reward after the physics step

---
 gymnasium/envs/mujoco/reacher_v5.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gymnasium/envs/mujoco/reacher_v5.py b/gymnasium/envs/mujoco/reacher_v5.py
index db23e1961..1dfb64dc2 100644
--- a/gymnasium/envs/mujoco/reacher_v5.py
+++ b/gymnasium/envs/mujoco/reacher_v5.py
@@ -133,7 +133,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
     ## Version History
     * v5:
         - Minimum `mujoco` version is now 2.3.3.
-        - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments.
+        - Fixed bug: `reward_distance` was based on the state before the physics step, now it is based on the state after the physics step (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)).        - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments.
         - Added `frame_skip` argument, used to configure the `dt` (duration of `step()`), default varies by environment check environment documentation pages.
         - Removed `"z - position_fingertip"` from the observation space since it is always 0, and therefore provides no useful information to the agent, this should result is slightly faster training (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/204)).
         - Added `xml_file` argument.
@@ -197,11 +197,12 @@ def __init__(
         }
 
     def step(self, action):
-        reward, reward_info = self._get_rew(action)
         self.do_simulation(action, self.frame_skip)
 
         observation = self._get_obs()
+        reward, reward_info = self._get_rew(action)
         info = reward_info
+
         if self.render_mode == "human":
             self.render()
         return observation, reward, False, False, info

From 3b0e7da3713c12338172ae178645bb3adb41708d Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Sat, 9 Dec 2023 16:16:46 +0200
Subject: [PATCH 2/6] Reacher-v5: fix missing line break in version history

---
 gymnasium/envs/mujoco/reacher_v5.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gymnasium/envs/mujoco/reacher_v5.py b/gymnasium/envs/mujoco/reacher_v5.py
index 1dfb64dc2..e1ac58bca 100644
--- a/gymnasium/envs/mujoco/reacher_v5.py
+++ b/gymnasium/envs/mujoco/reacher_v5.py
@@ -133,7 +133,8 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
     ## Version History
     * v5:
         - Minimum `mujoco` version is now 2.3.3.
-        - Fixed bug: `reward_distance` was based on the state before the physics step, now it is based on the state after the physics step (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)).        - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments.
+        - Fixed bug: `reward_distance` was based on the state before the physics step, now it is based on the state after the physics step (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)).
+        - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments.
         - Added `frame_skip` argument, used to configure the `dt` (duration of `step()`), default varies by environment check environment documentation pages.
         - Removed `"z - position_fingertip"` from the observation space since it is always 0, and therefore provides no useful information to the agent, this should result is slightly faster training (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/204)).
         - Added `xml_file` argument.

From 43e8b0711b6feb2e4efcd816bcd6ac99da2a3a62 Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Sat, 9 Dec 2023 16:18:13 +0200
Subject: [PATCH 3/6] Reacher-v5: reorder reward fix entry in version history

---
 gymnasium/envs/mujoco/reacher_v5.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gymnasium/envs/mujoco/reacher_v5.py b/gymnasium/envs/mujoco/reacher_v5.py
index e1ac58bca..a40a8564e 100644
--- a/gymnasium/envs/mujoco/reacher_v5.py
+++ b/gymnasium/envs/mujoco/reacher_v5.py
@@ -133,9 +133,9 @@ class ReacherEnv(MujocoEnv, utils.EzPickle):
     ## Version History
     * v5:
         - Minimum `mujoco` version is now 2.3.3.
-        - Fixed bug: `reward_distance` was based on the state before the physics step, now it is based on the state after the physics step (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)).
         - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments.
         - Added `frame_skip` argument, used to configure the `dt` (duration of `step()`), default varies by environment check environment documentation pages.
+        - Fixed bug: `reward_distance` was based on the state before the physics step, now it is based on the state after the physics step (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)).
         - Removed `"z - position_fingertip"` from the observation space since it is always 0, and therefore provides no useful information to the agent, this should result is slightly faster training (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/204)).
         - Added `xml_file` argument.
         - Added `reward_dist_weight`, `reward_control_weight` arguments, to configure the reward function (defaults are effectively the same as in `v4`).

From b1f7fb40ab5febe4a49a149713c30a9b4e146306 Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Sat, 9 Dec 2023 16:20:35 +0200
Subject: [PATCH 4/6] Pusher-v5: document reward-after-step fix in version history

---
 gymnasium/envs/mujoco/pusher_v5.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gymnasium/envs/mujoco/pusher_v5.py b/gymnasium/envs/mujoco/pusher_v5.py
index 490c4b016..62ef22a46 100644
--- a/gymnasium/envs/mujoco/pusher_v5.py
+++ b/gymnasium/envs/mujoco/pusher_v5.py
@@ -157,6 +157,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle):
         - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments.
         - Added `frame_skip` argument, used to configure the `dt` (duration of `step()`), default varies by environment check environment documentation pages.
         - Added `xml_file` argument.
+        - Fixed bug: `reward_distance` & `reward_near` were based on the state before the physics step, now they are based on the state after the physics step (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)).
         - Added `reward_near_weight`, `reward_dist_weight`, `reward_control_weight` arguments, to configure the reward function (defaults are effectively the same as in `v4`).
         - Fixed `info["reward_ctrl"]` being not being multiplied by the reward weight.
         - Added `info["reward_near"]` which is equal to the reward term `reward_near`.

From 5ac1bb2cc51c393b523357c6e401ce07ccb16f4b Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Sat, 9 Dec 2023 16:22:35 +0200
Subject: [PATCH 5/6] Pusher-v5: compute reward after the physics step

---
 gymnasium/envs/mujoco/pusher_v5.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gymnasium/envs/mujoco/pusher_v5.py b/gymnasium/envs/mujoco/pusher_v5.py
index 62ef22a46..98091e754 100644
--- a/gymnasium/envs/mujoco/pusher_v5.py
+++ b/gymnasium/envs/mujoco/pusher_v5.py
@@ -221,13 +221,14 @@ def __init__(
         }
 
     def step(self, action):
-        reward, reward_info = self._get_rew(action)
         self.do_simulation(action, self.frame_skip)
 
         observation = self._get_obs()
+        reward, reward_info = self._get_rew(action)
         info = reward_info
         if self.render_mode == "human":
             self.render()
+
         return observation, reward, False, False, info
 
     def _get_rew(self, action):

From 75ec4274b88e03cef565e8efe922c8cbda4b9a6a Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Sat, 9 Dec 2023 16:31:22 +0200
Subject: [PATCH 6/6] Update test_mujoco_v5.test_identical_behaviour_v45()

---
 tests/envs/mujoco/test_mujoco_v5.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/envs/mujoco/test_mujoco_v5.py b/tests/envs/mujoco/test_mujoco_v5.py
index 2a8df3c2b..52309170f 100644
--- a/tests/envs/mujoco/test_mujoco_v5.py
+++ b/tests/envs/mujoco/test_mujoco_v5.py
@@ -329,8 +329,8 @@ def test_reward_sum(version: str):
         env_conf("HumanoidStandup", True, False, False, "superset"),
         env_conf("InvertedDoublePendulum", True, True, False, "superset"),
         env_conf("InvertedPendulum", False, True, False, "superset"),
-        env_conf("Pusher", False, False, False, "keys-superset"),
-        env_conf("Reacher", True, False, False, "keys-equivalence"),
+        env_conf("Pusher", False, True, False, "keys-superset"),
+        env_conf("Reacher", True, True, False, "keys-equivalence"),
         env_conf("Swimmer", False, False, False, "skip"),
         env_conf("Walker2d", True, True, True, "keys-superset"),
     ],