From d32bde93e6b72a8889e2195d0e63816068a3544c Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Sat, 9 Dec 2023 16:12:19 +0200 Subject: [PATCH 1/6] Update reacher_v5.py --- gymnasium/envs/mujoco/reacher_v5.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gymnasium/envs/mujoco/reacher_v5.py b/gymnasium/envs/mujoco/reacher_v5.py index db23e1961..1dfb64dc2 100644 --- a/gymnasium/envs/mujoco/reacher_v5.py +++ b/gymnasium/envs/mujoco/reacher_v5.py @@ -133,7 +133,7 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): ## Version History * v5: - Minimum `mujoco` version is now 2.3.3. - - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments. + - Fixed bug: `reward_distance` was based on the state before the physics step, now it is based on the state after the physics step (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)). - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments. - Added `frame_skip` argument, used to configure the `dt` (duration of `step()`), default varies by environment check environment documentation pages. - Removed `"z - position_fingertip"` from the observation space since it is always 0, and therefore provides no useful information to the agent, this should result is slightly faster training (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/204)). - Added `xml_file` argument. 
@@ -197,11 +197,12 @@ def __init__( } def step(self, action): - reward, reward_info = self._get_rew(action) self.do_simulation(action, self.frame_skip) observation = self._get_obs() + reward, reward_info = self._get_rew(action) info = reward_info + if self.render_mode == "human": self.render() return observation, reward, False, False, info From 3b0e7da3713c12338172ae178645bb3adb41708d Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Sat, 9 Dec 2023 16:16:46 +0200 Subject: [PATCH 2/6] typo --- gymnasium/envs/mujoco/reacher_v5.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gymnasium/envs/mujoco/reacher_v5.py b/gymnasium/envs/mujoco/reacher_v5.py index 1dfb64dc2..e1ac58bca 100644 --- a/gymnasium/envs/mujoco/reacher_v5.py +++ b/gymnasium/envs/mujoco/reacher_v5.py @@ -133,7 +133,8 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): ## Version History * v5: - Minimum `mujoco` version is now 2.3.3. - - Fixed bug: `reward_distance` was based on the state before the physics step, now it is based on the state after the physics step (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)). - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments. + - Fixed bug: `reward_distance` was based on the state before the physics step, now it is based on the state after the physics step (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)). + - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments. - Added `frame_skip` argument, used to configure the `dt` (duration of `step()`), default varies by environment check environment documentation pages. 
- Removed `"z - position_fingertip"` from the observation space since it is always 0, and therefore provides no useful information to the agent, this should result is slightly faster training (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/204)). - Added `xml_file` argument. From 43e8b0711b6feb2e4efcd816bcd6ac99da2a3a62 Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Sat, 9 Dec 2023 16:18:13 +0200 Subject: [PATCH 3/6] Update reacher_v5.py --- gymnasium/envs/mujoco/reacher_v5.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gymnasium/envs/mujoco/reacher_v5.py b/gymnasium/envs/mujoco/reacher_v5.py index e1ac58bca..a40a8564e 100644 --- a/gymnasium/envs/mujoco/reacher_v5.py +++ b/gymnasium/envs/mujoco/reacher_v5.py @@ -133,9 +133,9 @@ class ReacherEnv(MujocoEnv, utils.EzPickle): ## Version History * v5: - Minimum `mujoco` version is now 2.3.3. - - Fixed bug: `reward_distance` was based on the state before the physics step, now it is based on the state after the physics step (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)). - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments. - Added `frame_skip` argument, used to configure the `dt` (duration of `step()`), default varies by environment check environment documentation pages. + - Fixed bug: `reward_distance` was based on the state before the physics step, now it is based on the state after the physics step (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)). - Removed `"z - position_fingertip"` from the observation space since it is always 0, and therefore provides no useful information to the agent, this should result is slightly faster training (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/204)). - Added `xml_file` argument. 
- Added `reward_dist_weight`, `reward_control_weight` arguments, to configure the reward function (defaults are effectively the same as in `v4`). From b1f7fb40ab5febe4a49a149713c30a9b4e146306 Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Sat, 9 Dec 2023 16:20:35 +0200 Subject: [PATCH 4/6] Update pusher_v5.py --- gymnasium/envs/mujoco/pusher_v5.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gymnasium/envs/mujoco/pusher_v5.py b/gymnasium/envs/mujoco/pusher_v5.py index 490c4b016..62ef22a46 100644 --- a/gymnasium/envs/mujoco/pusher_v5.py +++ b/gymnasium/envs/mujoco/pusher_v5.py @@ -157,6 +157,7 @@ class PusherEnv(MujocoEnv, utils.EzPickle): - Added `default_camera_config` argument, a dictionary for setting the `mj_camera` properties, mainly useful for custom environments. - Added `frame_skip` argument, used to configure the `dt` (duration of `step()`), default varies by environment check environment documentation pages. - Added `xml_file` argument. + - Fixed bug: `reward_distance` & `reward_near` were based on the state before the physics step, now they are based on the state after the physics step (related [Github issue](https://github.com/Farama-Foundation/Gymnasium/issues/821)). - Added `reward_near_weight`, `reward_dist_weight`, `reward_control_weight` arguments, to configure the reward function (defaults are effectively the same as in `v4`). - Fixed `info["reward_ctrl"]` being not being multiplied by the reward weight. - Added `info["reward_near"]` which is equal to the reward term `reward_near`. 
From 5ac1bb2cc51c393b523357c6e401ce07ccb16f4b Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Sat, 9 Dec 2023 16:22:35 +0200 Subject: [PATCH 5/6] Update pusher_v5.py --- gymnasium/envs/mujoco/pusher_v5.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gymnasium/envs/mujoco/pusher_v5.py b/gymnasium/envs/mujoco/pusher_v5.py index 62ef22a46..98091e754 100644 --- a/gymnasium/envs/mujoco/pusher_v5.py +++ b/gymnasium/envs/mujoco/pusher_v5.py @@ -221,13 +221,14 @@ def __init__( } def step(self, action): - reward, reward_info = self._get_rew(action) self.do_simulation(action, self.frame_skip) observation = self._get_obs() + reward, reward_info = self._get_rew(action) info = reward_info if self.render_mode == "human": self.render() + return observation, reward, False, False, info def _get_rew(self, action): From 75ec4274b88e03cef565e8efe922c8cbda4b9a6a Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Sat, 9 Dec 2023 16:31:22 +0200 Subject: [PATCH 6/6] Update test_mujoco_v5.test_identical_behaviour_v45() --- tests/envs/mujoco/test_mujoco_v5.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/envs/mujoco/test_mujoco_v5.py b/tests/envs/mujoco/test_mujoco_v5.py index 2a8df3c2b..52309170f 100644 --- a/tests/envs/mujoco/test_mujoco_v5.py +++ b/tests/envs/mujoco/test_mujoco_v5.py @@ -329,8 +329,8 @@ def test_reward_sum(version: str): env_conf("HumanoidStandup", True, False, False, "superset"), env_conf("InvertedDoublePendulum", True, True, False, "superset"), env_conf("InvertedPendulum", False, True, False, "superset"), - env_conf("Pusher", False, False, False, "keys-superset"), - env_conf("Reacher", True, False, False, "keys-equivalence"), + env_conf("Pusher", False, True, False, "keys-superset"), + env_conf("Reacher", True, True, False, "keys-equivalence"), env_conf("Swimmer", False, False, False, 
"skip"), env_conf("Walker2d", True, True, True, "keys-superset"), ],