From 4bcb801685c3c661f14856c463c2f3990c943865 Mon Sep 17 00:00:00 2001 From: 7oponaut <174741646+7oponaut@users.noreply.github.com> Date: Thu, 4 Jul 2024 22:01:48 +0200 Subject: [PATCH] Adroit hand dense reward fixes * fix hand-to-object dense reward component sign in adroit hand door, hammer, and relocate scenarios --- gymnasium_robotics/envs/adroit_hand/adroit_door.py | 2 +- gymnasium_robotics/envs/adroit_hand/adroit_hammer.py | 2 +- gymnasium_robotics/envs/adroit_hand/adroit_relocate.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gymnasium_robotics/envs/adroit_hand/adroit_door.py b/gymnasium_robotics/envs/adroit_hand/adroit_door.py index e2d02caa..af56d227 100644 --- a/gymnasium_robotics/envs/adroit_hand/adroit_door.py +++ b/gymnasium_robotics/envs/adroit_hand/adroit_door.py @@ -294,7 +294,7 @@ def step(self, a): palm_pos = self.data.site_xpos[self.grasp_site_id].ravel() # get to handle - reward = 0.1 * np.linalg.norm(palm_pos - handle_pos) + reward = -0.1 * np.linalg.norm(palm_pos - handle_pos) # open door reward += -0.1 * (goal_distance - 1.57) * (goal_distance - 1.57) # velocity cost diff --git a/gymnasium_robotics/envs/adroit_hand/adroit_hammer.py b/gymnasium_robotics/envs/adroit_hand/adroit_hammer.py index af68a408..a3700cd5 100644 --- a/gymnasium_robotics/envs/adroit_hand/adroit_hammer.py +++ b/gymnasium_robotics/envs/adroit_hand/adroit_hammer.py @@ -305,7 +305,7 @@ def step(self, a): # override reward if not sparse reward if not self.sparse_reward: # get the palm to the hammer handle - reward = 0.1 * np.linalg.norm(palm_pos - hamm_pos) + reward = -0.1 * np.linalg.norm(palm_pos - hamm_pos) # take hammer head to nail reward -= np.linalg.norm(head_pos - nail_pos) # make nail go inside diff --git a/gymnasium_robotics/envs/adroit_hand/adroit_relocate.py b/gymnasium_robotics/envs/adroit_hand/adroit_relocate.py index db9031cf..39b66ae3 100644 --- a/gymnasium_robotics/envs/adroit_hand/adroit_relocate.py +++ b/gymnasium_robotics/envs/adroit_hand/adroit_relocate.py @@ -294,7 +294,7 @@ def step(self, a): # override reward if not sparse reward if not self.sparse_reward: - reward = 0.1 * np.linalg.norm(palm_pos - obj_pos) # take hand to object + reward = -0.1 * np.linalg.norm(palm_pos - obj_pos) # take hand to object if obj_pos[2] > 0.04: # if object off the table reward += 1.0 # bonus for lifting the object reward += -0.5 * np.linalg.norm(