diff --git a/gymnasium_robotics/envs/maze/maze.py b/gymnasium_robotics/envs/maze/maze.py index 361fc81c..5fa738cb 100644 --- a/gymnasium_robotics/envs/maze/maze.py +++ b/gymnasium_robotics/envs/maze/maze.py @@ -274,9 +274,9 @@ def add_xy_position_noise(self, xy_pos: np.ndarray) -> np.ndarray: def compute_reward( self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info ) -> float: - distance = np.linalg.norm(achieved_goal - desired_goal, axis=-1) + distance = np.linalg.norm(achieved_goal - desired_goal, ord=2, axis=-1) if self.reward_type == "dense": - return np.exp(-distance) + return -distance elif self.reward_type == "sparse": return (distance <= 0.45).astype(np.float64) diff --git a/gymnasium_robotics/envs/maze/maze_v4.py b/gymnasium_robotics/envs/maze/maze_v4.py index e601a805..71c3606d 100644 --- a/gymnasium_robotics/envs/maze/maze_v4.py +++ b/gymnasium_robotics/envs/maze/maze_v4.py @@ -374,9 +374,9 @@ def add_xy_position_noise(self, xy_pos: np.ndarray) -> np.ndarray: def compute_reward( self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info ) -> float: - distance = np.linalg.norm(achieved_goal - desired_goal, axis=-1) + distance = np.linalg.norm(achieved_goal - desired_goal, ord=2, axis=-1) if self.reward_type == "dense": - return np.exp(-distance) + return -distance elif self.reward_type == "sparse": return (distance <= 0.45).astype(np.float64)