diff --git a/tasks/qualification/action/HandoverObject.action b/tasks/qualification/action/HandoverObject.action index a49ba4844..491bdd23b 100644 --- a/tasks/qualification/action/HandoverObject.action +++ b/tasks/qualification/action/HandoverObject.action @@ -1,2 +1,4 @@ +string name +string object --- --- \ No newline at end of file diff --git a/tasks/qualification/action/ReceiveObject.action b/tasks/qualification/action/ReceiveObject.action index a49ba4844..14eac4c37 100644 --- a/tasks/qualification/action/ReceiveObject.action +++ b/tasks/qualification/action/ReceiveObject.action @@ -1,2 +1,3 @@ +string object --- --- \ No newline at end of file diff --git a/tasks/qualification/data/command_list.txt b/tasks/qualification/data/command_list.txt index abe96f9f8..d93890317 100644 --- a/tasks/qualification/data/command_list.txt +++ b/tasks/qualification/data/command_list.txt @@ -2,11 +2,13 @@ Please guide me to the lab. Please guide me to the kitchen. Guide me to the lift please. Follow me to the lab. +Assist Jared. +Go help Jared. +Take the protein bars to Nicole. Go to the lab and find me a desk. Could you please find Jared and ask if they need assistance. Please locate Jared and ask if he has seen Matt. Bring Jared a cup of coffee please. -Go to the kitchen and find Jared and ask him if he needs help. Robot please lead William to the bookcase, you may find him at the bookcase Get the tray from the end table and deliver it to Mary at the dining table Robot please face Linda at the back door and introduce her to everyone in the corridor. diff --git a/tasks/qualification/launch/better_qualification.launch b/tasks/qualification/launch/better_qualification.launch index a3a6e2093..1280b663a 100644 --- a/tasks/qualification/launch/better_qualification.launch +++ b/tasks/qualification/launch/better_qualification.launch @@ -3,12 +3,6 @@ - - - - - - diff --git a/tasks/qualification/launch/slow.launch b/tasks/qualification/launch/slow.launch new file mode 100644 index 000000000..c42054f1f --- /dev/null +++ b/tasks/qualification/launch/slow.launch @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/tasks/qualification/nodes/actions/detect_people b/tasks/qualification/nodes/actions/detect_people index 75d0a5115..c3ae77c0a 100644 --- a/tasks/qualification/nodes/actions/detect_people +++ b/tasks/qualification/nodes/actions/detect_people @@ -41,6 +41,7 @@ class DetectPeople: tf_req.target_frame = String("map") tf_req.point = centroid response = self.tf(tf_req) + print(response.target_point.point) return response.target_point.point def execute_cb(self, _): diff --git a/tasks/qualification/nodes/actions/find_person b/tasks/qualification/nodes/actions/find_person index be21a4f20..1e4635df4 100644 --- a/tasks/qualification/nodes/actions/find_person +++ b/tasks/qualification/nodes/actions/find_person @@ -3,6 +3,10 @@ import rospy import actionlib +import numpy as np +import math +from scipy.spatial.transform import Rotation as R + from qualification.msg import ( FindPersonAction, FindPersonResult, @@ -14,7 +18,7 @@ from qualification.msg import ( from move_base_msgs.msg import MoveBaseAction, MoveBaseGoal from actionlib import SimpleActionClient, SimpleActionServer from pal_interaction_msgs.msg import TtsGoal, TtsAction -from geometry_msgs.msg import Pose, Point, Quaternion +from geometry_msgs.msg import Pose, Point, Quaternion, PoseWithCovarianceStamped from control_msgs.msg import FollowJointTrajectoryAction, FollowJointTrajectoryGoal from trajectory_msgs.msg import JointTrajectoryPoint @@ -26,7 +30,7 @@ class FindPerson: "pose": Pose( position=Point(1.7, -0.72, 0.0), orientation=Quaternion(0.0, 0.0, -0.45, 0.89), - ) + ), } } @@ -74,7 +78,7 @@ class FindPerson: head_goal = FollowJointTrajectoryGoal() head_goal.trajectory.joint_names = ["head_1_joint", "head_2_joint"] - tts_goal.rawtext.text = f"Finding person {goal.name} in the {goal.location}..." + tts_goal.rawtext.text = f"Finding {goal.name} in the {goal.location}..." self.tts.send_goal_and_wait(tts_goal) move_base_goal.target_pose.header.stamp = rospy.Time.now() @@ -104,10 +108,21 @@ class FindPerson: head_goal.trajectory.points = [(point)] self.head_controller.send_goal_and_wait(head_goal) + orientation = Quaternion(0.0, 0.0, 0.0, 1.0) + robot_pose = rospy.wait_for_message( + "/amcl_pose", PoseWithCovarianceStamped + ).pose.pose + dist_x = person_point.x - robot_pose.position.x + dist_y = person_point.y - robot_pose.position.y + theta_deg = np.degrees(math.atan2(dist_y, dist_x)) + orientation = Quaternion( + *R.from_euler("z", theta_deg, degrees=True).as_quat() + ) + move_base_goal.target_pose.header.stamp = rospy.Time.now() move_base_goal.target_pose.pose = Pose( - position=Point(person_point.x, person_point.y + 1.0, 0.0), - orientation=self.KB[goal.location]["pose"].orientation, + position=Point(person_point.x - 0.3, person_point.y + 1, 0.0), + orientation=orientation, ) self.move_base.send_goal_and_wait(move_base_goal) break diff --git a/tasks/qualification/nodes/actions/greet b/tasks/qualification/nodes/actions/greet index e312714dd..5a4a53192 100644 --- a/tasks/qualification/nodes/actions/greet +++ b/tasks/qualification/nodes/actions/greet @@ -34,20 +34,24 @@ class Greet: auto_start=False, ) + self.greeted = [] + self._action_server.start() def execute_cb(self, goal): + if goal.name in self.greeted: + return tts_goal = TtsGoal() tts_goal.rawtext.lang_id = "en_GB" if goal.name: tts_goal.rawtext.text = ( f"Hello, {goal.name}. {random.choice(self.known_greeting_suffixes)}" ) + self.greeted.append(goal.name) else: tts_goal.rawtext.text = random.choice(self.unknown_greetings) self.tts.send_goal_and_wait(tts_goal) - self._action_server.set_succeeded(GreetResult()) diff --git a/tasks/qualification/nodes/actions/guide b/tasks/qualification/nodes/actions/guide index 44464c338..6a52b075c 100644 --- a/tasks/qualification/nodes/actions/guide +++ b/tasks/qualification/nodes/actions/guide @@ -16,13 +16,13 @@ class Guide: KB = { "lab": { "pose": Pose( - position=Point(2.58, 2.07, 0.0), - orientation=Quaternion(0.0, 0.0, 0.94, 0.50), + position=Point(2.61, 1.33, 0.0), + orientation=Quaternion(0.0, 0.0, 0.77, 0.63), ), "edges": { "corridor": Pose( - position=Point(4.68, 6.87, 0.0), - orientation=Quaternion(0.0, 0.0, 0.59, 0.80), + position=Point(4.27, 5.80, 0.0), + orientation=Quaternion(0.0, 0.0, -0.89, 0.45), ) }, }, @@ -33,8 +33,8 @@ class Guide: ), "edges": { "lab": Pose( - position=Point(4.09, 8.21, 0.0), - orientation=Quaternion(0.0, 0.0, 0.91, 0.40), + position=Point(4.29, 8.81, 0.0), + orientation=Quaternion(0.0, 0.0, -0.59, 0.81), ), "kitchen": None, }, diff --git a/tasks/qualification/nodes/actions/handover_object b/tasks/qualification/nodes/actions/handover_object index e8ac22110..670c5d443 100644 --- a/tasks/qualification/nodes/actions/handover_object +++ b/tasks/qualification/nodes/actions/handover_object @@ -40,7 +40,7 @@ class HandoverObject: pm_goal = PlayMotionGoal(motion_name="reach_arm", skip_planning=False) self.play_motion.send_goal_and_wait(pm_goal) - tts_goal.rawtext.text = "Please grab the object in my hand, and say 'Done' when you are rady for me to release it." + tts_goal.rawtext.text = f"{goal.name}, please grab the {goal.object} in my hand, and say 'Done' when you are rady for me to release it." self.tts.send_goal_and_wait(tts_goal) while True: self.transcribe_speech.send_goal(TranscribeSpeechGoal()) @@ -48,6 +48,7 @@ class HandoverObject: result = self.transcribe_speech.get_result() if "done" in result.sequence.lower(): break + rospy.sleep(rospy.Duration(2)) pm_goal = PlayMotionGoal(motion_name="open_gripper", skip_planning=False) self.play_motion.send_goal_and_wait(pm_goal) pm_goal = PlayMotionGoal(motion_name="home", skip_planning=False) diff --git a/tasks/qualification/nodes/actions/identify b/tasks/qualification/nodes/actions/identify index cfff6f4a0..d965105a8 100644 --- a/tasks/qualification/nodes/actions/identify +++ b/tasks/qualification/nodes/actions/identify @@ -38,8 +38,11 @@ class Identify: print(resp.detections) result = IdentifyResult() if resp.detections: - result.name = resp.detections[0].name - result.success = True + if resp.detections[0].confidence > 0.55: + result.name = resp.detections[0].name + result.success = True + else: + result.success = False else: result.success = False diff --git a/tasks/qualification/nodes/actions/learn_face b/tasks/qualification/nodes/actions/learn_face index b53387ef3..3f27d0e31 100644 --- a/tasks/qualification/nodes/actions/learn_face +++ b/tasks/qualification/nodes/actions/learn_face @@ -80,7 +80,7 @@ class LearnFace: req = LFRequest() req.dataset = "qualification" req.name = goal.name - req.n_images = 25 + req.n_images = 10 self.learn_face(req) self._action_server.set_succeeded(LearnFaceResult()) diff --git a/tasks/qualification/nodes/actions/receive_object b/tasks/qualification/nodes/actions/receive_object index 5ac0e968b..e11a4994a 100644 --- a/tasks/qualification/nodes/actions/receive_object +++ b/tasks/qualification/nodes/actions/receive_object @@ -44,7 +44,7 @@ class ReceiveObject: pm_goal = PlayMotionGoal(motion_name="open_gripper", skip_planning=False) self.play_motion.send_goal_and_wait(pm_goal) tts_goal.rawtext.text = ( - "Please place the object in my hand, and say 'Done' afterwards." + f"Please place the {goal.object} in my hand, and say 'Done' afterwards." ) self.tts.send_goal_and_wait(tts_goal) # Wait for speech diff --git a/tasks/qualification/nodes/actions/wait_greet b/tasks/qualification/nodes/actions/wait_greet index 231d401a5..92fd1c6ac 100644 --- a/tasks/qualification/nodes/actions/wait_greet +++ b/tasks/qualification/nodes/actions/wait_greet @@ -34,7 +34,13 @@ class WaitGreet: while not rospy.is_shutdown(): self.transcribe_speech.send_goal(TranscribeSpeechGoal()) self.transcribe_speech.wait_for_result() - result = self.transcribe_speech.get_result().sequence.lower() + result = ( + self.transcribe_speech.get_result() + .sequence.lower() + .replace(".", "") + .replace("!", "") + .replace(",", "") + ) if "hello" in result.split(" ") or "hi" in result.split(" "): self._action_server.set_succeeded(WaitGreetResult()) rospy.loginfo("Finished waiting") diff --git a/tasks/qualification/nodes/better_qualification b/tasks/qualification/nodes/better_qualification index 9327c638e..e28f293b5 100644 --- a/tasks/qualification/nodes/better_qualification +++ b/tasks/qualification/nodes/better_qualification @@ -29,8 +29,15 @@ from qualification.msg import ( from pal_interaction_msgs.msg import TtsGoal, TtsAction from control_msgs.msg import FollowJointTrajectoryAction, FollowJointTrajectoryGoal from trajectory_msgs.msg import JointTrajectoryPoint - +from geometry_msgs.msg import ( + PoseStamped, + Pose, + Point, + Quaternion, + PoseWithCovarianceStamped, +) from actionlib import SimpleActionClient +from move_base_msgs.msg import MoveBaseAction, MoveBaseGoal # supress warnings import warnings @@ -39,7 +46,7 @@ warnings.filterwarnings("ignore") location = "corridor" name = "unknown" - +person_pose = None print("Waiting for action servers...") print("tts") @@ -80,16 +87,36 @@ receive_object.wait_for_server() print("handover_object") handover_object = SimpleActionClient("handover_object", HandoverObjectAction) handover_object.wait_for_server() - +move_base = SimpleActionClient("move_base", MoveBaseAction) +move_base.wait_for_server() print("All action servers are ready") +torso_raise_goal = FollowJointTrajectoryGoal() +torso_raise_goal.trajectory.joint_names = ["torso_lift_joint"] +point = JointTrajectoryPoint() +point.positions = [0.35] +point.time_from_start = rospy.Duration(1) +torso_raise_goal.trajectory.points.append(point) + + +torso_lower_goal = FollowJointTrajectoryGoal() +torso_lower_goal.trajectory.joint_names = ["torso_lift_joint"] +point = JointTrajectoryPoint() +point.positions = [0.15] +point.time_from_start = rospy.Duration(1) +torso_lower_goal.trajectory.points.append(point) + def exec_command(command): torso_controller.send_goal_and_wait(torso_lower_goal) global name global location - if command == "Please guide me to the lab.": + global person_pose + if ( + command == "Please guide me to the lab." + or command == "Navigate to the couch, meet Mary, and guide her" + ): goal = GuideGoal(name, location, "lab") guide.send_goal_and_wait(goal) location = "lab" @@ -97,57 +124,85 @@ def exec_command(command): goal = GuideGoal(name, location, "kitchen") guide.send_goal_and_wait(goal) location = "kitchen" - elif command == "Could you please find Jared and ask if they need assistance.": + elif command == "Go help Jared.": + name = "jared" goal = FindPersonGoal("jared", location) find_person.send_goal_and_wait(goal) + elif command == "Take the protein bars to Nicole.": + goal = ReceiveObjectGoal("protein bars") + receive_object.send_goal_and_wait(goal) + if person_pose is not None: + move_base_goal = MoveBaseGoal() + move_base_goal.target_pose.header.stamp = rospy.Time.now() + move_base_goal.target_pose.header.frame_id = "map" + move_base_goal.target_pose.pose = person_pose + move_base.send_goal_and_wait(move_base_goal) -torso_raise_goal = FollowJointTrajectoryGoal() -torso_raise_goal.trajectory.joint_names = ["torso_lift_joint"] -point = JointTrajectoryPoint() -point.positions = [0.35] -point.time_from_start = rospy.Duration(1) -torso_raise_goal.trajectory.points.append(point) - - -torso_lower_goal = FollowJointTrajectoryGoal() -torso_lower_goal.trajectory.joint_names = ["torso_lift_joint"] -point = JointTrajectoryPoint() -point.positions = [0.15] -point.time_from_start = rospy.Duration(1) -torso_lower_goal.trajectory.points.append(point) + goal = HandoverObjectGoal("nicole", "protein bars") + handover_object.send_goal_and_wait(goal) # Wait to be greeted wait_greet.send_goal_and_wait(WaitGreetGoal()) -while not rospy.is_shutdown(): - # Raise the torso - torso_controller.send_goal_and_wait(torso_raise_goal) - - # Try and identify the person - identify.send_goal_and_wait(IdentifyGoal()) - identify_result = identify.get_result() - - if identify_result.name != name or not identify_result.name: - # Greet the person - greet.send_goal_and_wait(GreetGoal(name=identify_result.name)) - - if not identify_result.success: - get_name.send_goal_and_wait(GetNameGoal()) - get_name_result = get_name.get_result() - name = get_name_result.name - learn_face.send_goal_and_wait(LearnFaceGoal(name)) - else: - name = identify_result.name - - # Get a command from the person - get_command.send_goal_and_wait(GetCommandGoal(name)) - command_result = get_command.get_result() - if not command_result.command: - ## FINISH - break - - # Execute the command - command = command_result.command - exec_command(command) +# Raise the torso +torso_controller.send_goal_and_wait(torso_raise_goal) + +# Try and identify the person +identify.send_goal_and_wait(IdentifyGoal()) +identify_result = identify.get_result() + +if identify_result.name != name or not identify_result.name: + # Greet the person + greet.send_goal_and_wait(GreetGoal(name=identify_result.name)) + +if not identify_result.success: + get_name.send_goal_and_wait(GetNameGoal()) + get_name_result = get_name.get_result() + name = get_name_result.name + learn_face.send_goal_and_wait(LearnFaceGoal(name)) +else: + name = identify_result.name + +# Get a command from the person, "guide me to the lab" +get_command.send_goal_and_wait(GetCommandGoal(name)) +command_result = get_command.get_result() +command = command_result.command +print(command) +exec_command(command) +# name = "nicole" +# Raise the torso +torso_controller.send_goal_and_wait(torso_raise_goal) +person_pose = rospy.wait_for_message("/amcl_pose", PoseWithCovarianceStamped).pose.pose +# Get a command from the person, "find Jared" +get_command.send_goal_and_wait(GetCommandGoal(name)) +command_result = get_command.get_result() +command = command_result.command +print(command) +exec_command(command) + +# Raise the torso +torso_controller.send_goal_and_wait(torso_raise_goal) + +# Get a command from the person, "take the pringles to {nicole}" +get_command.send_goal_and_wait(GetCommandGoal(name)) +command_result = get_command.get_result() +command = command_result.command +print(command) +exec_command(command) + +# go home, the end +tts_goal = TtsGoal() +tts_goal.rawtext.lang_id = "en_GB" +tts_goal.rawtext.text = ( + "I'll go home now, if you need any more assistance, just come let me know!" +) +tts.send_goal_and_wait(tts_goal) + +move_base_goal = MoveBaseGoal() +move_base_goal.target_pose.header.stamp = rospy.Time.now() +move_base_goal.target_pose.header.frame_id = "map" +move_base_goal.target_pose.pose.position = Point(0, 0, 0) +move_base_goal.target_pose.pose.orientation = Quaternion(0, 0, 0, 1) +move_base.send_goal_and_wait(move_base_goal) diff --git a/tasks/qualification/nodes/qualification b/tasks/qualification/nodes/qualification index d6c40fb58..989cda30e 100644 --- a/tasks/qualification/nodes/qualification +++ b/tasks/qualification/nodes/qualification @@ -25,212 +25,17 @@ import rospy import rospkg import os -COMMAND_ROOT = os.path.join(rospkg.RosPack().get_path("qualification"), "data") -rospy.init_node("qualification") -from lasr_vision_msgs.srv import ( - Recognise, - RecogniseRequest, - LearnFace, - LearnFaceRequest, +rospy.init_node("clip_test") +import qualification.clip_utils as clip_utils + +model = clip_utils.load_model() +print( + clip_utils.vqa( + model, + [ + "a person pointing with their left arm", + "a person not pointing with their left arm", + ], + )[0] ) -from sensor_msgs.msg import Image -from lasr_voice import Voice -from std_srvs.srv import Empty - -import actionlib -from lasr_speech_recognition_msgs.srv import TranscribeAudio, TranscribeAudioResponse -from lasr_speech_recognition_msgs.msg import ( - TranscribeSpeechAction, - TranscribeSpeechGoal, -) -from tiago_controllers.controllers import BaseController - -import random - -import qualification.command_similarity as command_similarity - -from geometry_msgs.msg import Pose, Quaternion, Point - -voice = Voice() -rospy.loginfo("Got voice") -recognise = rospy.ServiceProxy("/recognise", Recognise) -recognise.wait_for_service() -rospy.loginfo("Got recognise") -learn_face = rospy.ServiceProxy("/learn_face", LearnFace) -learn_face.wait_for_service() -rospy.loginfo("Got learn_face") -transcribe = actionlib.SimpleActionClient("transcribe_speech", TranscribeSpeechAction) -transcribe.wait_for_server() -rospy.loginfo("Got transcribe_speech") -base_controller = BaseController() -rospy.loginfo("Got base_controller") - - -def do_recognise(image): - req = RecogniseRequest() - req.image_raw = image - req.dataset = "qualification" - req.confidence = 0.7 - resp = recognise(req) - return resp.detections - - -def do_transcribe_speech(): - transcribe.send_goal(TranscribeSpeechGoal()) - transcribe.wait_for_result() - result = transcribe.get_result() - return result.sequence.lower() - - -pre_door_pose = Pose( - position=Point(4.15944400458617, 8.87898068920585, 0.0), - orientation=Quaternion(0.0, 0.0, -0.5685040400615354, 0.8226804704341244), -) - -door_wait_pose = Pose( - position=Point(3.5908463546596536, 8.55637964340359, 0.0), - orientation=Quaternion(0.0, 0.0, -0.40530666046961816, 0.9141807868135086), -) - - -lab_pose = Pose( - position=Point(4.564212770348313, 7.033843500398589, 0.0), - orientation=Quaternion(0.0, 0.0, -0.7733597054145271, 0.6339674802709961), -) - - -def greet(): - # TODO: greet the person, if they are not in the database, ask them to introduce themselves and learn their face - im = rospy.wait_for_message("/xtion/rgb/image_raw", Image) - face_recognition_result = do_recognise(im) - if not face_recognition_result: - # unknown person - voice.speak("Hello, I don't know who you are. What is your name?") - name = ( - do_transcribe_speech().replace(".", "").split(" ")[-1] - ) # assume the last word is the name :) - voice.speak(f"Thank you, {name}, I will remember your face now") - # TODO: learn face and associate with name - learn_face(name, "qualification", 50) - # just perform an inference for visualisation purposes - do_recognise(im) - return name - else: - suffixes = [ - "it's great to see you again", - "it's nice to see you", - "it's good to see you", - ] - name = face_recognition_result[0].name - voice.speak( - f"Hello, {face_recognition_result[0].name} {random.choice(suffixes)}" - ) - return name - - -def clear_costmap(): - """ - Clears costmap using clear_octomap server - """ - - rospy.loginfo("waiting for clear_costmap") - rospy.wait_for_service("/move_base/clear_costmaps", timeout=10) - try: - _clear_costmap = rospy.ServiceProxy("/move_base/clear_costmaps", Empty) - response = _clear_costmap() - rospy.loginfo("clearing costmap done!") - return response - except rospy.ServiceException as e: - print("Service call failed: %s" % e) - - -def guide(name): - voice.speak(f"Follow me, {name}, I will take you to the lab.") - # TODO: guide person to the lab, at landmarks, check if they are still following, if they're not, look for them - base_controller.sync_to_pose(pre_door_pose) - # wait for the door to open - voice.async_tts(f"{name}, could you open the door for me please?") - base_controller.sync_to_pose(door_wait_pose) - base_controller.sync_to_pose(lab_pose) - - -def end(): - pass - - -def offer_assistance(name): - voice.speak(f"Hello, {name}, do you need any assistance") - while True: - command = do_transcribe_speech() - if "that is all" in command: - voice.speak("Goodbye") - end() - break - nearest_commands, distances = command_similarity.get_similar_commands( - command, - os.path.join(COMMAND_ROOT, "command_index"), - os.path.join(COMMAND_ROOT, "command_list.txt"), - ) - voice.speak(f"Did you mean {nearest_commands[0]}?") - speech = do_transcribe_speech() - if "yes" in speech: - command = nearest_commands[0] - break - else: - voice.speak("Sorry, could you please repeat that for me?") - - -# Phase 1: wait for a person to greet the robot -while not rospy.is_shutdown(): - speech = do_transcribe_speech() - if "hello" in speech or "hi" in speech: - name = greet() - break - -# # Phase 2: receive command from person -voice.speak(f"What can I do for you {name}?") -while True: - command = do_transcribe_speech() - if "that is all" in command: - voice.speak("Goodbye") - end() - break - nearest_commands, distances = command_similarity.get_similar_commands( - command, - os.path.join(COMMAND_ROOT, "command_index"), - os.path.join(COMMAND_ROOT, "command_list.txt"), - ) - voice.speak(f"Did you mean {nearest_commands[0]}?") - speech = do_transcribe_speech() - if "yes" in speech: - command = nearest_commands[0] - break - else: - voice.speak("Sorry, could you please repeat that for me?") - - -if "guide" in command: - guide(name) - -while True: - command = do_transcribe_speech() - nearest_commands, distances = command_similarity.get_similar_commands( - command, - os.path.join(COMMAND_ROOT, "command_index"), - os.path.join(COMMAND_ROOT, "command_list.txt"), - ) - voice.speak(f"Did you mean {nearest_commands[0]}?") - speech = do_transcribe_speech() - if "yes" in speech: - command = nearest_commands[0] - break - else: - voice.speak("Sorry, could you please repeat that for me?") - - -# if "guide" in command: -# guide() - - -# Phase 2: guide the person to the lab, for now just go to the lab, assume the person is following