From 23e004faa0a07326faddeb34e5cd673aec63f60c Mon Sep 17 00:00:00 2001 From: m-barker Date: Tue, 23 Apr 2024 11:02:08 +0100 Subject: [PATCH 1/5] fix: incorrect folder for command similarity states --- tasks/gpsr/{ => src/gpsr}/states/command_similarity_matcher.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tasks/gpsr/{ => src/gpsr}/states/command_similarity_matcher.py (100%) diff --git a/tasks/gpsr/states/command_similarity_matcher.py b/tasks/gpsr/src/gpsr/states/command_similarity_matcher.py similarity index 100% rename from tasks/gpsr/states/command_similarity_matcher.py rename to tasks/gpsr/src/gpsr/states/command_similarity_matcher.py From 874067339f0bb4dd1b19dc1d0ed2fdf18b074caf Mon Sep 17 00:00:00 2001 From: m-barker Date: Tue, 23 Apr 2024 11:23:50 +0100 Subject: [PATCH 2/5] feat: add command parser state machine --- tasks/gpsr/scripts/main.py | 0 tasks/gpsr/src/gpsr/states/__init__.py | 2 + tasks/gpsr/src/gpsr/states/command_parser.py | 84 ++++++++++++++++++++ 3 files changed, 86 insertions(+) create mode 100644 tasks/gpsr/scripts/main.py create mode 100644 tasks/gpsr/src/gpsr/states/command_parser.py diff --git a/tasks/gpsr/scripts/main.py b/tasks/gpsr/scripts/main.py new file mode 100644 index 000000000..e69de29bb diff --git a/tasks/gpsr/src/gpsr/states/__init__.py b/tasks/gpsr/src/gpsr/states/__init__.py index 74a382e32..fcf034f64 100644 --- a/tasks/gpsr/src/gpsr/states/__init__.py +++ b/tasks/gpsr/src/gpsr/states/__init__.py @@ -1 +1,3 @@ from .talk import Talk +from .command_parser import ParseCommand, CommandParserStateMachine +from .command_similarity_matcher import CommandSimilarityMatcher diff --git a/tasks/gpsr/src/gpsr/states/command_parser.py b/tasks/gpsr/src/gpsr/states/command_parser.py new file mode 100644 index 000000000..60dfb510d --- /dev/null +++ b/tasks/gpsr/src/gpsr/states/command_parser.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +import argparse +import smach +import rospy + +from gpsr.load_known_data import GPSRDataLoader +from gpsr.regex_command_parser import Configuration, gpsr_compile_and_parse +from gpsr.states import CommandSimilarityMatcher +from lasr_skills import AskAndListen, Say + + +class ParseCommand(smach.State): + def __init__(self, data_config: Configuration): + """Takes in a string containing the command and runs the command parser + that outputs a dictionary of parameters for the command. + + Args: + data_config (Configuration): Configuration object containing the regex patterns + """ + smach.State.__init__( + self, + outcomes=["succeeded", "failed"], + input_keys=["raw_command"], + output_keys=["parsed_command"], + ) + self.data_config = data_config + + def execute(self, userdata): + rospy.loginfo(f"Received command : {userdata.raw_command.lower()}") + try: + userdata.parsed_command = gpsr_compile_and_parse( + self.data_config, userdata.transcribed_speech.lower() + ) + except Exception as e: + rospy.logerr(e) + return "failed" + return "succeeded" + + +class CommandParserStateMachine(smach.StateMachine): + def __init__( + self, + data_config: Configuration, + n_vecs_per_txt_file: int = 1177943, + total_txt_files: int = 10, + ): + """State machine that takes in a command, matches it to a known command, and + outputs the parsed command. + + Args: + data_config (Configuration): Configuration object containing the regex patterns + n_vecs_per_txt_file (int, optional): number of vectors in each gpsr txt + file. Defaults to 100. + total_txt_files (int, optional): total number of gpsr txt files. Defaults to 10. 
+ """ + smach.StateMachine.__init__(self, outcomes=["succeeded", "failed"]) + + with self: + smach.StateMachine.add( + "ASK_FOR_COMMAND", + AskAndListen(), + transitions={"succeeded": "PARSE_COMMAND", "failed": "failed"}, + remapping={"transcribed_speech": "raw_command"}, + ) + + smach.StateMachine.add( + "PARSE_COMMAND", + ParseCommand(data_config), + transitions={ + "succeeded": "succeeded", + "failed": "COMMAND_SIMILARITY_MATCHER", + }, + remapping={"parsed_command": "parsed_command"}, + ) + + smach.StateMachine.add( + "COMMAND_SIMILARITY_MATCHER", + CommandSimilarityMatcher([n_vecs_per_txt_file] * total_txt_files), + transitions={"succeeded": "PARSE_COMMAND", "failed": "failed"}, + remapping={ + "command": "parsed_command", + "matched_command": "matched_command", + }, + ) From 6f822bf159fc58617f29633e334f3da5ef962605 Mon Sep 17 00:00:00 2001 From: m-barker Date: Tue, 23 Apr 2024 11:48:08 +0100 Subject: [PATCH 3/5] feat: working command parser sm --- tasks/gpsr/CMakeLists.txt | 1 + tasks/gpsr/scripts/main.py | 41 ++++++++++++++++++++ tasks/gpsr/src/gpsr/states/__init__.py | 2 +- tasks/gpsr/src/gpsr/states/command_parser.py | 12 +++--- 4 files changed, 48 insertions(+), 8 deletions(-) diff --git a/tasks/gpsr/CMakeLists.txt b/tasks/gpsr/CMakeLists.txt index af1f6c908..a1c9b8bc6 100644 --- a/tasks/gpsr/CMakeLists.txt +++ b/tasks/gpsr/CMakeLists.txt @@ -155,6 +155,7 @@ include_directories( ## in contrast to setup.py, you can choose the destination catkin_install_python(PROGRAMS scripts/parse_gpsr_xmls.py + scripts/main.py nodes/commands/question_answer nodes/command_parser DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} diff --git a/tasks/gpsr/scripts/main.py b/tasks/gpsr/scripts/main.py index e69de29bb..d4d44bda3 100644 --- a/tasks/gpsr/scripts/main.py +++ b/tasks/gpsr/scripts/main.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +import smach +import rospy +import sys +from typing import Dict +from gpsr.load_known_data import GPSRDataLoader +from gpsr.regex_command_parser import Configuration +from gpsr.states import CommandParserStateMachine + + +def load_gpsr_configuration() -> Configuration: + gpsr_data_dir = sys.argv[1] + """Loads the configuration for the GPSR command parser""" + data_loader = GPSRDataLoader(data_dir=gpsr_data_dir) + gpsr_known_data: Dict = data_loader.load_data() + config = Configuration( + { + "person_names": gpsr_known_data["names"], + "location_names": gpsr_known_data["non_placeable_locations"], + "placement_location_names": gpsr_known_data["placeable_locations"], + "room_names": gpsr_known_data["rooms"], + "object_names": gpsr_known_data["objects"], + "object_categories_plural": gpsr_known_data["categories_plural"], + "object_categories_singular": gpsr_known_data["categories_singular"], + } + ) + return config + + +def main(): + config = load_gpsr_configuration() + command_parser_sm = CommandParserStateMachine(data_config=config) + command_parser_sm.execute() + parsed_command: Dict = command_parser_sm.userdata.parsed_command + rospy.loginfo(f"Parsed command: {parsed_command}") + + +if __name__ == "__main__": + rospy.init_node("gpsr_main") + main() + rospy.spin() diff --git a/tasks/gpsr/src/gpsr/states/__init__.py b/tasks/gpsr/src/gpsr/states/__init__.py index fcf034f64..84504f968 100644 --- a/tasks/gpsr/src/gpsr/states/__init__.py +++ b/tasks/gpsr/src/gpsr/states/__init__.py @@ -1,3 +1,3 @@ from .talk import Talk -from .command_parser import ParseCommand, CommandParserStateMachine from .command_similarity_matcher import CommandSimilarityMatcher +from .command_parser 
import ParseCommand, CommandParserStateMachine diff --git a/tasks/gpsr/src/gpsr/states/command_parser.py b/tasks/gpsr/src/gpsr/states/command_parser.py index 60dfb510d..f0345bdcb 100644 --- a/tasks/gpsr/src/gpsr/states/command_parser.py +++ b/tasks/gpsr/src/gpsr/states/command_parser.py @@ -1,12 +1,10 @@ #!/usr/bin/env python3 -import argparse import smach import rospy -from gpsr.load_known_data import GPSRDataLoader from gpsr.regex_command_parser import Configuration, gpsr_compile_and_parse from gpsr.states import CommandSimilarityMatcher -from lasr_skills import AskAndListen, Say +from lasr_skills import AskAndListen class ParseCommand(smach.State): @@ -29,7 +27,7 @@ def execute(self, userdata): rospy.loginfo(f"Received command : {userdata.raw_command.lower()}") try: userdata.parsed_command = gpsr_compile_and_parse( - self.data_config, userdata.transcribed_speech.lower() + self.data_config, userdata.raw_command.lower() ) except Exception as e: rospy.logerr(e) @@ -58,7 +56,7 @@ def __init__( with self: smach.StateMachine.add( "ASK_FOR_COMMAND", - AskAndListen(), + AskAndListen(tts_phrase="Hello, please tell me your command."), transitions={"succeeded": "PARSE_COMMAND", "failed": "failed"}, remapping={"transcribed_speech": "raw_command"}, ) @@ -78,7 +76,7 @@ def __init__( CommandSimilarityMatcher([n_vecs_per_txt_file] * total_txt_files), transitions={"succeeded": "PARSE_COMMAND", "failed": "failed"}, remapping={ - "command": "parsed_command", - "matched_command": "matched_command", + "command": "raw_command", + "matched_command": "raw_command", }, ) From 64a1b4bbe7d67e11b832409f32e4898b1e140221 Mon Sep 17 00:00:00 2001 From: m-barker Date: Tue, 23 Apr 2024 13:54:38 +0100 Subject: [PATCH 4/5] feat: initial state machine factory --- tasks/gpsr/data/mock_data/names.json | 2 +- tasks/gpsr/scripts/main.py | 3 + tasks/gpsr/src/gpsr/state_machine_factory.py | 76 ++++++++++++++++++++ tasks/gpsr/src/gpsr/states/talk.py | 14 ++-- 4 files changed, 87 insertions(+), 8 deletions(-) create mode 100644 tasks/gpsr/src/gpsr/state_machine_factory.py diff --git a/tasks/gpsr/data/mock_data/names.json b/tasks/gpsr/data/mock_data/names.json index 7dbbe563f..0882a43c4 100644 --- a/tasks/gpsr/data/mock_data/names.json +++ b/tasks/gpsr/data/mock_data/names.json @@ -4,7 +4,7 @@ "angel", "axel", "charlie", - "janes", + "jane", "jules", "morgan", "paris", diff --git a/tasks/gpsr/scripts/main.py b/tasks/gpsr/scripts/main.py index d4d44bda3..4c0e15f39 100644 --- a/tasks/gpsr/scripts/main.py +++ b/tasks/gpsr/scripts/main.py @@ -4,6 +4,7 @@ import sys from typing import Dict from gpsr.load_known_data import GPSRDataLoader +from gpsr.state_machine_factory import build_state_machine from gpsr.regex_command_parser import Configuration from gpsr.states import CommandParserStateMachine @@ -33,6 +34,8 @@ def main(): command_parser_sm.execute() parsed_command: Dict = command_parser_sm.userdata.parsed_command rospy.loginfo(f"Parsed command: {parsed_command}") + sm = build_state_machine(parsed_command) + sm.execute() if __name__ == "__main__": diff --git a/tasks/gpsr/src/gpsr/state_machine_factory.py b/tasks/gpsr/src/gpsr/state_machine_factory.py new file mode 100644 index 000000000..1618aa415 --- /dev/null +++ b/tasks/gpsr/src/gpsr/state_machine_factory.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +import rospy +import smach +from smach_ros import ServiceState +from typing import Dict, List +from lasr_skills import GoToLocation, FindNamedPerson +from gpsr.states import Talk + +STATE_COUNT = 0 + + +def increment_state_count() -> 
int:
+    global STATE_COUNT
+    STATE_COUNT += 1
+    return STATE_COUNT
+
+
+def build_state_machine(parsed_command: Dict) -> smach.StateMachine:
+    """Constructs the parameterized state machine for the GPSR task,
+    given the parsed command.
+
+    Args:
+        parsed_command (Dict): parsed command.
+
+    Returns:
+        smach.StateMachine: parameterized state machine ready to be executed.
+    """
+    command_verbs: List[str] = parsed_command["commands"]
+    command_params: List[Dict] = parsed_command["params"]
+    sm = smach.StateMachine(outcomes=["succeeded", "failed"])
+    with sm:
+        for command_verb, command_param in zip(command_verbs, command_params):
+            if command_verb == "greet":
+                if "name" in command_param:
+                    location_param = (
+                        f"/gpsr/arena/rooms/{command_param['location']}/pose"
+                    )
+                    sm.add(
+                        f"STATE_{increment_state_count()}",
+                        GoToLocation(location_param=location_param),
+                        transitions={
+                            "succeeded": f"STATE_{STATE_COUNT + 1}",
+                            "failed": "failed",
+                        },
+                    )
+                    sm.add(
+                        f"STATE_{increment_state_count()}",
+                        FindNamedPerson(
+                            name=command_param["name"], location_param=location_param
+                        ),
+                        transitions={
+                            "succeeded": f"STATE_{STATE_COUNT + 1}",
+                            "failed": "failed",
+                        },
+                    )
+                elif "clothes" in command_param:
+                    pass
+                else:
+                    raise ValueError(
+                        "Greet command received with no name or clothes in command parameters"
+                    )
+            elif command_verb == "talk":
+                if "gesture" in command_param:
+                    pass
+                elif "talk" in command_param:
+                    sm.add(
+                        f"STATE_{increment_state_count()}",
+                        Talk(command_param["talk"]),
+                        transitions={"succeeded": "succeeded", "failed": "failed"},
+                    )
+                else:
+                    raise ValueError(
+                        "Talk command received with no gesture or talk in command parameters"
+                    )
+
+    return sm
diff --git a/tasks/gpsr/src/gpsr/states/talk.py b/tasks/gpsr/src/gpsr/states/talk.py
index ea22ff951..29204c573 100644
--- a/tasks/gpsr/src/gpsr/states/talk.py
+++ b/tasks/gpsr/src/gpsr/states/talk.py
@@ -8,13 +8,13 @@
 # In future we might want to add looking at person talking to the state machine.
 class Talk(smach.StateMachine):
     class GenerateResponse(smach.State):
-        def __init__(self):
+        def __init__(self, talk_phrase: str):
             smach.State.__init__(
                 self,
                 outcomes=["succeeded", "failed"],
-                input_keys=["talk_phrase"],
                 output_keys=["response"],
             )
+            self._talk_phrase = talk_phrase
 
         def _create_responses(self) -> Dict[str, str]:
             response = {}
@@ -43,23 +43,23 @@ def _create_responses(self) -> Dict[str, str]:
 
         def execute(self, userdata):
             try:
-                userdata.response = self._create_responses()[userdata.talk_phrase]
+                userdata.response = self._create_responses()[self._talk_phrase]
             except KeyError:
                 rospy.loginfo(
-                    f"Failed to generate response for {userdata.talk_phrase} as it is not in the list of possible questions."
+                    f"Failed to generate response for {self._talk_phrase} as it is not in the list of possible questions."
) return "failed" return "succeeded" - def __init__(self): + def __init__(self, talk_phrase: str): smach.StateMachine.__init__(self, outcomes=["succeeded", "failed"]) with self: smach.StateMachine.add( "GENERATE_RESPONSE", - self.GenerateResponse(), + self.GenerateResponse(talk_phrase), transitions={"succeeded": "SAY_RESPONSE", "failed": "failed"}, - remapping={"talk_phrase": "talk_phrase", "response": "response"}, + remapping={"response": "response"}, ) smach.StateMachine.add( From aa5dc3ed8a2da35e6a13bee4c4de00295ede6355 Mon Sep 17 00:00:00 2001 From: Jared Swift Date: Fri, 5 Jul 2024 03:06:49 +0100 Subject: [PATCH 5/5] Receptionist polishing (#236) Co-authored-by: Matt Co-authored-by: fireblonde Co-authored-by: Haiwei L --- .../src/numpy2message/__init__.py | 3 +- common/helpers/tf_pcl/CMakeLists.txt | 202 ++++ common/helpers/tf_pcl/package.xml | 65 ++ common/helpers/tf_pcl/setup.py | 8 + common/helpers/tf_pcl/src/tf_pcl/__init__.py | 59 ++ .../scripts/microphone_tuning_test.py | 0 .../scripts/test_microphones.py | 2 +- .../src/lasr_vision_bodypix/bodypix.py | 31 +- common/vision/lasr_vision_clip/CMakeLists.txt | 5 +- .../examples/encode_image_example.py | 25 + .../examples/test_person_detector.py | 107 +++ .../lasr_vision_clip/nodes/img_encoder.py | 47 + .../lasr_vision_clip/nodes/learn_face.py | 8 + .../vision/lasr_vision_clip/requirements.in | 4 +- .../vision/lasr_vision_clip/requirements.txt | 51 +- .../src/lasr_vision_clip/__init__.py | 3 +- .../src/lasr_vision_clip/clip_utils.py | 34 +- .../src/lasr_vision_clip/learn_face.py | 112 +++ .../examples/request.py | 4 +- ...{setup.launch => cropped_detection.launch} | 4 +- .../cropped_detection.py | 92 +- .../lasr_vision_deepface/launch/camera.launch | 4 +- .../launch/service.launch | 4 +- .../vision/lasr_vision_deepface/nodes/service | 85 +- .../src/lasr_vision_deepface/deepface.py | 71 +- common/vision/lasr_vision_msgs/CMakeLists.txt | 4 + .../msg/BodyPixKeypointNormalized.msg | 8 + .../lasr_vision_msgs/msg/CDResponse.msg | 2 +- .../srv/BodyPixKeypointDetection.srv | 6 + .../lasr_vision_msgs/srv/ClipImageEncoder.srv | 8 + .../lasr_vision_msgs/srv/ClipLearnFace.srv | 7 + .../srv/ClipRecogniseFace.srv | 13 + .../vision/lasr_vision_msgs/srv/LearnFace.srv | 4 +- .../src/lasr_vision_yolov8/yolo.py | 3 + skills/CMakeLists.txt | 1 + skills/config/motions.yaml | 64 +- .../launch/unit_test_describe_people.launch | 6 +- skills/scripts/test_learn_face.py | 22 - skills/scripts/unit_test_adjust_camera.py | 18 + skills/src/lasr_skills/__init__.py | 3 +- skills/src/lasr_skills/adjust_camera.py | 445 +++++++++ skills/src/lasr_skills/describe_people.py | 62 +- skills/src/lasr_skills/detect_3d_in_area.py | 9 +- skills/src/lasr_skills/detect_gesture.py | 1 + skills/src/lasr_skills/learn_face.py | 26 - skills/src/lasr_skills/look_at_person.py | 20 +- skills/src/lasr_skills/look_to_given_point.py | 58 -- skills/src/lasr_skills/look_to_point.py | 26 +- skills/src/lasr_skills/validate_keypoints.py | 1 - .../lasr_skills/vision/get_cropped_image.py | 382 +------- tasks/receptionist/config/lab.yaml | 64 +- tasks/receptionist/config/motions.yaml | 10 + tasks/receptionist/launch/setup.launch | 1 + tasks/receptionist/scripts/main.py | 79 +- tasks/receptionist/scripts/test_seat_guest.py | 36 + .../src/receptionist/state_machine.py | 868 +++--------------- .../src/receptionist/states/__init__.py | 8 +- .../src/receptionist/states/check_sofa.py | 75 ++ .../receptionist/states/find_and_look_at.py | 493 +++++----- .../receptionist/states/get_name_or_drink.py | 1 - 
.../src/receptionist/states/handle_guest.py | 367 ++++++++ .../src/receptionist/states/introduce.py | 5 +- .../states/introduce_and_seat_guest.py | 402 ++++++++ .../receptionist/states/pointcloud_sweep.py | 145 +++ .../states/receptionist_learn_face.py | 76 +- .../receptionist/states/recognise_people.py | 36 + .../states/run_and_process_detections.py | 348 +++++++ .../src/receptionist/states/seat_guest.py | 110 +-- 68 files changed, 3593 insertions(+), 1760 deletions(-) create mode 100644 common/helpers/tf_pcl/CMakeLists.txt create mode 100644 common/helpers/tf_pcl/package.xml create mode 100644 common/helpers/tf_pcl/setup.py create mode 100644 common/helpers/tf_pcl/src/tf_pcl/__init__.py mode change 100755 => 100644 common/speech/lasr_speech_recognition_whisper/scripts/microphone_tuning_test.py create mode 100644 common/vision/lasr_vision_clip/examples/encode_image_example.py create mode 100755 common/vision/lasr_vision_clip/examples/test_person_detector.py create mode 100644 common/vision/lasr_vision_clip/nodes/img_encoder.py create mode 100644 common/vision/lasr_vision_clip/nodes/learn_face.py create mode 100644 common/vision/lasr_vision_clip/src/lasr_vision_clip/learn_face.py rename common/vision/lasr_vision_cropped_detection/launch/{setup.launch => cropped_detection.launch} (66%) create mode 100644 common/vision/lasr_vision_msgs/msg/BodyPixKeypointNormalized.msg create mode 100644 common/vision/lasr_vision_msgs/srv/ClipImageEncoder.srv create mode 100644 common/vision/lasr_vision_msgs/srv/ClipLearnFace.srv create mode 100644 common/vision/lasr_vision_msgs/srv/ClipRecogniseFace.srv delete mode 100755 skills/scripts/test_learn_face.py create mode 100644 skills/scripts/unit_test_adjust_camera.py create mode 100644 skills/src/lasr_skills/adjust_camera.py delete mode 100755 skills/src/lasr_skills/learn_face.py delete mode 100755 skills/src/lasr_skills/look_to_given_point.py create mode 100755 tasks/receptionist/scripts/test_seat_guest.py create mode 100644 tasks/receptionist/src/receptionist/states/check_sofa.py create mode 100644 tasks/receptionist/src/receptionist/states/handle_guest.py create mode 100644 tasks/receptionist/src/receptionist/states/introduce_and_seat_guest.py create mode 100755 tasks/receptionist/src/receptionist/states/pointcloud_sweep.py create mode 100644 tasks/receptionist/src/receptionist/states/recognise_people.py create mode 100755 tasks/receptionist/src/receptionist/states/run_and_process_detections.py diff --git a/common/helpers/numpy2message/src/numpy2message/__init__.py b/common/helpers/numpy2message/src/numpy2message/__init__.py index 328194092..8696e7247 100644 --- a/common/helpers/numpy2message/src/numpy2message/__init__.py +++ b/common/helpers/numpy2message/src/numpy2message/__init__.py @@ -1,7 +1,8 @@ +from typing import Tuple import numpy as np -def numpy2message(np_array: np.ndarray) -> list: +def numpy2message(np_array: np.ndarray) -> Tuple: data = np_array.tobytes() shape = list(np_array.shape) dtype = str(np_array.dtype) diff --git a/common/helpers/tf_pcl/CMakeLists.txt b/common/helpers/tf_pcl/CMakeLists.txt new file mode 100644 index 000000000..bc77c47f1 --- /dev/null +++ b/common/helpers/tf_pcl/CMakeLists.txt @@ -0,0 +1,202 @@ +cmake_minimum_required(VERSION 3.0.2) +project(tf_pcl) + +## Compile as C++11, supported in ROS Kinetic and newer +# add_compile_options(-std=c++11) + +## Find catkin macros and libraries +## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) +## is used, also find other catkin packages +find_package(catkin 
REQUIRED) + +## System dependencies are found with CMake's conventions +# find_package(Boost REQUIRED COMPONENTS system) + + +## Uncomment this if the package has a setup.py. This macro ensures +## modules and global scripts declared therein get installed +## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html +catkin_python_setup() + +################################################ +## Declare ROS messages, services and actions ## +################################################ + +## To declare and build messages, services or actions from within this +## package, follow these steps: +## * Let MSG_DEP_SET be the set of packages whose message types you use in +## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). +## * In the file package.xml: +## * add a build_depend tag for "message_generation" +## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET +## * If MSG_DEP_SET isn't empty the following dependency has been pulled in +## but can be declared for certainty nonetheless: +## * add a exec_depend tag for "message_runtime" +## * In this file (CMakeLists.txt): +## * add "message_generation" and every package in MSG_DEP_SET to +## find_package(catkin REQUIRED COMPONENTS ...) +## * add "message_runtime" and every package in MSG_DEP_SET to +## catkin_package(CATKIN_DEPENDS ...) +## * uncomment the add_*_files sections below as needed +## and list every .msg/.srv/.action file to be processed +## * uncomment the generate_messages entry below +## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) + +## Generate messages in the 'msg' folder +# add_message_files( +# FILES +# Message1.msg +# Message2.msg +# ) + +## Generate services in the 'srv' folder +# add_service_files( +# FILES +# Service1.srv +# Service2.srv +# ) + +## Generate actions in the 'action' folder +# add_action_files( +# FILES +# Action1.action +# Action2.action +# ) + +## Generate added messages and services with any dependencies listed here +# generate_messages( +# DEPENDENCIES +# std_msgs # Or other packages containing msgs +# ) + +################################################ +## Declare ROS dynamic reconfigure parameters ## +################################################ + +## To declare and build dynamic reconfigure parameters within this +## package, follow these steps: +## * In the file package.xml: +## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" +## * In this file (CMakeLists.txt): +## * add "dynamic_reconfigure" to +## find_package(catkin REQUIRED COMPONENTS ...) 
+## * uncomment the "generate_dynamic_reconfigure_options" section below +## and list every .cfg file to be processed + +## Generate dynamic reconfigure parameters in the 'cfg' folder +# generate_dynamic_reconfigure_options( +# cfg/DynReconf1.cfg +# cfg/DynReconf2.cfg +# ) + +################################### +## catkin specific configuration ## +################################### +## The catkin_package macro generates cmake config files for your package +## Declare things to be passed to dependent projects +## INCLUDE_DIRS: uncomment this if your package contains header files +## LIBRARIES: libraries you create in this project that dependent projects also need +## CATKIN_DEPENDS: catkin_packages dependent projects also need +## DEPENDS: system dependencies of this project that dependent projects also need +catkin_package( +# INCLUDE_DIRS include +# LIBRARIES tf_pcl +# CATKIN_DEPENDS other_catkin_pkg +# DEPENDS system_lib +) + +########### +## Build ## +########### + +## Specify additional locations of header files +## Your package locations should be listed before other locations +include_directories( +# include +# ${catkin_INCLUDE_DIRS} +) + +## Declare a C++ library +# add_library(${PROJECT_NAME} +# src/${PROJECT_NAME}/tf_pcl.cpp +# ) + +## Add cmake target dependencies of the library +## as an example, code may need to be generated before libraries +## either from message generation or dynamic reconfigure +# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Declare a C++ executable +## With catkin_make all packages are built within a single CMake context +## The recommended prefix ensures that target names across packages don't collide +# add_executable(${PROJECT_NAME}_node src/tf_pcl_node.cpp) + +## Rename C++ executable without prefix +## The above recommended prefix causes long target names, the following renames the +## target back to the shorter version for ease of user use +## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" +# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") + +## Add cmake target dependencies of the executable +## same as for the library above +# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Specify libraries to link a library or executable target against +# target_link_libraries(${PROJECT_NAME}_node +# ${catkin_LIBRARIES} +# ) + +############# +## Install ## +############# + +# all install targets should use catkin DESTINATION variables +# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html + +## Mark executable scripts (Python etc.) 
for installation +## in contrast to setup.py, you can choose the destination +# catkin_install_python(PROGRAMS +# scripts/my_python_script +# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark executables for installation +## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html +# install(TARGETS ${PROJECT_NAME}_node +# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark libraries for installation +## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html +# install(TARGETS ${PROJECT_NAME} +# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} +# ) + +## Mark cpp header files for installation +# install(DIRECTORY include/${PROJECT_NAME}/ +# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} +# FILES_MATCHING PATTERN "*.h" +# PATTERN ".svn" EXCLUDE +# ) + +## Mark other files for installation (e.g. launch and bag files, etc.) +# install(FILES +# # myfile1 +# # myfile2 +# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} +# ) + +############# +## Testing ## +############# + +## Add gtest based cpp test target and link libraries +# catkin_add_gtest(${PROJECT_NAME}-test test/test_tf_pcl.cpp) +# if(TARGET ${PROJECT_NAME}-test) +# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) +# endif() + +## Add folders to be run by python nosetests +# catkin_add_nosetests(test) diff --git a/common/helpers/tf_pcl/package.xml b/common/helpers/tf_pcl/package.xml new file mode 100644 index 000000000..9397064b6 --- /dev/null +++ b/common/helpers/tf_pcl/package.xml @@ -0,0 +1,65 @@ + + + tf_pcl + 0.0.0 + The tf_pcl package + + + + + mattbarker + + + + + + TODO + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + catkin + sensor_msgs + geometry_msgs + scipy + tf2_ros + numpy + ros_numpy + + + + + + + + diff --git a/common/helpers/tf_pcl/setup.py b/common/helpers/tf_pcl/setup.py new file mode 100644 index 000000000..3d7d01673 --- /dev/null +++ b/common/helpers/tf_pcl/setup.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python3 + +from distutils.core import setup +from catkin_pkg.python_setup import generate_distutils_setup + +setup_args = generate_distutils_setup(packages=["tf_pcl"], package_dir={"": "src"}) + +setup(**setup_args) diff --git a/common/helpers/tf_pcl/src/tf_pcl/__init__.py b/common/helpers/tf_pcl/src/tf_pcl/__init__.py new file mode 100644 index 000000000..cc2cd2035 --- /dev/null +++ b/common/helpers/tf_pcl/src/tf_pcl/__init__.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +from copy import deepcopy +import numpy as np +import tf2_ros as tf +import ros_numpy as rnp + +from scipy.spatial.transform import Rotation as R +from sensor_msgs.msg import PointCloud2 +from geometry_msgs.msg import TransformStamped + + +def pcl_transform( + pcl: PointCloud2, transform: TransformStamped, target_frame: str = "map" +) -> PointCloud2: + """Transforms a pointclound using a given transform message. + Needed as the tf2 transform function returns an un-orderded pcl. + Whilst we want an ordered pcl. + + Args: + pcl (PointCloud2): source pointcloud to transform. 
+ transform (TransformStamped): transform to apply + + Returns: + PointCloud2: transformed pointcloud + """ + + pcl_arr = deepcopy(rnp.point_cloud2.pointcloud2_to_array(pcl)) + + translation = transform.transform.translation + rotation_q = transform.transform.rotation + + rotation_matrix = R.from_quat( + [rotation_q.x, rotation_q.y, rotation_q.z, rotation_q.w] + ) + + pcl_x = pcl_arr["x"] + pcl_y = pcl_arr["y"] + pcl_z = pcl_arr["z"] + + pcl_x_y_z_arr = np.array([pcl_x, pcl_y, pcl_z]) + + C, H, W = pcl_x_y_z_arr.shape + + pcl_x_y_z_arr = pcl_x_y_z_arr.reshape(-1, H * W).T + + transformed_pcl = rotation_matrix.apply(pcl_x_y_z_arr) + np.array( + [translation.x, translation.y, translation.z] + ) + + transformed_pcl = transformed_pcl.T.reshape(C, H, W) + + pcl_arr["x"] = transformed_pcl[0] + pcl_arr["y"] = transformed_pcl[1] + pcl_arr["z"] = transformed_pcl[2] + + transformed_pcl = rnp.point_cloud2.array_to_pointcloud2( + pcl_arr, stamp=pcl.header.stamp, frame_id=target_frame + ) + return transformed_pcl diff --git a/common/speech/lasr_speech_recognition_whisper/scripts/microphone_tuning_test.py b/common/speech/lasr_speech_recognition_whisper/scripts/microphone_tuning_test.py old mode 100755 new mode 100644 diff --git a/common/speech/lasr_speech_recognition_whisper/scripts/test_microphones.py b/common/speech/lasr_speech_recognition_whisper/scripts/test_microphones.py index 418f9bb78..921097691 100644 --- a/common/speech/lasr_speech_recognition_whisper/scripts/test_microphones.py +++ b/common/speech/lasr_speech_recognition_whisper/scripts/test_microphones.py @@ -34,7 +34,7 @@ def main(args: dict) -> None: output_dir = args["output_dir"] r = sr.Recognizer() - with sr.Microphone(device_index=mic_index) as source: + with sr.Microphone(device_index=13, sample_rate=16000) as source: print("Say something!") audio = r.listen(source, timeout=5, phrase_time_limit=5) print("Finished listening") diff --git a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py index db4c1cd60..1e34fc738 100644 --- a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py +++ b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py @@ -12,7 +12,7 @@ from sensor_msgs.msg import Image as SensorImage -from lasr_vision_msgs.msg import BodyPixMask, BodyPixKeypoint +from lasr_vision_msgs.msg import BodyPixMask, BodyPixKeypoint, BodyPixKeypointNormalized from lasr_vision_msgs.srv import ( BodyPixMaskDetectionRequest, BodyPixMaskDetectionResponse, @@ -23,7 +23,11 @@ import rospkg # model cache -loaded_models = {} +# preload resnet 50 model so that it won't waste the time +# doing that in the middle of the task. 
+loaded_models = { + "resnet50": load_model(download_model(BodyPixModelPaths.RESNET50_FLOAT_STRIDE_16)) +} r = rospkg.RosPack() @@ -143,6 +147,7 @@ def detect_keypoints( poses = result.get_poses() detected_keypoints: List[BodyPixKeypoint] = [] + detected_keypoints_normalized: List[BodyPixKeypointNormalized] = [] for pose in poses: for keypoint in pose.keypoints.values(): @@ -150,8 +155,13 @@ def detect_keypoints( x = int(keypoint.position.x) y = int(keypoint.position.y) try: - if mask[y, x] == 0: - continue + # if mask[y, x] == 0: + # continue + if not request.keep_out_of_bounds: + if x < 0.0 or y < 0.0: + continue + if x >= mask.shape[1] or y >= mask.shape[0]: + continue # Throws an error if the keypoint is out of bounds # but not clear what type (some TF stuff) except: @@ -160,6 +170,13 @@ def detect_keypoints( detected_keypoints.append( BodyPixKeypoint(keypoint_name=keypoint.part, x=x, y=y) ) + detected_keypoints_normalized.append( + BodyPixKeypointNormalized( + keypoint_name=keypoint.part, + x=float(x) / mask.shape[1], + y=float(y) / mask.shape[0], + ) + ) # publish to debug topic if debug_publisher is not None: @@ -179,7 +196,7 @@ def detect_keypoints( cv2.putText( coloured_mask, f"{keypoint.keypoint_name}", - (keypoint.x, keypoint.y), + (int(keypoint.x), int(keypoint.y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), @@ -188,4 +205,6 @@ def detect_keypoints( ) debug_publisher.publish(cv2_img.cv2_img_to_msg(coloured_mask)) - return BodyPixKeypointDetectionResponse(keypoints=detected_keypoints) + return BodyPixKeypointDetectionResponse( + keypoints=detected_keypoints, normalized_keypoints=detected_keypoints_normalized + ) diff --git a/common/vision/lasr_vision_clip/CMakeLists.txt b/common/vision/lasr_vision_clip/CMakeLists.txt index 739bfda15..1b6de6c4a 100644 --- a/common/vision/lasr_vision_clip/CMakeLists.txt +++ b/common/vision/lasr_vision_clip/CMakeLists.txt @@ -19,7 +19,7 @@ find_package(catkin REQUIRED catkin_virtualenv) catkin_python_setup() catkin_generate_virtualenv( INPUT_REQUIREMENTS requirements.in - PYTHON_INTERPRETER python3.10 + PYTHON_INTERPRETER python3.9 ) ################################################ ## Declare ROS messages, services and actions ## @@ -157,6 +157,9 @@ include_directories( ## in contrast to setup.py, you can choose the destination catkin_install_python(PROGRAMS nodes/vqa + nodes/img_encoder.py + nodes/learn_face.py + examples/encode_image_example.py DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} ) diff --git a/common/vision/lasr_vision_clip/examples/encode_image_example.py b/common/vision/lasr_vision_clip/examples/encode_image_example.py new file mode 100644 index 000000000..3e45e8329 --- /dev/null +++ b/common/vision/lasr_vision_clip/examples/encode_image_example.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +import rospy +from typing import List +from lasr_vision_clip.clip_utils import load_model, encode_img +from lasr_vision_msgs.srv import ( + ClipImageEncoder, + ClipImageEncoderResponse, + ClipImageEncoderRequest, +) +from sensor_msgs.msg import Image +from cv2_img import msg_to_cv2_img + + +if __name__ == "__main__": + rospy.init_node("clip_encoder_test") + img_topic = "/usb_cam/image_raw" + rospy.wait_for_service("/clip/img_encoder") + clip_encoder = rospy.ServiceProxy("/clip/img_encoder", ClipImageEncoder) + while not rospy.is_shutdown(): + img_msg = rospy.wait_for_message(img_topic, Image) + request = ClipImageEncoderRequest(image_raw=img_msg) + response = clip_encoder(request) + rospy.loginfo(f"Received response: {response}") + + 
rospy.spin() diff --git a/common/vision/lasr_vision_clip/examples/test_person_detector.py b/common/vision/lasr_vision_clip/examples/test_person_detector.py new file mode 100755 index 000000000..a9d2e00d9 --- /dev/null +++ b/common/vision/lasr_vision_clip/examples/test_person_detector.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +from lasr_vision_msgs.srv import ( + ClipLearnFaceRequest, + ClipLearnFace, + ClipLearnFaceResponse, + CroppedDetection, + CroppedDetectionRequest, + CroppedDetectionResponse, + ClipRecogniseFaceRequest, + ClipRecogniseFace, + ClipRecogniseFaceResponse, +) +from lasr_vision_msgs.msg import CDRequest +from sensor_msgs.msg import Image +import cv2 +from cv2_img import msg_to_cv2_img, cv2_img_to_msg +import rospy +from typing import List +import numpy as np + + +if __name__ == "__main__": + rospy.init_node("clip_encoder_test") + cropped_detector = rospy.ServiceProxy("/vision/cropped_detection", CroppedDetection) + learn_face_service = rospy.ServiceProxy("/vision/learn_face", ClipLearnFace) + detect_face_service = rospy.ServiceProxy( + "/vision/face_detection", ClipRecogniseFace + ) + debug_pub = rospy.Publisher("/clip/recognise/debug", Image, queue_size=1) + input_str = "" + while True: + input_str = input("Please enter your name and hit enter to learn your face: ") + if input_str == "done": + break + person_1_imgs = [] + for i in range(10): + cropped_response = cropped_detector( + CroppedDetectionRequest( + [ + CDRequest( + method="centered", + use_mask=True, + object_names=["person"], + yolo_model="yolov8x-seg.pt", + yolo_model_confidence=0.8, + yolo_nms_threshold=0.4, + ) + ] + ) + ) + rospy.sleep(0.1) + try: + person_1_imgs.append(cropped_response.responses[0].cropped_imgs[0]) + except: + continue + + learn_face_service(ClipLearnFaceRequest(raw_imgs=person_1_imgs, name=input_str)) + + # Run inference + while not rospy.is_shutdown(): + cropped_response = cropped_detector( + CroppedDetectionRequest( + [ + CDRequest( + method="centered", + use_mask=True, + object_names=["person"], + yolo_model="yolov8x-seg.pt", + yolo_model_confidence=0.8, + yolo_nms_threshold=0.4, + ) + ] + ) + ) + + try: + names = [] + xywhs = [] + for cropped_img in cropped_response.responses[0].cropped_imgs: + response = detect_face_service( + ClipRecogniseFaceRequest(image_raw=cropped_img) + ) + names.append(response.name) + xywhs.append(response.xywh) + rospy.loginfo(f"Recognised face: {response.name}") + + # Add names to image + cv2_img = msg_to_cv2_img(cropped_response.responses[0].masked_img) + for name, xywh in zip(names, xywhs): + x, y, w, h = xywh[0], xywh[1], xywh[2], xywh[3] + cv2.rectangle(cv2_img, (x, y), (x + w, y + h), (0, 255, 0), 2) + cv2.putText( + cv2_img, + name, + (x, y), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, + (0, 255, 0), + 2, + cv2.LINE_AA, + ) + debug_pub.publish(cv2_img_to_msg(cv2_img)) + except Exception as e: + rospy.loginfo(e) + continue + + rospy.spin() diff --git a/common/vision/lasr_vision_clip/nodes/img_encoder.py b/common/vision/lasr_vision_clip/nodes/img_encoder.py new file mode 100644 index 000000000..958d329ba --- /dev/null +++ b/common/vision/lasr_vision_clip/nodes/img_encoder.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +import rospy +from typing import List +from lasr_vision_clip.clip_utils import load_model, encode_img +from lasr_vision_msgs.srv import ( + ClipImageEncoder, + ClipImageEncoderResponse, + ClipImageEncoderRequest, +) +from sensor_msgs.msg import Image +from cv2_img import msg_to_cv2_img + + +class EncoderService: + def __init__(self, model_device: 
str = "cuda") -> None: + """Caches the clip model. + + Args: + model_device (str, optional): device to load model onto. Defaults to "cuda". + + """ + + self._model = load_model(model_device) + self._debug_pub = rospy.Publisher( + "/clip/img_encoder/debug", Image, queue_size=1 + ) + rospy.loginfo("Clip encoder service started") + + def encode_image( + self, request: ClipImageEncoderRequest + ) -> ClipImageEncoderResponse: + """Encodes a given image to a vector. + + Returns: + ClipImageEncoderResponse: the encoded vector + """ + raw_image = request.image_raw + encoded_vector = encode_img(self._model, raw_image) + encoded_vector = encoded_vector.flatten() + return ClipImageEncoderResponse(encoded_vector=encoded_vector.tolist()) + + +if __name__ == "__main__": + rospy.init_node("clip_vqa_service") + service = EncoderService() + rospy.Service("/clip/img_encoder", ClipImageEncoder, service.encode_image) + rospy.spin() diff --git a/common/vision/lasr_vision_clip/nodes/learn_face.py b/common/vision/lasr_vision_clip/nodes/learn_face.py new file mode 100644 index 000000000..2d9185e46 --- /dev/null +++ b/common/vision/lasr_vision_clip/nodes/learn_face.py @@ -0,0 +1,8 @@ +import rospy +from lasr_vision_clip import FaceService + + +if __name__ == "__main__": + rospy.init_node("clip_vqa_service") + face_service = FaceService() + rospy.spin() diff --git a/common/vision/lasr_vision_clip/requirements.in b/common/vision/lasr_vision_clip/requirements.in index b03f1e5cc..7ee0832cf 100644 --- a/common/vision/lasr_vision_clip/requirements.in +++ b/common/vision/lasr_vision_clip/requirements.in @@ -1,2 +1,4 @@ +facenet-pytorch sentence-transformers -opencv-python \ No newline at end of file +opencv-python +opencv-contrib-python \ No newline at end of file diff --git a/common/vision/lasr_vision_clip/requirements.txt b/common/vision/lasr_vision_clip/requirements.txt index 7c61ba101..264d9aefb 100644 --- a/common/vision/lasr_vision_clip/requirements.txt +++ b/common/vision/lasr_vision_clip/requirements.txt @@ -1,15 +1,16 @@ -certifi==2024.2.2 # via requests +certifi==2024.7.4 # via requests charset-normalizer==3.3.2 # via requests -filelock==3.13.4 # via huggingface-hub, torch, transformers, triton -fsspec==2024.3.1 # via huggingface-hub, torch -huggingface-hub==0.22.2 # via sentence-transformers, tokenizers, transformers +facenet-pytorch==2.6.0 # via -r requirements.in +filelock==3.15.4 # via huggingface-hub, torch, transformers, triton +fsspec==2024.6.1 # via huggingface-hub, torch +huggingface-hub==0.23.4 # via sentence-transformers, tokenizers, transformers idna==3.7 # via requests -jinja2==3.1.3 # via torch -joblib==1.4.0 # via scikit-learn +jinja2==3.1.4 # via torch +joblib==1.4.2 # via scikit-learn markupsafe==2.1.5 # via jinja2 mpmath==1.3.0 # via sympy networkx==3.2.1 # via torch -numpy==1.26.4 # via opencv-python, scikit-learn, scipy, sentence-transformers, transformers +numpy==1.26.4 # via facenet-pytorch, opencv-contrib-python, opencv-python, scikit-learn, scipy, sentence-transformers, torchvision, transformers nvidia-cublas-cu12==12.1.3.1 # via nvidia-cudnn-cu12, nvidia-cusolver-cu12, torch nvidia-cuda-cupti-cu12==12.1.105 # via torch nvidia-cuda-nvrtc-cu12==12.1.105 # via torch @@ -20,24 +21,26 @@ nvidia-curand-cu12==10.3.2.106 # via torch nvidia-cusolver-cu12==11.4.5.107 # via torch nvidia-cusparse-cu12==12.1.0.106 # via nvidia-cusolver-cu12, torch nvidia-nccl-cu12==2.19.3 # via torch -nvidia-nvjitlink-cu12==12.4.127 # via nvidia-cusolver-cu12, nvidia-cusparse-cu12 +nvidia-nvjitlink-cu12==12.5.82 # 
via nvidia-cusolver-cu12, nvidia-cusparse-cu12 nvidia-nvtx-cu12==12.1.105 # via torch -opencv-python==4.9.0.80 # via -r requirements.in -packaging==24.0 # via huggingface-hub, transformers -pillow==10.3.0 # via sentence-transformers +opencv-contrib-python==4.10.0.84 # via -r requirements.in +opencv-python==4.10.0.84 # via -r requirements.in +packaging==24.1 # via huggingface-hub, transformers +pillow==10.2.0 # via facenet-pytorch, sentence-transformers, torchvision pyyaml==6.0.1 # via huggingface-hub, transformers -regex==2024.4.16 # via transformers -requests==2.31.0 # via huggingface-hub, transformers +regex==2024.5.15 # via transformers +requests==2.32.3 # via facenet-pytorch, huggingface-hub, transformers safetensors==0.4.3 # via transformers -scikit-learn==1.4.2 # via sentence-transformers -scipy==1.13.0 # via scikit-learn, sentence-transformers -sentence-transformers==2.7.0 # via -r requirements.in -sympy==1.12 # via torch -threadpoolctl==3.4.0 # via scikit-learn -tokenizers==0.15.2 # via transformers -torch==2.2.2 # via sentence-transformers -tqdm==4.66.2 # via huggingface-hub, sentence-transformers, transformers -transformers==4.39.3 # via sentence-transformers +scikit-learn==1.5.1 # via sentence-transformers +scipy==1.13.1 # via scikit-learn, sentence-transformers +sentence-transformers==3.0.1 # via -r requirements.in +sympy==1.12.1 # via torch +threadpoolctl==3.5.0 # via scikit-learn +tokenizers==0.19.1 # via transformers +torch==2.2.2 # via facenet-pytorch, sentence-transformers, torchvision +torchvision==0.17.2 # via facenet-pytorch +tqdm==4.66.4 # via facenet-pytorch, huggingface-hub, sentence-transformers, transformers +transformers==4.42.3 # via sentence-transformers triton==2.2.0 # via torch -typing-extensions==4.11.0 # via huggingface-hub, torch -urllib3==2.2.1 # via requests +typing-extensions==4.12.2 # via huggingface-hub, torch +urllib3==2.2.2 # via requests diff --git a/common/vision/lasr_vision_clip/src/lasr_vision_clip/__init__.py b/common/vision/lasr_vision_clip/src/lasr_vision_clip/__init__.py index 8b1378917..b783e5137 100644 --- a/common/vision/lasr_vision_clip/src/lasr_vision_clip/__init__.py +++ b/common/vision/lasr_vision_clip/src/lasr_vision_clip/__init__.py @@ -1 +1,2 @@ - +from .clip_utils import load_model, encode_img, load_face_model, infer +from .learn_face import FaceService diff --git a/common/vision/lasr_vision_clip/src/lasr_vision_clip/clip_utils.py b/common/vision/lasr_vision_clip/src/lasr_vision_clip/clip_utils.py index 2d98a2169..ddf9ffd74 100644 --- a/common/vision/lasr_vision_clip/src/lasr_vision_clip/clip_utils.py +++ b/common/vision/lasr_vision_clip/src/lasr_vision_clip/clip_utils.py @@ -6,7 +6,6 @@ import numpy as np from copy import deepcopy from sentence_transformers import SentenceTransformer, util - from sensor_msgs.msg import Image @@ -41,11 +40,42 @@ def run_clip( txt = model.encode(labels) img = model.encode(img) with torch.no_grad(): - torch cos_scores = util.cos_sim(img, txt) return cos_scores +def load_face_model(): + from transformers import AutoImageProcessor, AutoModel + + processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224") + model = AutoModel.from_pretrained("google/vit-base-patch16-224").to("cuda") + + return processor, model + + +def infer(image, processor, model): + image = cv2_img.msg_to_cv2_img(image) + inputs = processor(image, return_tensors="pt").to("cuda") + outputs = model(**inputs) + # squeeze and flatten + outputs.pooler_output = outputs.pooler_output.squeeze(0).flatten() + return 
outputs.pooler_output.detach().cpu().numpy() + + +def encode_img(model, img_msg: Image) -> np.ndarray: + """Run the CLIP model. + + Args: + model (Any): clip model loaded into memory + img (np.ndarray): the image to query + + Returns: + np.ndarray: the image embedding + """ + img = cv2_img.msg_to_cv2_img(img_msg) + return model(img.unsqueeze(0)).detach().numpy() + + def query_image_stream( model: SentenceTransformer, answers: list[str], diff --git a/common/vision/lasr_vision_clip/src/lasr_vision_clip/learn_face.py b/common/vision/lasr_vision_clip/src/lasr_vision_clip/learn_face.py new file mode 100644 index 000000000..90945fb53 --- /dev/null +++ b/common/vision/lasr_vision_clip/src/lasr_vision_clip/learn_face.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +import os +import cv2 +import rospy +from typing import Dict +import numpy as np +import rospkg +from lasr_vision_msgs.srv import ( + ClipRecogniseFaceRequest, + ClipRecogniseFaceResponse, + ClipLearnFace, + ClipRecogniseFace, + ClipLearnFaceRequest, + ClipLearnFaceResponse, +) +from sensor_msgs.msg import Image +from cv2_img import msg_to_cv2_img, cv2_img_to_msg +from lasr_vision_clip import load_face_model, encode_img, infer + + +class FaceService: + def __init__(self, similarity_threshold: float = 6.0) -> None: + self._face_classifier = cv2.CascadeClassifier( + os.path.join( + rospkg.RosPack().get_path("lasr_vision_clip"), + "data", + "haarcascade_frontalface_default.xml", + ) + ) + self.learned_faces: Dict[str, np.ndarray] = {} + self._similarity_threshold = similarity_threshold + self.processor, self.model = load_face_model() + self._face_pub = rospy.Publisher("/clip/face_detection", Image, queue_size=1) + + rospy.Service("/vision/face_detection", ClipRecogniseFace, self.face_detection) + rospy.Service("/vision/learn_face", ClipLearnFace, self.learn_face) + + rospy.loginfo("Face detector service started") + + def _detect_faces(self, img: np.ndarray): + faces = self._face_classifier.detectMultiScale( + img, 1.1, minNeighbors=5, minSize=(10, 10) + ) + return faces + + def face_detection( + self, req: ClipRecogniseFaceRequest + ) -> ClipRecogniseFaceResponse: + img = req.image_raw + cv2_img = msg_to_cv2_img(img) + # cv2_img = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2GRAY) + try: + faces = self._detect_faces(cv2_img) + + # Assume only one face in image + encoded_face = None + closest_name = "Unknown" + min_dist = float("inf") + min_xywh = None + for x, y, w, h in faces: + cv2_face = cv2_img[y : y + h, x : x + w] + # cv2_face = cv2.cvtColor(cv2_face, cv2.COLOR_GRAY2BGR) + face_msg = cv2_img_to_msg(cv2_face) + self._face_pub.publish(face_msg) + encoded_face = infer( + cv2_img_to_msg(cv2_img), self.processor, self.model + ) + encoded_face = encoded_face.flatten() + for name, face in self.learned_faces.items(): + distance = np.linalg.norm(encoded_face - face) + rospy.loginfo(f"Distance to {name} : {distance}") + if distance < min_dist: + min_dist = distance + min_xywh = [x, y, w, h] + closest_name = name + return ClipRecogniseFaceResponse( + name=closest_name, distance=min_dist, xywh=min_xywh + ) + except Exception as e: + rospy.loginfo(e) + return ClipRecogniseFaceResponse(name="Unknown", distance=None, xywh=None) + + def learn_face(self, request: ClipLearnFaceRequest) -> ClipLearnFaceResponse: + imgs = request.raw_imgs + + embedding_vectors = [] + for img in imgs: + cv2_img = msg_to_cv2_img(img) + # cv2_img = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2GRAY) + rospy.loginfo(f"Image shape: {cv2_img.shape}") + try: + faces = self._detect_faces(cv2_img) + 
except Exception as e: # No face detected + rospy.loginfo(e) + continue + for x, y, w, h in faces: + cv2_face = cv2_img[y : y + h, x : x + w] + # cv2_face = cv2.cvtColor(cv2_face, cv2.COLOR_GRAY2BGR) + face_msg = cv2_img_to_msg(cv2_face) + self._face_pub.publish(face_msg) + encoded_face = infer( + cv2_img_to_msg(cv2_img), self.processor, self.model + ) + encoded_face = encoded_face.flatten() + embedding_vectors.append(encoded_face) + + embedding_vectors = np.array(embedding_vectors) + embedding_vector = np.mean(embedding_vectors, axis=0) + self.learned_faces[request.name] = embedding_vector + rospy.loginfo(f"Learned {request.name}") + + return ClipLearnFaceResponse() diff --git a/common/vision/lasr_vision_cropped_detection/examples/request.py b/common/vision/lasr_vision_cropped_detection/examples/request.py index 368c2c658..7dbb0fc8a 100644 --- a/common/vision/lasr_vision_cropped_detection/examples/request.py +++ b/common/vision/lasr_vision_cropped_detection/examples/request.py @@ -10,11 +10,11 @@ while not rospy.is_shutdown(): request_srv = CroppedDetectionRequest() request = CDRequest() - request.method = "centered" + request.method = "closest" request.use_mask = True request.yolo_model = "yolov8x-seg.pt" request.yolo_model_confidence = 0.5 request.yolo_nms_threshold = 0.3 - request.object_names = ["person", "bottle", "chair"] + request.object_names = ["person"] request_srv.requests = [request] response = service(request_srv) diff --git a/common/vision/lasr_vision_cropped_detection/launch/setup.launch b/common/vision/lasr_vision_cropped_detection/launch/cropped_detection.launch similarity index 66% rename from common/vision/lasr_vision_cropped_detection/launch/setup.launch rename to common/vision/lasr_vision_cropped_detection/launch/cropped_detection.launch index bdacd9b0a..05db9d185 100644 --- a/common/vision/lasr_vision_cropped_detection/launch/setup.launch +++ b/common/vision/lasr_vision_cropped_detection/launch/cropped_detection.launch @@ -1,4 +1,4 @@ - - a + + \ No newline at end of file diff --git a/common/vision/lasr_vision_cropped_detection/src/lasr_vision_cropped_detection/cropped_detection.py b/common/vision/lasr_vision_cropped_detection/src/lasr_vision_cropped_detection/cropped_detection.py index ad282452a..e9360cc57 100644 --- a/common/vision/lasr_vision_cropped_detection/src/lasr_vision_cropped_detection/cropped_detection.py +++ b/common/vision/lasr_vision_cropped_detection/src/lasr_vision_cropped_detection/cropped_detection.py @@ -3,10 +3,11 @@ import numpy as np import cv2 import rospy - +from shapely.validation import explain_validity from sensor_msgs.msg import Image, PointCloud2 from geometry_msgs.msg import Point, PoseWithCovarianceStamped, Polygon from shapely.geometry.polygon import Polygon as ShapelyPolygon +from shapely.geometry.point import Point as ShapelyPoint from lasr_vision_msgs.msg import ( Detection, @@ -58,9 +59,6 @@ def _2d_bbox_crop( else: raise ValueError(f"Invalid 2D crop_method: {crop_method}") - if len(detections) == 0: - raise ValueError("No detections found") - distances = [ np.sqrt((x_to_compare - det.xywh[0]) ** 2 + (y_to_compare - det.xywh[1]) ** 2) for det in detections @@ -120,12 +118,6 @@ def _2d_mask_crop( else: raise ValueError(f"Invalid 2D crop_method: {crop_method}") - if len(detections) == 0: - raise ValueError("No detections found") - - if len(detections[0].xyseg) == 0: - raise ValueError("No segmentation found") - distances = [ np.sqrt((x_to_compare - det.xywh[0]) ** 2 + (y_to_compare - det.xywh[1]) ** 2) for det in detections @@ 
-171,9 +163,6 @@ def _3d_bbox_crop( List[np.ndarray]: List of cropped images. """ - if len(detections) == 0: - raise ValueError("No detections found") - distances = [ np.sqrt( (robot_location.x - det.point.x) ** 2 @@ -228,8 +217,6 @@ def _3d_mask_crop( Tuple[List[np.ndarray], np.ndarray, List[Detection3D]]: Tuple of cropped images, the combined mask, and the detections. """ - if len(detections) == 0: - raise ValueError("No detections found") distances = [ np.sqrt( (robot_location.x - det.point.x) ** 2 @@ -288,10 +275,20 @@ def filter_detections_by_polygon( filtered_detections: List[Detection3D] = [] for index, polygon in enumerate(polygons): area_polygon = ShapelyPolygon([(point.x, point.y) for point in polygon.points]) + print(f"Area polygon: {area_polygon}") + print(f"Polygon Area: {area_polygon.area}") + print(f"Polygon is valid: {area_polygon.is_valid}") + print(explain_validity(area_polygon)) for detection in detections: - if area_polygon.contains(Point(detection.point.x, detection.point.y)): + print(f"Point: {detection.point}") + if area_polygon.contains( + ShapelyPoint(detection.point.x, detection.point.y) + ): + print(f"Detection {detection} is within polygon {index}") detection_polygon_ids.append(index) filtered_detections.append(detection) + else: + print(f"Detection {detection} is not within polygon {index}") return filtered_detections, detection_polygon_ids @@ -391,7 +388,7 @@ def process_single_detection_request( cropped_images, detections, distances = _3d_bbox_crop( pointcloud_rgb, request.method, - request.robot_location, + robot_location, detections, ) response.detections_3d = detections @@ -406,6 +403,7 @@ def process_single_detection_request( ] debug_publisher = rospy.Publisher(debug_topic, Image, queue_size=10) + closest_pub = rospy.Publisher(debug_topic + "_closest", Image, queue_size=10) combined_mask_debug_publisher = rospy.Publisher( debug_topic + "_mask", Image, queue_size=10 ) @@ -414,36 +412,44 @@ def process_single_detection_request( if combined_mask is not None: # Add distances to the image for i, (dist, detect) in enumerate(zip(distances, detections)): - cv2.putText( - combined_mask, - f"Dist: {round(dist, 2)}", - (detect.xywh[0], detect.xywh[1]), - cv2.FONT_HERSHEY_SIMPLEX, - 1, - (0, 255, 0), - 2, - cv2.LINE_AA, - ) + continue + # cv2.putText( + # combined_mask, + # f"Dist: {round(dist, 2)}", + # (detect.xywh[0], detect.xywh[1]), + # cv2.FONT_HERSHEY_SIMPLEX, + # 1, + # (0, 255, 0), + # 2, + # cv2.LINE_AA, + # ) combined_mask_debug_publisher.publish(cv2_img_to_msg(combined_mask)) response.masked_img = cv2_img_to_msg(combined_mask) + try: + closest_pub.publish(cv2_img_to_msg(cropped_images[0])) + except IndexError: + rospy.logwarn("No detections found") response.distances = distances - - debug_image = np.hstack(cropped_images) - # Add distances to the image - for i, dist in enumerate(distances): - cv2.putText( - debug_image, - f"Dist: {round(dist, 2)}", - (i * cropped_images[0].shape[0] + 150, 50), - cv2.FONT_HERSHEY_SIMPLEX, - 1, - (0, 255, 0), - 2, - cv2.LINE_AA, - ) - - debug_publisher.publish(cv2_img_to_msg(debug_image)) + try: + print("...") + # debug_image = np.hstack(cropped_images) + # # Add distances to the image + # for i, dist in enumerate(distances): + # cv2.putText( + # debug_image, + # f"Dist: {round(dist, 2)}", + # (i * cropped_images[0].shape[0] + 150, 50), + # cv2.FONT_HERSHEY_SIMPLEX, + # 1, + # (0, 255, 0), + # 2, + # cv2.LINE_AA, + # ) + + # debug_publisher.publish(cv2_img_to_msg(debug_image)) + except ValueError: + rospy.logwarn("No 
detections found") return response diff --git a/common/vision/lasr_vision_deepface/launch/camera.launch b/common/vision/lasr_vision_deepface/launch/camera.launch index a9c85fbb1..47b66da94 100644 --- a/common/vision/lasr_vision_deepface/launch/camera.launch +++ b/common/vision/lasr_vision_deepface/launch/camera.launch @@ -6,9 +6,7 @@ - - - + diff --git a/common/vision/lasr_vision_deepface/launch/service.launch b/common/vision/lasr_vision_deepface/launch/service.launch index 337c5ae88..f749d479f 100644 --- a/common/vision/lasr_vision_deepface/launch/service.launch +++ b/common/vision/lasr_vision_deepface/launch/service.launch @@ -5,7 +5,5 @@ - - - + \ No newline at end of file diff --git a/common/vision/lasr_vision_deepface/nodes/service b/common/vision/lasr_vision_deepface/nodes/service index 0bffa407e..ccc966370 100644 --- a/common/vision/lasr_vision_deepface/nodes/service +++ b/common/vision/lasr_vision_deepface/nodes/service @@ -18,71 +18,58 @@ from lasr_vision_msgs.srv import ( rospy.init_node("recognise_service") -# Determine variables -DEBUG = rospy.get_param("~debug", False) - recognise_debug_publishers = {} learn_face_debug_publishers = {} detect_faces_debug_publisher = None -if DEBUG: - recognise_debug_publisher = rospy.Publisher("/recognise/debug", Image, queue_size=1) - learn_face_debug_publisher = rospy.Publisher( - "/learn_face/debug", Image, queue_size=1 - ) - cropped_face_publisher = rospy.Publisher( - "/learn_face/debug/cropped_query_face", Image, queue_size=1 - ) - detect_faces_debug_publisher = rospy.Publisher( - "/detect_faces/debug", Image, queue_size=1 - ) +recognise_debug_publisher = rospy.Publisher("/recognise/debug", Image, queue_size=1) +learn_face_debug_publisher = rospy.Publisher("/learn_face/debug", Image, queue_size=1) +cropped_face_publisher = rospy.Publisher( + "/learn_face/debug/cropped_query_face", Image, queue_size=1 +) +detect_faces_debug_publisher = rospy.Publisher( + "/detect_faces/debug", Image, queue_size=1 +) def recognise(request: RecogniseRequest) -> RecogniseResponse: - debug_publisher = None - similar_face_debug_publisher = None - cropped_face_publisher = None - if DEBUG: - if request.dataset in recognise_debug_publishers: - debug_publisher, similar_face_debug_publisher, cropped_face_publisher = ( - recognise_debug_publishers[request.dataset] - ) - else: - topic_name = re.sub(r"[\W_]+", "", request.dataset) - debug_publisher = rospy.Publisher( - f"/recognise/debug/{topic_name}", Image, queue_size=1 - ) - similar_face_debug_publisher = rospy.Publisher( - f"/recognise/debug/{topic_name}/similar_face", Image, queue_size=1 - ) - cropped_face_publisher = rospy.Publisher( - "/recognise/debug/cropped_query_face", Image, queue_size=1 - ) - recognise_debug_publishers[request.dataset] = ( - debug_publisher, - similar_face_debug_publisher, - cropped_face_publisher, - ) + if request.dataset in recognise_debug_publishers: + debug_publisher, similar_face_debug_publisher, cropped_face_publisher = ( + recognise_debug_publishers[request.dataset] + ) + else: + topic_name = re.sub(r"[\W_]+", "", request.dataset) + debug_publisher = rospy.Publisher( + f"/recognise/debug/{topic_name}", Image, queue_size=1 + ) + similar_face_debug_publisher = rospy.Publisher( + f"/recognise/debug/{topic_name}/similar_face", Image, queue_size=1 + ) + cropped_face_publisher = rospy.Publisher( + "/recognise/debug/cropped_query_face", Image, queue_size=1 + ) + recognise_debug_publishers[request.dataset] = ( + debug_publisher, + similar_face_debug_publisher, + cropped_face_publisher, + ) 
return face_recognition.recognise( request, debug_publisher, similar_face_debug_publisher, cropped_face_publisher ) def learn_face(request: LearnFaceRequest) -> LearnFaceResponse: - debug_publisher = None - if DEBUG: - if request.dataset in learn_face_debug_publishers: - debug_publisher = learn_face_debug_publishers[request.dataset] - else: - topic_name = re.sub(r"[\W_]+", "", request.dataset) - debug_publisher = rospy.Publisher( - f"/learn_face/debug/{topic_name}", Image, queue_size=1 - ) + if request.dataset in learn_face_debug_publishers: + debug_publisher = learn_face_debug_publishers[request.dataset] + else: + topic_name = re.sub(r"[\W_]+", "", request.dataset) + debug_publisher = rospy.Publisher( + f"/learn_face/debug/{topic_name}", Image, queue_size=1 + ) face_recognition.create_dataset( - "/xtion/rgb/image_raw", request.dataset, request.name, - request.n_images, + request.images, debug_publisher, ) return LearnFaceResponse() diff --git a/common/vision/lasr_vision_deepface/src/lasr_vision_deepface/deepface.py b/common/vision/lasr_vision_deepface/src/lasr_vision_deepface/deepface.py index d96fe664e..2a888636c 100644 --- a/common/vision/lasr_vision_deepface/src/lasr_vision_deepface/deepface.py +++ b/common/vision/lasr_vision_deepface/src/lasr_vision_deepface/deepface.py @@ -18,7 +18,7 @@ from sensor_msgs.msg import Image -from typing import Union +from typing import Union, List DATASET_ROOT = os.path.join( rospkg.RosPack().get_path("lasr_vision_deepface"), "datasets" @@ -80,31 +80,26 @@ def _extract_face(cv_im: Mat) -> Union[Mat, None]: def create_dataset( - topic: str, dataset: str, name: str, - size: int, - debug_publisher: Union[rospy.Publisher, None], + images: List[Image], + debug_publisher: rospy.Publisher, ) -> None: dataset_path = os.path.join(DATASET_ROOT, dataset, name) if not os.path.exists(dataset_path): os.makedirs(dataset_path) - rospy.loginfo(f"Taking {size} pictures of {name} and saving to {dataset_path}") - - images = [] - for i in range(size): - img_msg = rospy.wait_for_message(topic, Image) - cv_im = cv2_img.msg_to_cv2_img(img_msg) + rospy.loginfo( + f"Received {len(images)} pictures of {name} and saving to {dataset_path}" + ) + cv_images: List[Mat] = [cv2_img.msg_to_cv2_img(img) for img in images] + for i, cv_im in enumerate(cv_images): face_cropped_cv_im = _extract_face(cv_im) if face_cropped_cv_im is None: continue - cv2.imwrite(os.path.join(dataset_path, f"{name}_{i + 1}.png"), face_cropped_cv_im) # type: ignore - rospy.loginfo(f"Took picture {i + 1}") - images.append(face_cropped_cv_im) - if debug_publisher is not None: - debug_publisher.publish( - cv2_img.cv2_img_to_msg(create_image_collage(images)) - ) + cv2.imwrite( + os.path.join(dataset_path, f"{name}_{i + 1}.png"), face_cropped_cv_im + ) + debug_publisher.publish(cv2_img.cv2_img_to_msg(create_image_collage(cv_images))) # Force retraining DeepFace.find( @@ -118,9 +113,9 @@ def create_dataset( def recognise( request: RecogniseRequest, - debug_publisher: Union[rospy.Publisher, None], - debug_inference_pub: Union[rospy.Publisher, None], - cropped_detect_pub: Union[rospy.Publisher, None], + debug_publisher: rospy.Publisher, + debug_inference_pub: rospy.Publisher, + cropped_detect_pub: rospy.Publisher, ) -> RecogniseResponse: # Decode the image rospy.loginfo("Decoding") @@ -159,8 +154,7 @@ def recognise( cropped_image = cv_im[:][y : y + h, x : x + w] - if cropped_detect_pub is not None: - cropped_detect_pub.publish(cv2_img.cv2_img_to_msg(cropped_image)) + 
cropped_detect_pub.publish(cv2_img.cv2_img_to_msg(cropped_image)) # Draw bounding boxes and labels for debugging cv2.rectangle(cv_im, (x, y), (x + w, y + h), (0, 0, 255), 2) @@ -175,26 +169,24 @@ def recognise( ) # publish to debug topic - if debug_publisher is not None: - debug_publisher.publish(cv2_img.cv2_img_to_msg(cv_im)) - if debug_inference_pub is not None: - result = pd.concat(result) - # check for empty result - if not result.empty: - result_paths = list(result["identity"]) - if len(result_paths) > 5: - result_paths = result_paths[:5] - result_images = [cv2.imread(path) for path in result_paths] - debug_inference_pub.publish( - cv2_img.cv2_img_to_msg(create_image_collage(result_images)) - ) + debug_publisher.publish(cv2_img.cv2_img_to_msg(cv_im)) + result = pd.concat(result) + # check for empty result + if not result.empty: + result_paths = list(result["identity"]) + if len(result_paths) > 5: + result_paths = result_paths[:5] + result_images = [cv2.imread(path) for path in result_paths] + debug_inference_pub.publish( + cv2_img.cv2_img_to_msg(create_image_collage(result_images)) + ) return response def detect_faces( request: DetectFacesRequest, - debug_publisher: Union[rospy.Publisher, None], + debug_publisher: rospy.Publisher, ) -> DetectFacesResponse: cv_im = cv2_img.msg_to_cv2_img(request.image_raw) @@ -204,7 +196,9 @@ def detect_faces( faces = DeepFace.extract_faces( cv_im, detector_backend="mtcnn", enforce_detection=True ) - except ValueError: + except ValueError as e: + rospy.loginfo(f"Error: {e}") + debug_publisher.publish(cv2_img.cv2_img_to_msg(cv_im)) return response for i, face in enumerate(faces): @@ -233,7 +227,6 @@ def detect_faces( ) # publish to debug topic - if debug_publisher is not None: - debug_publisher.publish(cv2_img.cv2_img_to_msg(cv_im)) + debug_publisher.publish(cv2_img.cv2_img_to_msg(cv_im)) return response diff --git a/common/vision/lasr_vision_msgs/CMakeLists.txt b/common/vision/lasr_vision_msgs/CMakeLists.txt index 19c162e93..4b71ef21d 100644 --- a/common/vision/lasr_vision_msgs/CMakeLists.txt +++ b/common/vision/lasr_vision_msgs/CMakeLists.txt @@ -48,6 +48,7 @@ add_message_files( Detection.msg Detection3D.msg BodyPixKeypoint.msg + BodyPixKeypointNormalized.msg BodyPixMask.msg CDRequest.msg CDResponse.msg @@ -68,6 +69,9 @@ add_service_files( DetectFaces.srv CheckKnownPeople.srv CroppedDetection.srv + ClipImageEncoder.srv + ClipLearnFace.srv + ClipRecogniseFace.srv ) # Generate actions in the 'action' folder diff --git a/common/vision/lasr_vision_msgs/msg/BodyPixKeypointNormalized.msg b/common/vision/lasr_vision_msgs/msg/BodyPixKeypointNormalized.msg new file mode 100644 index 000000000..81faad1a1 --- /dev/null +++ b/common/vision/lasr_vision_msgs/msg/BodyPixKeypointNormalized.msg @@ -0,0 +1,8 @@ +# Keypoint.msg + +# name of the keypoint +string keypoint_name + +# the x and y coordinates of the body part +float32 x +float32 y diff --git a/common/vision/lasr_vision_msgs/msg/CDResponse.msg b/common/vision/lasr_vision_msgs/msg/CDResponse.msg index 03091fcfa..95911c281 100644 --- a/common/vision/lasr_vision_msgs/msg/CDResponse.msg +++ b/common/vision/lasr_vision_msgs/msg/CDResponse.msg @@ -22,4 +22,4 @@ uint8[] polygon_ids sensor_msgs/Image rgb_image # The pointcloud used for the 3D crop -sensor_msgs/PointCloud2 pointcloud \ No newline at end of file +sensor_msgs/PointCloud2 pointcloud \ No newline at end of file diff --git a/common/vision/lasr_vision_msgs/srv/BodyPixKeypointDetection.srv b/common/vision/lasr_vision_msgs/srv/BodyPixKeypointDetection.srv 
index ca056b268..04500b732 100644 --- a/common/vision/lasr_vision_msgs/srv/BodyPixKeypointDetection.srv +++ b/common/vision/lasr_vision_msgs/srv/BodyPixKeypointDetection.srv @@ -7,6 +7,12 @@ string dataset # How certain the detection should be to include float32 confidence +# Whether to return keypoints that are out of bound +bool keep_out_of_bounds + --- # keypoints lasr_vision_msgs/BodyPixKeypoint[] keypoints + +# keypoints +lasr_vision_msgs/BodyPixKeypointNormalized[] normalized_keypoints diff --git a/common/vision/lasr_vision_msgs/srv/ClipImageEncoder.srv b/common/vision/lasr_vision_msgs/srv/ClipImageEncoder.srv new file mode 100644 index 000000000..dae50b645 --- /dev/null +++ b/common/vision/lasr_vision_msgs/srv/ClipImageEncoder.srv @@ -0,0 +1,8 @@ +# Image to encode +sensor_msgs/Image image_raw + +--- + +# Encoded image vector +float32[] encoded_vector + diff --git a/common/vision/lasr_vision_msgs/srv/ClipLearnFace.srv b/common/vision/lasr_vision_msgs/srv/ClipLearnFace.srv new file mode 100644 index 000000000..a41a904c2 --- /dev/null +++ b/common/vision/lasr_vision_msgs/srv/ClipLearnFace.srv @@ -0,0 +1,7 @@ +# Images to use to learn face +sensor_msgs/Image[] raw_imgs + +# Name of person to be learned +string name + +--- \ No newline at end of file diff --git a/common/vision/lasr_vision_msgs/srv/ClipRecogniseFace.srv b/common/vision/lasr_vision_msgs/srv/ClipRecogniseFace.srv new file mode 100644 index 000000000..a191f2dc3 --- /dev/null +++ b/common/vision/lasr_vision_msgs/srv/ClipRecogniseFace.srv @@ -0,0 +1,13 @@ +# Raw image to run inference on +sensor_msgs/Image image_raw + +--- + +# Name of face +string name + +# Distance from detected face +float32 distance + +# Face bounding box +int32[] xywh \ No newline at end of file diff --git a/common/vision/lasr_vision_msgs/srv/LearnFace.srv b/common/vision/lasr_vision_msgs/srv/LearnFace.srv index 9410424fa..146f4aa90 100644 --- a/common/vision/lasr_vision_msgs/srv/LearnFace.srv +++ b/common/vision/lasr_vision_msgs/srv/LearnFace.srv @@ -4,7 +4,7 @@ string dataset # Name to associate string name -# Number of images to take -int32 n_images +# Images +sensor_msgs/Image[] images --- diff --git a/common/vision/lasr_vision_yolov8/src/lasr_vision_yolov8/yolo.py b/common/vision/lasr_vision_yolov8/src/lasr_vision_yolov8/yolo.py index 4171de2de..d1e7065b7 100644 --- a/common/vision/lasr_vision_yolov8/src/lasr_vision_yolov8/yolo.py +++ b/common/vision/lasr_vision_yolov8/src/lasr_vision_yolov8/yolo.py @@ -146,6 +146,9 @@ def detect_3d( width=request.pcl.width, ) detection.point = Point(*centroid) + rospy.loginfo( + f"Detected point: {detection.point} of object {detection.name}" + ) if debug_point_publisher is not None: markers.create_and_publish_marker( diff --git a/skills/CMakeLists.txt b/skills/CMakeLists.txt index 6a5978ed6..1b97b6944 100644 --- a/skills/CMakeLists.txt +++ b/skills/CMakeLists.txt @@ -161,6 +161,7 @@ include_directories( ## in contrast to setup.py, you can choose the destination catkin_install_python(PROGRAMS scripts/unit_test_describe_people.py + scripts/unit_test_adjust_camera.py DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} ) diff --git a/skills/config/motions.yaml b/skills/config/motions.yaml index f85a155d1..9409d156b 100644 --- a/skills/config/motions.yaml +++ b/skills/config/motions.yaml @@ -79,4 +79,66 @@ play_motion: joints: [arm_1_joint, arm_2_joint, arm_3_joint, arm_4_joint, arm_5_joint, arm_6_joint, arm_7_joint] points: - positions: [2.63, 0.10, -3.21, 1.61, 1.53, 0.00, 0.13] - time_from_start: 0.0 \ No newline at end of 
file + time_from_start: 0.0 + + u3l: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.4, 0.35, 0.15] + time_from_start: 0.0 + u3m: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.4, 0.0, 0.15] + time_from_start: 0.0 + u3r: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.4, -0.35, 0.15] + time_from_start: 0.0 + + u2l: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.4, 0.35, 0.05] + time_from_start: 0.0 + u2m: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.4, 0.0, 0.05] + time_from_start: 0.0 + u2r: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.4, -0.35, 0.05] + time_from_start: 0.0 + u1l: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.3, 0.35, 0.05] + time_from_start: 0.0 + u1m: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.3, 0.0, 0.05] + time_from_start: 0.0 + u1r: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.3, -0.35, 0.05] + time_from_start: 0.0 + ml: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.2, 0.35, 0.0] + time_from_start: 0.0 + mm: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.2, 0.0, 0.0] + time_from_start: 0.0 + mr: + joints: [torso_lift_joint, head_1_joint, head_2_joint] + points: + - positions: [0.2, -0.35, 0.0] + time_from_start: 0.0 diff --git a/skills/launch/unit_test_describe_people.launch b/skills/launch/unit_test_describe_people.launch index 649279233..81d43f9b4 100644 --- a/skills/launch/unit_test_describe_people.launch +++ b/skills/launch/unit_test_describe_people.launch @@ -16,10 +16,12 @@ + + - +
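The reworked LearnFace interface above replaces the n_images count with a sensor_msgs/Image[] images field, and create_dataset now receives those images directly instead of subscribing to a camera topic inside the service. A minimal client sketch under these assumptions (the /learn_face service name and the ten-frame count are illustrative; only the request fields and the camera topic, previously hard-coded in create_dataset, come from this patch):

#!/usr/bin/env python3
# Sketch of a caller for the updated LearnFace service: frame capture has moved
# out of the service, so the client collects images itself and sends them in the
# request. The "/learn_face" service name and the frame count are assumptions.
import rospy
from sensor_msgs.msg import Image
from lasr_vision_msgs.srv import LearnFace, LearnFaceRequest

rospy.init_node("learn_face_client_example")

# Collect the frames the old service used to grab via rospy.wait_for_message
images = [rospy.wait_for_message("/xtion/rgb/image_raw", Image) for _ in range(10)]

rospy.wait_for_service("/learn_face")
learn_face = rospy.ServiceProxy("/learn_face", LearnFace)
learn_face(LearnFaceRequest(dataset="my_dataset", name="alice", images=images))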
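The new ClipLearnFace and ClipRecogniseFace definitions only describe the service interface; no server node is included in this patch. A hypothetical client, assuming /clip/learn_face and /clip/recognise_face service names and using only the request/response fields from the .srv files, might look like:

#!/usr/bin/env python3
# Hypothetical client for the new CLIP face services. Field names come from the
# .srv files in this patch; the service names and the existence of a server
# node are assumptions.
import rospy
from sensor_msgs.msg import Image
from lasr_vision_msgs.srv import ClipLearnFace, ClipRecogniseFace

rospy.init_node("clip_face_client_example")

faces = [rospy.wait_for_message("/xtion/rgb/image_raw", Image) for _ in range(5)]

rospy.wait_for_service("/clip/learn_face")
learn = rospy.ServiceProxy("/clip/learn_face", ClipLearnFace)
learn(raw_imgs=faces, name="alice")

rospy.wait_for_service("/clip/recognise_face")
recognise = rospy.ServiceProxy("/clip/recognise_face", ClipRecogniseFace)
result = recognise(image_raw=rospy.wait_for_message("/xtion/rgb/image_raw", Image))
rospy.loginfo(f"Matched {result.name} at distance {result.distance}, bbox {result.xywh}")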
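BodyPixKeypointDetection now also returns BodyPixKeypointNormalized[] normalized_keypoints and accepts a keep_out_of_bounds flag. Assuming the normalized coordinates are fractions of the image width and height, and using an illustrative service name and request image field, a caller could map them back to pixel coordinates like this:

#!/usr/bin/env python3
# Sketch of consuming the new normalized keypoints. The service name, the
# image_raw request field, the dataset value, and the fraction-of-image
# interpretation of x/y are assumptions; keypoint_name, x, y and
# normalized_keypoints come from the new message and service definitions.
import rospy
from sensor_msgs.msg import Image
from lasr_vision_msgs.srv import (
    BodyPixKeypointDetection,
    BodyPixKeypointDetectionRequest,
)

rospy.init_node("bodypix_keypoint_client_example")

img = rospy.wait_for_message("/xtion/rgb/image_raw", Image)
rospy.wait_for_service("/bodypix/keypoint_detection")
detect = rospy.ServiceProxy("/bodypix/keypoint_detection", BodyPixKeypointDetection)
resp = detect(
    BodyPixKeypointDetectionRequest(
        image_raw=img,  # field name assumed, not shown in this hunk
        dataset="resnet50",  # illustrative model/dataset name
        confidence=0.7,
        keep_out_of_bounds=False,
    )
)
for kp in resp.normalized_keypoints:
    px, py = kp.x * img.width, kp.y * img.height
    rospy.loginfo(f"{kp.keypoint_name}: ({px:.0f}, {py:.0f})")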
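The new u*/m* entries in motions.yaml define torso-lift and head poses forming a coarse scan grid (left, middle, right at three heights). Assuming the standard play_motion action server is running, a sweep over the top row could be executed as below; only the motion names come from this patch.

#!/usr/bin/env python3
# Sketch of running the new head/torso scan motions via play_motion. The action
# interface is the standard play_motion one and is assumed to be available.
import rospy
import actionlib
from play_motion_msgs.msg import PlayMotionAction, PlayMotionGoal

rospy.init_node("scan_motion_example")

client = actionlib.SimpleActionClient("play_motion", PlayMotionAction)
client.wait_for_server()

for motion in ["u3l", "u3m", "u3r"]:  # top row: look left, centre, right
    goal = PlayMotionGoal(motion_name=motion, skip_planning=True)
    client.send_goal(goal)
    client.wait_for_result()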