diff --git a/common/vision/lasr_vision_bodypix/CMakeLists.txt b/common/vision/lasr_vision_bodypix/CMakeLists.txt index 80ac5e1e..8e784148 100644 --- a/common/vision/lasr_vision_bodypix/CMakeLists.txt +++ b/common/vision/lasr_vision_bodypix/CMakeLists.txt @@ -28,6 +28,10 @@ install(PROGRAMS DESTINATION lib/${PROJECT_NAME} ) +install(PROGRAMS + examples/mask_relay.py + DESTINATION lib/${PROJECT_NAME} +) # Install launch files install(DIRECTORY launch diff --git a/common/vision/lasr_vision_bodypix/examples/keypoint_relay.py b/common/vision/lasr_vision_bodypix/examples/keypoint_relay.py index 7673b7ef..a588d88a 100755 --- a/common/vision/lasr_vision_bodypix/examples/keypoint_relay.py +++ b/common/vision/lasr_vision_bodypix/examples/keypoint_relay.py @@ -5,7 +5,7 @@ import threading from sensor_msgs.msg import Image -from lasr_vision_msgs.srv import BodyPixKeypointDetection, BodyPixKeypointDetectionRequest +from lasr_vision_msgs.srv import BodyPixKeypointDetection class KeypointRelay(Node): def __init__(self, listen_topic, model): @@ -31,9 +31,8 @@ def __init__(self, listen_topic, model): def detect(self, image): self.processing = True self.get_logger().info("Received image message") - # Create a request for the service - req = BodyPixKeypointDetectionRequest() + req = BodyPixKeypointDetection.Request() req.image_raw = image req.dataset = self.model req.confidence = 0.7 @@ -60,17 +59,23 @@ def image_callback(self, image): def main(args=None): + print("start keypoint_relay") # Check if command-line arguments are sufficient if len(sys.argv) < 2: print("Usage: ros2 run lasr_vision_bodypix keypoint_relay.py [resnet50|mobilenet50|...]") sys.exit(1) # Parse the command-line arguments - listen_topic = sys.argv[1] + + listen_topic = '/image_raw' + if isinstance(sys.argv[1], list): + listen_topic = sys.argv[1][0] + model = sys.argv[2] if len(sys.argv) >= 3 else "resnet50" rclpy.init(args=args) keypoint_relay_node = KeypointRelay(listen_topic, model) + keypoint_relay_node.get_logger().info("Keypoint relay node started") try: rclpy.spin(keypoint_relay_node) diff --git a/common/vision/lasr_vision_bodypix/examples/mask_relay.py b/common/vision/lasr_vision_bodypix/examples/mask_relay.py new file mode 100644 index 00000000..bdfbbf66 --- /dev/null +++ b/common/vision/lasr_vision_bodypix/examples/mask_relay.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 + +import sys +import threading +import rclpy +from rclpy.node import Node +from sensor_msgs.msg import Image +from lasr_vision_msgs.srv import BodyPixMaskDetection + + +class ImageListener(Node): + def __init__(self, listen_topic, model): + super().__init__('image_listener') + self.listen_topic = listen_topic + self.model = model + self.processing = False + + # Set up the service client + self.detect_service_client = self.create_client(BodyPixMaskDetection, '/bodypix/mask_detection') + while not self.detect_service_client.wait_for_service(timeout_sec=1.0): + self.get_logger().info('Service not available, waiting...') + + # Set up the subscriber + self.subscription = self.create_subscription( + Image, + self.listen_topic, + self.image_callback, + 10 # QoS profile + ) + self.get_logger().info(f'Started listening on topic: {self.listen_topic} with model: {self.model}') + + def detect(self, image): + self.processing = True + self.get_logger().info("Received image message") + # Create a request for the service + req = BodyPixMaskDetection.Request() + req.image_raw = image + req.dataset = self.model + req.confidence = 0.7 + req.parts = ["left_face", "right_face"] + + # 
Call the service asynchronously + future = self.detect_service_client.call_async(req) + future.add_done_callback(self.detect_callback) + + def detect_callback(self, future): + try: + response = future.result() + if response is not None: + # Modify masks for demonstration purposes + for mask in response.masks: + mask.mask = [True, False, True, False] + self.get_logger().info(f"Detection response received: {response}") + else: + self.get_logger().error("Service call returned no response") + except Exception as e: + self.get_logger().error(f"Service call failed: {e}") + finally: + self.processing = False + + def image_callback(self, image): + if self.processing: + return + + # Start a new thread for detection to avoid blocking + threading.Thread(target=self.detect, args=(image,)).start() + + +def main(args=None): + print("Starting mask_relay node") + # Check if command-line arguments are sufficient + if len(sys.argv) < 2: + print("Usage: ros2 run lasr_vision_bodypix mask_relay.py [resnet50|mobilenet50|...]") + sys.exit(1) + + # Parse the command-line arguments + listen_topic = '/image_raw' + if isinstance(sys.argv[1], list): + listen_topic = sys.argv[1][0] + + model = sys.argv[2] if len(sys.argv) >= 3 else "resnet50" + + rclpy.init(args=args) + mask_relay_node = ImageListener(listen_topic, model) + mask_relay_node.get_logger().info("Mask relay node started") + + try: + rclpy.spin(mask_relay_node) + except KeyboardInterrupt: + pass + finally: + mask_relay_node.destroy_node() + rclpy.shutdown() + + +if __name__ == "__main__": + main() diff --git a/common/vision/lasr_vision_bodypix/launch/bodypix_launch.py b/common/vision/lasr_vision_bodypix/launch/bodypix_launch.py index a3593bde..7cb3ae33 100644 --- a/common/vision/lasr_vision_bodypix/launch/bodypix_launch.py +++ b/common/vision/lasr_vision_bodypix/launch/bodypix_launch.py @@ -4,11 +4,14 @@ from launch.substitutions import LaunchConfiguration def generate_launch_description(): - # Declare preload argument with default value + # Declare preload argument with default value as a YAML list preload_arg = DeclareLaunchArgument( - 'preload', default_value="['resnet50']", description='Array of models to preload when starting the service' + 'preload', + default_value=['resnet50'], + # default_value="['resnet50', 'mobilenet50']", + description='Array of models to preload when starting the service' ) - + # Create the BodyPix service node bodypix_node = Node( package='lasr_vision_bodypix', diff --git a/common/vision/lasr_vision_bodypix/launch/camera_keypoint_launch.py b/common/vision/lasr_vision_bodypix/launch/camera_keypoint_launch.py index a8003d7c..b23c87d3 100644 --- a/common/vision/lasr_vision_bodypix/launch/camera_keypoint_launch.py +++ b/common/vision/lasr_vision_bodypix/launch/camera_keypoint_launch.py @@ -2,16 +2,24 @@ from launch.actions import DeclareLaunchArgument, IncludeLaunchDescription from launch_ros.actions import Node from launch.launch_description_sources import PythonLaunchDescriptionSource -from launch.substitutions import LaunchConfiguration, TextSubstitution +from launch.substitutions import LaunchConfiguration, TextSubstitution, PythonExpression + import os from ament_index_python.packages import get_package_share_directory def generate_launch_description(): # Declare launch arguments model_arg = DeclareLaunchArgument( - 'model', default_value='resnet50', description='Model to use for the demo' + 'model', default_value="['resnet50']", description='Model to use for the demo' + ) + + image_topic_arg = DeclareLaunchArgument( + 
'image_topic', + default_value='/image_raw', # Default input image topic + description='Input image topic for keypoint relay' ) + # Path to the BodyPix launch file bodypix_launch_file = os.path.join( get_package_share_directory('lasr_vision_bodypix'), 'launch', 'bodypix_launch.py' @@ -45,8 +53,15 @@ def generate_launch_description(): name='image_view', output='screen', arguments=[ - TextSubstitution(text='/bodypix/debug/'), - LaunchConfiguration('model') + + # [TextSubstitution(text='/bodypix/debug/'), LaunchConfiguration('model')]# Topic to visualize + PythonExpression([ + "'/bodypix/debug/' + ", # Static string + "''.join(", # Convert list to string + LaunchConfiguration('model'), + ")" + ]) + # '/bodypix/debug/{}'.format(LaunchConfiguration('resnet50')) ], # Constructs the topic path dynamically ), @@ -56,7 +71,17 @@ def generate_launch_description(): executable='keypoint_relay.py', # Specifying the subdirectory name='keypoint_relay', output='screen', - arguments=[LaunchConfiguration('model')], + arguments=[ + # TextSubstitution(text='/bodypix/debug/'), + # LaunchConfiguration('model') + # '/camera/image_raw /{}'.format(LaunchConfiguration('model')) + '/image_raw ', + PythonExpression([ + "''.join(", # Convert model list to a single string + LaunchConfiguration('model'), + ")" + ]) + ], # Constructs the topic path dynamically], ), diff --git a/common/vision/lasr_vision_bodypix/launch/camera_mask_launch.py b/common/vision/lasr_vision_bodypix/launch/camera_mask_launch.py index 08460b31..935ecf8e 100644 --- a/common/vision/lasr_vision_bodypix/launch/camera_mask_launch.py +++ b/common/vision/lasr_vision_bodypix/launch/camera_mask_launch.py @@ -2,14 +2,20 @@ from launch.actions import DeclareLaunchArgument, IncludeLaunchDescription from launch_ros.actions import Node from launch.launch_description_sources import PythonLaunchDescriptionSource -from launch.substitutions import LaunchConfiguration, TextSubstitution +from launch.substitutions import LaunchConfiguration, TextSubstitution, PythonExpression import os from ament_index_python.packages import get_package_share_directory def generate_launch_description(): # Declare launch arguments model_arg = DeclareLaunchArgument( - 'model', default_value='resnet50', description='Model to use for the demo' + 'model', default_value="['resnet50']", description='Model to use for the demo' + ) + + image_topic_arg = DeclareLaunchArgument( + 'image_topic', + default_value='/image_raw', # Default input image topic + description='Input image topic for keypoint relay' ) # Path to the BodyPix launch file @@ -19,7 +25,7 @@ def generate_launch_description(): # Path to the v4l2_camera launch file (replacement for video_stream_opencv) v4l2_camera_launch_file = os.path.join( - get_package_share_directory('v4l2_camera'), 'launch', 'v4l2_camera_node.launch.py' + get_package_share_directory('lasr_vision_bodypix'), 'launch', 'v4l2_camera_launch.py' ) return LaunchDescription([ @@ -41,18 +47,31 @@ def generate_launch_description(): name='image_view', output='screen', arguments=[ - TextSubstitution(text='/bodypix/debug/'), - LaunchConfiguration('model') + # [TextSubstitution(text='/bodypix/debug/'), LaunchConfiguration('model')]# Topic to visualize + PythonExpression([ + "'/bodypix/debug/' + ", # Static string + "''.join(", # Convert list to string + LaunchConfiguration('model'), + ")" + ]) + # '/bodypix/debug/{}'.format(LaunchConfiguration('resnet50')) ], # Constructs the topic path dynamically ), # Start the keypoint relay service Node( package='lasr_vision_bodypix', - 
executable='keypoint_relay.py', # Removed .py extension, assuming installed without it - name='keypoint_relay', + executable='mask_relay.py', # Removed .py extension, assuming installed without it + name='mask_relay', output='screen', - arguments=[LaunchConfiguration('model')], + arguments=[ + '/image_raw ', + PythonExpression([ + "''.join(", # Convert model list to a single string + LaunchConfiguration('model'), + ")" + ]) + ], # Constructs the topic path dynamically],], ), # Include the v4l2_camera launch file diff --git a/common/vision/lasr_vision_bodypix/launch/v4l2_camera_launch.py b/common/vision/lasr_vision_bodypix/launch/v4l2_camera_launch.py index b5a74b82..27aeac56 100644 --- a/common/vision/lasr_vision_bodypix/launch/v4l2_camera_launch.py +++ b/common/vision/lasr_vision_bodypix/launch/v4l2_camera_launch.py @@ -9,11 +9,11 @@ def generate_launch_description(): name='v4l2_camera_node', output='screen', parameters=[{ - 'video_device': '/dev/video0', # 摄像头设备路径 - 'image_width': 640, # 图像宽度 - 'image_height': 480, # 图像高度 - 'pixel_format': 'YUYV', # 像素格式 - 'frame_rate': 30, # 帧率 + 'video_device': '/dev/video0', # the video device to open + 'image_width': 640, + 'image_height': 480, + 'pixel_format': 'YUYV', # the pixel format of the image + 'frame_rate': 30, }], ), ]) diff --git a/common/vision/lasr_vision_bodypix/nodes/bodypix_services.py b/common/vision/lasr_vision_bodypix/nodes/bodypix_services.py index 3745b17b..ddc9bf28 100644 --- a/common/vision/lasr_vision_bodypix/nodes/bodypix_services.py +++ b/common/vision/lasr_vision_bodypix/nodes/bodypix_services.py @@ -15,15 +15,18 @@ import numpy as np from cv2_pcl import pcl_to_img_msg from typing import Union +from rcl_interfaces.msg import ParameterDescriptor, ParameterType class BodyPixServiceNode(Node): def __init__(self): super().__init__("bodypix_service_node") # Declare and load parameters - self.declare_parameter('preload', []) + # self.declare_parameter('preload', []) + self.declare_parameter('preload', [''], ParameterDescriptor(type=ParameterType.PARAMETER_STRING_ARRAY)) preload_models = self.get_parameter('preload').get_parameter_value().string_array_value + # Preload models for model in preload_models: bodypix.load_model_cached(model) @@ -35,6 +38,7 @@ def __init__(self): self.keypoint_service = self.create_service( BodyPixKeypointDetection, '/bodypix/keypoint_detection', self.detect_keypoints ) + self.get_logger().info("Keypoint detection service registered.") self.detect_wave_service = self.create_service( DetectWave, '/bodypix/detect_wave', self.detect_wave ) diff --git a/common/vision/lasr_vision_bodypix/setup.py b/common/vision/lasr_vision_bodypix/setup.py index df1ee2e5..255cd85a 100644 --- a/common/vision/lasr_vision_bodypix/setup.py +++ b/common/vision/lasr_vision_bodypix/setup.py @@ -16,6 +16,7 @@ 'bodypix_service_node = lasr_vision_bodypix.nodes.bodypix_services:main', 'bodypix_node = lasr_vision_bodypix.bodypix:main', 'keypoint_relay = lasr_vision_bodypix.examples.keypoint_relay:main', + 'mask_relay = lasr_vision_bodypix.examples.mask_relay:main' ], } ) diff --git a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/__init__.py b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/__init__.py index fb8a7222..46ef209d 100755 --- a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/__init__.py +++ b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/__init__.py @@ -2,5 +2,7 @@ snake_to_camel, camel_to_snake, load_model_cached, - BodyPixNode, + run_inference, + detect_masks, + 
detect_keypoints, ) diff --git a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py index d6ec521e..84add7cf 100755 --- a/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py +++ b/common/vision/lasr_vision_bodypix/src/lasr_vision_bodypix/bodypix.py @@ -58,139 +58,137 @@ def load_model_cached(dataset: str): return model -class BodyPixNode(Node): - def __init__(self): - super().__init__('bodypix_service_node') - self.mask_service = self.create_service( - BodyPixMaskDetection, 'detect_masks', self.detect_masks - ) - self.keypoint_service = self.create_service( - BodyPixKeypointDetection, 'detect_keypoints', self.detect_keypoints - ) - self.debug_publisher_mask = self.create_publisher( - SensorImage, '/bodypix/mask_debug', 1 - ) - self.debug_publisher_keypoint = self.create_publisher( - SensorImage, '/bodypix/keypoint_debug', 1 - ) +def run_inference(dataset: str, confidence: float, img: SensorImage, logger=None): + """ + Run inference on an image. + """ + # Decode the image + if logger: + logger.info("Decoding") + img = cv2_img.msg_to_cv2_img(img) - def run_inference(self, dataset: str, confidence: float, img: SensorImage): - self.get_logger().info("Decoding") - img = cv2_img.msg_to_cv2_img(img) + # Load model + if logger: + logger.info("Loading model") + model = load_model_cached(dataset) - self.get_logger().info("Loading model") - model = load_model_cached(dataset) + # Run inference + if logger: + logger.info("Running inference") + result = model.predict_single(img) - self.get_logger().info("Running inference") - result = model.predict_single(img) + mask = result.get_mask(threshold=confidence) + if logger: + logger.info("Inference complete") - mask = result.get_mask(threshold=confidence) - self.get_logger().info("Inference complete") - return result, mask + return result, mask - def detect_masks( - self, request: BodyPixMaskDetection_Request, response: BodyPixMaskDetection_Response - ) -> BodyPixMaskDetection_Response: - result, mask = self.run_inference(request.dataset, request.confidence, request.image_raw) - masks = [] - for part_name in request.parts: - part_mask = result.get_part_mask(mask=tf.identity(mask), part_names=[part_name]).squeeze() +def detect_masks(request: BodyPixMaskDetection_Request, debug_publisher=None, logger=None): + """ + Run BodyPix inference for mask detection. 
+ """ + result, mask = run_inference(request.dataset, request.confidence, request.image_raw, logger) - if np.max(part_mask) == 0: - self.get_logger().warn(f"No masks found for part {part_name}") - continue + masks = [] - bodypix_mask = BodyPixMask() - bodypix_mask.mask = part_mask.flatten().astype(bool).tolist() - bodypix_mask.shape = list(part_mask.shape) - bodypix_mask.part_name = part_name - masks.append(bodypix_mask) - - # Publish to debug topic - if self.debug_publisher_mask is not None: - from tf_bodypix.draw import draw_poses - coloured_mask = result.get_colored_part_mask(mask).astype(np.uint8) - poses = result.get_poses() - coloured_mask = draw_poses( - coloured_mask.copy(), - poses, - keypoints_color=(255, 100, 100), - skeleton_color=(100, 100, 255), - ) - self.debug_publisher_mask.publish(cv2_img.cv2_img_to_msg(coloured_mask)) + for part_name in request.parts: + part_mask = result.get_part_mask(mask=tf.identity(mask), part_names=[part_name]).squeeze() - response.masks = masks - return response + if np.max(part_mask) == 0: + if logger: + logger.warning(f"No masks found for part {part_name}") + continue - def detect_keypoints( - self, request: BodyPixKeypointDetection_Request, response: BodyPixKeypointDetection_Response - ) -> BodyPixKeypointDetection_Response: - result, mask = self.run_inference(request.dataset, request.confidence, request.image_raw) + bodypix_mask = BodyPixMask() + bodypix_mask.mask = part_mask.flatten().astype(bool).tolist() + bodypix_mask.shape = list(part_mask.shape) + bodypix_mask.part_name = part_name + masks.append(bodypix_mask) + # Publish debug visualization if enabled + if debug_publisher: + from tf_bodypix.draw import draw_poses + + coloured_mask = result.get_colored_part_mask(mask).astype(np.uint8) poses = result.get_poses() - detected_keypoints: List[BodyPixKeypoint] = [] - detected_keypoints_normalized: List[BodyPixKeypointNormalized] = [] - - for pose in poses: - for keypoint in pose.keypoints.values(): - x = int(keypoint.position.x) - y = int(keypoint.position.y) - try: - if not request.keep_out_of_bounds: - if x < 0.0 or y < 0.0: - continue - if x >= mask.shape[1] or y >= mask.shape[0]: - continue - except: - continue - self.get_logger().info(f"Keypoint {keypoint.part} at {x}, {y} is in mask") - detected_keypoints.append( - BodyPixKeypoint(keypoint_name=keypoint.part, x=x, y=y) - ) - detected_keypoints_normalized.append( - BodyPixKeypointNormalized( - keypoint_name=keypoint.part, - x=float(x) / mask.shape[1], - y=float(y) / mask.shape[0], - ) - ) + coloured_mask = draw_poses( + coloured_mask.copy(), + poses, + keypoints_color=(255, 100, 100), + skeleton_color=(100, 100, 255), + ) + debug_publisher.publish(cv2_img.cv2_img_to_msg(coloured_mask)) + + response = BodyPixMaskDetection_Response + response.masks = masks + return response + + +def detect_keypoints(request: BodyPixKeypointDetection_Request, debug_publisher=None, logger=None): + """ + Run BodyPix inference for keypoint detection. 
+ """ + result, mask = run_inference(request.dataset, request.confidence, request.image_raw, logger) + + poses = result.get_poses() + detected_keypoints: List[BodyPixKeypoint] = [] + detected_keypoints_normalized: List[BodyPixKeypointNormalized] = [] + + for pose in poses: + for keypoint in pose.keypoints.values(): + x = int(keypoint.position.x) + y = int(keypoint.position.y) + try: + if not request.keep_out_of_bounds: + if x < 0.0 or y < 0.0: + continue + if x >= mask.shape[1] or y >= mask.shape[0]: + continue + except: + continue + + if logger: + logger.info(f"Keypoint {keypoint.part} at {x}, {y} is in mask") - if self.debug_publisher_keypoint is not None: - from tf_bodypix.draw import draw_poses - coloured_mask = result.get_colored_part_mask(mask).astype(np.uint8) - coloured_mask = draw_poses( - coloured_mask.copy(), - poses, - keypoints_color=(255, 100, 100), - skeleton_color=(100, 100, 255), + detected_keypoints.append( + BodyPixKeypoint(keypoint_name=keypoint.part, x=x, y=y) ) - for keypoint in detected_keypoints: - cv2.putText( - coloured_mask, - f"{keypoint.keypoint_name}", - (int(keypoint.x), int(keypoint.y)), - cv2.FONT_HERSHEY_SIMPLEX, - 0.5, - (255, 255, 255), - 2, - cv2.LINE_AA, + detected_keypoints_normalized.append( + BodyPixKeypointNormalized( + keypoint_name=keypoint.part, + x=float(x) / mask.shape[1], + y=float(y) / mask.shape[0], ) - self.debug_publisher_keypoint.publish(cv2_img.cv2_img_to_msg(coloured_mask)) - - response.keypoints = detected_keypoints - response.normalized_keypoints = detected_keypoints_normalized - return response + ) + # Publish debug visualization if enabled + if debug_publisher: + from tf_bodypix.draw import draw_poses -def main(args=None): - rclpy.init(args=args) - node = BodyPixNode() - rclpy.spin(node) - node.destroy_node() - rclpy.shutdown() + coloured_mask = result.get_colored_part_mask(mask).astype(np.uint8) + coloured_mask = draw_poses( + coloured_mask.copy(), + poses, + keypoints_color=(255, 100, 100), + skeleton_color=(100, 100, 255), + ) + for keypoint in detected_keypoints: + cv2.putText( + coloured_mask, + f"{keypoint.keypoint_name}", + (int(keypoint.x), int(keypoint.y)), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, + (255, 255, 255), + 2, + cv2.LINE_AA, + ) + debug_publisher.publish(cv2_img.cv2_img_to_msg(coloured_mask)) -if __name__ == "__main__": - main() + response = BodyPixKeypointDetection_Response + response.keypoints = detected_keypoints + response.normalized_keypoints = detected_keypoints_normalized + return response \ No newline at end of file diff --git a/common/vision/lasr_vision_msgs/srv/BodyPixKeypointDetection.srv b/common/vision/lasr_vision_msgs/srv/BodyPixKeypointDetection.srv index 36034b64..2587bd0e 100644 --- a/common/vision/lasr_vision_msgs/srv/BodyPixKeypointDetection.srv +++ b/common/vision/lasr_vision_msgs/srv/BodyPixKeypointDetection.srv @@ -1,3 +1,4 @@ +# Request # Image to run inference on sensor_msgs/Image image_raw @@ -10,6 +11,7 @@ float32 confidence # Whether to return keypoints that are out of bound bool keep_out_of_bounds --- +# Response # keypoints lasr_vision_msgs/BodyPixKeypoint[] keypoints
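
Usage sketch (illustrative, not part of the patch above): the refactor in bodypix.py replaces the old BodyPixNode methods with module-level helpers (run_inference, detect_masks, detect_keypoints) that accept an optional debug publisher and logger instead of touching node state. A minimal node wiring those helpers back into the two services could look like the following; the node name, preloaded model, and debug topic below are assumptions for the example only, and the actual registration lives in nodes/bodypix_services.py.

#!/usr/bin/env python3
# Illustrative sketch only: shows how the module-level helpers exported by
# lasr_vision_bodypix (load_model_cached, detect_masks, detect_keypoints) can
# back the two BodyPix services. Node name, preloaded model, and debug topic
# are assumptions; the real wiring lives in nodes/bodypix_services.py.

import rclpy
from rclpy.node import Node
from sensor_msgs.msg import Image
from lasr_vision_msgs.srv import BodyPixKeypointDetection, BodyPixMaskDetection
import lasr_vision_bodypix as bodypix


class BodyPixExampleNode(Node):
    def __init__(self):
        super().__init__("bodypix_example_node")
        # Warm the model cache so the first request does not pay the load cost.
        bodypix.load_model_cached("resnet50")
        # Debug images drawn by the helpers are published here.
        self.debug_pub = self.create_publisher(Image, "/bodypix/debug/resnet50", 1)
        self.create_service(
            BodyPixKeypointDetection, "/bodypix/keypoint_detection", self.keypoints_cb
        )
        self.create_service(
            BodyPixMaskDetection, "/bodypix/mask_detection", self.masks_cb
        )

    def keypoints_cb(self, request, response):
        # The helper runs inference, publishes the debug image, and returns a
        # filled response; copy its fields into the response handed to us.
        result = bodypix.detect_keypoints(
            request, debug_publisher=self.debug_pub, logger=self.get_logger()
        )
        response.keypoints = result.keypoints
        response.normalized_keypoints = result.normalized_keypoints
        return response

    def masks_cb(self, request, response):
        result = bodypix.detect_masks(
            request, debug_publisher=self.debug_pub, logger=self.get_logger()
        )
        response.masks = result.masks
        return response


def main(args=None):
    rclpy.init(args=args)
    node = BodyPixExampleNode()
    try:
        rclpy.spin(node)
    finally:
        node.destroy_node()
        rclpy.shutdown()


if __name__ == "__main__":
    main()

Because the helpers no longer reach into node state, the service node and the keypoint/mask relay examples share the same inference path, and the functions can be exercised in tests without spinning up rclpy.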