The following code snippet demonstrates how to use the Ultralytics YOLO package with ROS. In this example, we subscribe to a camera topic, process the incoming image using YOLO, and publish the detected objects to new topics for detection and segmentation.
First, import the necessary libraries and instantiate two models: one for segmentation and one for detection. Initialize a ROS node (named `ultralytics`) to enable communication with the ROS master. To ensure a stable connection, we include a brief pause, giving the node sufficient time to establish the connection before proceeding.
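For reference, a minimal sketch of this setup is shown below; `yolov8m-seg.pt` is the segmentation checkpoint used elsewhere in this guide, while `yolov8m.pt` for detection is an assumed choice:

```python
import time

import rospy

from ultralytics import YOLO

detection_model = YOLO("yolov8m.pt")  # assumed detection checkpoint
segmentation_model = YOLO("yolov8m-seg.pt")

# Register the node with the ROS master, then pause briefly so the
# connection is fully established before publishing
rospy.init_node("ultralytics")
time.sleep(1)
```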
Initialize two ROS topics: one for detection and one for segmentation. These topics will be used to publish the annotated images, making them accessible for further processing. The communication between nodes is facilitated using `sensor_msgs/Image` messages.
```python
from sensor_msgs.msg import Image

det_image_pub = rospy.Publisher("/ultralytics/detection/image", Image, queue_size=5)
seg_image_pub = rospy.Publisher("/ultralytics/segmentation/image", Image, queue_size=5)
```
Finally, create a subscriber that listens to messages on the `/camera/color/image_raw` topic and calls a callback function for each new message. This callback function receives messages of type `sensor_msgs/Image`, converts them into a numpy array using `ros_numpy`, processes the images with the previously instantiated YOLO models, annotates the images, and then publishes them back to the respective topics: `/ultralytics/detection/image` for detection and `/ultralytics/segmentation/image` for segmentation.
```python
import ros_numpy


def callback(data):
    """Callback function to process image and publish annotated images."""
    array = ros_numpy.numpify(data)
    if det_image_pub.get_num_connections():
        det_result = detection_model(array)
        det_annotated = det_result[0].plot(show=False)
        det_image_pub.publish(ros_numpy.msgify(Image, det_annotated, encoding="rgb8"))

    if seg_image_pub.get_num_connections():
        seg_result = segmentation_model(array)
        seg_annotated = seg_result[0].plot(show=False)
        seg_image_pub.publish(ros_numpy.msgify(Image, seg_annotated, encoding="rgb8"))


rospy.Subscriber("/camera/color/image_raw", Image, callback)

while True:
    rospy.spin()
```
### Publish Detected Classes with `std_msgs/String`
Standard ROS messages also include `std_msgs/String` messages. In many applications, it is not necessary to republish the entire annotated image; instead, only the classes present in the robot's view are needed. The following example demonstrates how to use `std_msgs/String` messages to republish the detected classes on the `/ultralytics/detection/classes` topic. These messages are more lightweight and provide essential information, making them valuable for various applications.
!!! example "Example Use Case"

    Consider a warehouse robot equipped with a camera and object detection model. Instead of sending large annotated images over the network, the robot can publish a list of detected classes as `std_msgs/String` messages. For instance, when the robot detects objects like "box", "pallet", and "forklift", it publishes these classes to the `/ultralytics/detection/classes` topic. This information can then be used by a central monitoring system to track the inventory in real time, optimize the robot's path planning to avoid obstacles, or trigger specific actions such as picking up a detected box. This approach reduces the bandwidth required for communication and focuses on transmitting critical data. A consumer-side sketch follows the step-by-step code below.
#### String Step-by-Step Usage
This example demonstrates how to use the Ultralytics YOLO package with ROS. In this example, we subscribe to a camera topic, process the incoming image using YOLO, and publish the detected classes to a new topic `/ultralytics/detection/classes` using `std_msgs/String` messages. The `ros_numpy` package is used to convert the ROS Image message to a numpy array for processing with YOLO.
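A compact sketch of such a node is shown below. The callback publishes only when a subscriber is connected and sends the class names as a stringified list; both details, along with the `yolov8m.pt` checkpoint, are assumptions consistent with the image-publishing example above:

```python
import time

import ros_numpy
import rospy
from sensor_msgs.msg import Image
from std_msgs.msg import String

from ultralytics import YOLO

detection_model = YOLO("yolov8m.pt")  # assumed detection checkpoint
rospy.init_node("ultralytics")
time.sleep(1)
classes_pub = rospy.Publisher("/ultralytics/detection/classes", String, queue_size=5)


def callback(data):
    """Run detection on the incoming image and publish the detected class names."""
    array = ros_numpy.numpify(data)
    if classes_pub.get_num_connections():
        det_result = detection_model(array)
        classes = det_result[0].boxes.cls.cpu().numpy().astype(int)
        names = [det_result[0].names[i] for i in classes]
        classes_pub.publish(String(data=str(names)))


rospy.Subscriber("/camera/color/image_raw", Image, callback)

while True:
    rospy.spin()
```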
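On the receiving side, a monitoring node such as the one in the warehouse example needs only a plain `std_msgs/String` subscriber. The sketch below assumes the publisher sends a stringified Python list, as in the node above; the node name and the "box" trigger are illustrative:

```python
import ast

import rospy
from std_msgs.msg import String


def classes_callback(msg):
    """Parse the stringified class list and react to objects of interest."""
    names = ast.literal_eval(msg.data)  # e.g. ['box', 'pallet', 'forklift']
    if "box" in names:
        rospy.loginfo("Box detected: %s", names)


rospy.init_node("warehouse_monitor")
rospy.Subscriber("/ultralytics/detection/classes", String, classes_callback)
rospy.spin()
```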
### Using YOLO with Depth Images
!!! warning "RGB-D Cameras"

    When working with depth images, it is essential to ensure that the RGB and depth images are correctly aligned. RGB-D cameras, such as the Intel RealSense series, provide synchronized RGB and depth images, making it easier to combine information from both sources. If using separate RGB and depth cameras, it is crucial to calibrate them to ensure accurate alignment.
#### Depth Step-by-Step Usage
In this example, we use YOLO to segment an image and apply the extracted mask to segment the object in the depth image. This allows us to determine the distance of each pixel of the object of interest from the camera's focal center. By obtaining this distance information, we can calculate the distance between the camera and the specific object in the scene. Begin by importing the necessary libraries, creating a ROS node, and instantiating a segmentation model and a ROS topic.
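The setup can look like the following sketch, which mirrors the top of the complete listing below; the `/ultralytics/detection/distance` topic name comes from that listing:

```python
import time

import rospy
from std_msgs.msg import String

from ultralytics import YOLO

rospy.init_node("ultralytics")
time.sleep(1)

segmentation_model = YOLO("yolov8m-seg.pt")
classes_pub = rospy.Publisher("/ultralytics/detection/distance", String, queue_size=5)
```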
!!! example "Complete code"

    ```python
    import time

    import numpy as np
    import ros_numpy
    import rospy
    from sensor_msgs.msg import Image
    from std_msgs.msg import String

    from ultralytics import YOLO

    rospy.init_node("ultralytics")
    time.sleep(1)

    segmentation_model = YOLO("yolov8m-seg.pt")

    classes_pub = rospy.Publisher("/ultralytics/detection/distance", String, queue_size=5)


    def callback(data):
        """Callback function to process depth image and RGB image."""
        image = rospy.wait_for_message("/camera/color/image_raw", Image)
        image = ros_numpy.numpify(image)
        depth = ros_numpy.numpify(data)
        result = segmentation_model(image)

        all_objects = []  # one "name: average distance" entry per detected object
        for index, cls in enumerate(result[0].boxes.cls):
            class_index = int(cls.cpu().numpy())
            name = result[0].names[class_index]
            mask = result[0].masks.data.cpu().numpy()[index, :, :].astype(int)
            obj = depth[mask == 1]
            obj = obj[~np.isnan(obj)]
            avg_distance = np.mean(obj) if len(obj) else np.inf
            all_objects.append(f"{name}: {avg_distance:.2f}")

        classes_pub.publish(String(data=str(all_objects)))


    rospy.Subscriber("/camera/depth/image_raw", Image, callback)

    while True:
        rospy.spin()
    ```
## Use Ultralytics with ROS `sensor_msgs/PointCloud2`

<p align="center">
  <img width="100%" src="https://github.com/ultralytics/ultralytics/assets/3855193/f6393fbe-68a5-459b-ae44-3321375bbd3c" alt="Detection and Segmentation in ROS Gazebo">
</p>

The `sensor_msgs/PointCloud2` [message type](https://docs.ros.org/en/api/sensor_msgs/html/msg/PointCloud2.html) is a data structure used in ROS to represent 3D point cloud data. This message type is integral to robotic applications, enabling tasks such as 3D mapping, object recognition, and localization.

A point cloud is a collection of data points defined within a three-dimensional coordinate system. These data points represent the external surface of an object or a scene, captured via 3D scanning technologies. Each point in the cloud has `X`, `Y`, and `Z` coordinates, which correspond to its position in space, and may also include additional information such as color and intensity.
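On a live system, you can check which of these fields a particular cloud carries by inspecting a single message; the topic and node names below are illustrative:

```python
import rospy
from sensor_msgs.msg import PointCloud2

rospy.init_node("cloud_inspect")
cloud = rospy.wait_for_message("/camera/depth/points", PointCloud2)
print([f.name for f in cloud.fields])  # e.g. ['x', 'y', 'z', 'rgb']
```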
!!! warning "Reference frame"

    When working with `sensor_msgs/PointCloud2`, it's essential to consider the reference frame of the sensor from which the point cloud data was acquired. The point cloud is initially captured in the sensor's reference frame. You can determine this reference frame by listening to the `/tf_static` topic. However, depending on your specific application requirements, you might need to convert the point cloud into another reference frame. This transformation can be achieved using the `tf2_ros` package, which provides tools for managing coordinate frames and transforming data between them, as the sketch below illustrates.
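As a rough sketch of such a transformation, assuming the cloud should be expressed in a `base_link` frame (the frame name, node name, and timeout are placeholders):

```python
import rospy
import tf2_ros
from sensor_msgs.msg import PointCloud2
from tf2_sensor_msgs.tf2_sensor_msgs import do_transform_cloud

rospy.init_node("cloud_transform")
tf_buffer = tf2_ros.Buffer()
tf_listener = tf2_ros.TransformListener(tf_buffer)  # fills the buffer in the background

cloud = rospy.wait_for_message("/camera/depth/points", PointCloud2)
# Look up the transform from the cloud's native frame into base_link,
# waiting up to one second for it to become available
transform = tf_buffer.lookup_transform(
    "base_link", cloud.header.frame_id, rospy.Time(0), rospy.Duration(1.0)
)
cloud_in_base = do_transform_cloud(cloud, transform)
```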
!!! tip "Obtaining Point clouds"

    Point clouds can be obtained using various sensors:

    1. **LIDAR (Light Detection and Ranging)**: Uses laser pulses to measure distances to objects and create high-precision 3D maps.
    2. **Depth Cameras**: Capture depth information for each pixel, allowing for 3D reconstruction of the scene.
    3. **Stereo Cameras**: Utilize two or more cameras to obtain depth information through triangulation.
    4. **Structured Light Scanners**: Project a known pattern onto a surface and measure the deformation to calculate depth.
### Using YOLO with Point Clouds

To integrate YOLO with `sensor_msgs/PointCloud2` type messages, we can employ a method similar to the one used for depth maps. By leveraging the color information embedded in the point cloud, we can extract a 2D image, perform segmentation on this image using YOLO, and then apply the resulting mask to the three-dimensional points to isolate the 3D object of interest.

For handling point clouds, we recommend using Open3D (`pip install open3d`), a user-friendly Python library. Open3D provides robust tools for managing point cloud data structures, visualizing them, and executing complex operations seamlessly. This library can significantly simplify the process and enhance our ability to manipulate and analyze point clouds in conjunction with YOLO-based segmentation.
#### Point Clouds Step-by-Step Usage

Import the necessary libraries and instantiate the YOLO model for segmentation.

```python
import time

import rospy

from ultralytics import YOLO

rospy.init_node("ultralytics")
time.sleep(1)
segmentation_model = YOLO("yolov8m-seg.pt")
```
Create a function `pointcloud2_to_array`, which transforms a `sensor_msgs/PointCloud2` message into two numpy arrays. The `sensor_msgs/PointCloud2` messages contain `n` points based on the `width` and `height` of the acquired image. For instance, a `480 x 640` image will have `307,200` points. Each point includes three spatial coordinates (`xyz`) and the corresponding color in `RGB` format. These can be considered as two separate channels of information.

The function returns the `xyz` coordinates and `RGB` values in the format of the original camera resolution (`width x height`). Most sensors have a maximum distance, known as the clip distance, beyond which values are represented as inf (`np.inf`). Before processing, it is important to filter out these null values and assign them a value of `0`.
```python
import numpy as np
import ros_numpy
from sensor_msgs.msg import PointCloud2


def pointcloud2_to_array(pointcloud2: PointCloud2) -> tuple:
    """
    Convert a ROS PointCloud2 message to a numpy array.

    Args:
        pointcloud2 (PointCloud2): the PointCloud2 message

    Returns:
        (tuple): tuple containing (xyz, rgb)
    """
    pc_array = ros_numpy.point_cloud2.pointcloud2_to_array(pointcloud2)
    split = ros_numpy.point_cloud2.split_rgb_field(pc_array)
    rgb = np.stack([split["b"], split["g"], split["r"]], axis=2)
    xyz = ros_numpy.point_cloud2.get_xyz_points(pc_array, remove_nans=False)
    xyz = np.array(xyz).reshape((pointcloud2.height, pointcloud2.width, 3))
    nan_rows = np.isnan(xyz).all(axis=2)
    xyz[nan_rows] = [0, 0, 0]
    rgb[nan_rows] = [0, 0, 0]
    return xyz, rgb
```
Next, subscribe to the `/camera/depth/points` topic to receive the point cloud message and convert the `sensor_msgs/PointCloud2` message into numpy arrays containing the XYZ coordinates and RGB values (using the `pointcloud2_to_array` function). Process the RGB image using the YOLO model to extract segmented objects. For each detected object, extract the segmentation mask and apply it to both the RGB image and the XYZ coordinates to isolate the object in 3D space.

Processing the mask is straightforward since it consists of binary values, with `1` indicating the presence of the object and `0` indicating the absence. To apply the mask, simply multiply the original channels by the mask. This operation effectively isolates the object of interest within the image. Finally, create an Open3D point cloud object and visualize the segmented object in 3D space with associated colors.
```python
import sys

import open3d as o3d

ros_cloud = rospy.wait_for_message("/camera/depth/points", PointCloud2)
xyz, rgb = pointcloud2_to_array(ros_cloud)
result = segmentation_model(rgb)

if not len(result[0].boxes.cls):
    print("No objects detected")
    sys.exit()

classes = result[0].boxes.cls.cpu().numpy().astype(int)
for index, class_id in enumerate(classes):
    mask = result[0].masks.data.cpu().numpy()[index, :, :].astype(int)
    mask_expanded = np.stack([mask, mask, mask], axis=2)

    obj_rgb = rgb * mask_expanded
    obj_xyz = xyz * mask_expanded

    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(obj_xyz.reshape((ros_cloud.height * ros_cloud.width, 3)))
    pcd.colors = o3d.utility.Vector3dVector(obj_rgb.reshape((ros_cloud.height * ros_cloud.width, 3)) / 255)
    o3d.visualization.draw_geometries([pcd])
```
!!! example "Complete code"

    ```python
    import sys
    import time

    import numpy as np
    import open3d as o3d
    import ros_numpy
    import rospy
    from sensor_msgs.msg import PointCloud2

    from ultralytics import YOLO

    rospy.init_node("ultralytics")
    time.sleep(1)
    segmentation_model = YOLO("yolov8m-seg.pt")


    def pointcloud2_to_array(pointcloud2: PointCloud2) -> tuple:
        """
        Convert a ROS PointCloud2 message to a numpy array.

        Args:
            pointcloud2 (PointCloud2): the PointCloud2 message

        Returns:
            (tuple): tuple containing (xyz, rgb)
        """
        pc_array = ros_numpy.point_cloud2.pointcloud2_to_array(pointcloud2)
        split = ros_numpy.point_cloud2.split_rgb_field(pc_array)
        rgb = np.stack([split["b"], split["g"], split["r"]], axis=2)
        xyz = ros_numpy.point_cloud2.get_xyz_points(pc_array, remove_nans=False)
        xyz = np.array(xyz).reshape((pointcloud2.height, pointcloud2.width, 3))
        nan_rows = np.isnan(xyz).all(axis=2)
        xyz[nan_rows] = [0, 0, 0]
        rgb[nan_rows] = [0, 0, 0]
        return xyz, rgb


    ros_cloud = rospy.wait_for_message("/camera/depth/points", PointCloud2)
    xyz, rgb = pointcloud2_to_array(ros_cloud)
    result = segmentation_model(rgb)

    if not len(result[0].boxes.cls):
        print("No objects detected")
        sys.exit()

    classes = result[0].boxes.cls.cpu().numpy().astype(int)
    for index, class_id in enumerate(classes):
        mask = result[0].masks.data.cpu().numpy()[index, :, :].astype(int)
        mask_expanded = np.stack([mask, mask, mask], axis=2)

        obj_rgb = rgb * mask_expanded
        obj_xyz = xyz * mask_expanded

        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(obj_xyz.reshape((ros_cloud.height * ros_cloud.width, 3)))
        pcd.colors = o3d.utility.Vector3dVector(obj_rgb.reshape((ros_cloud.height * ros_cloud.width, 3)) / 255)
        o3d.visualization.draw_geometries([pcd])
    ```

<p align="center">
  <img width="100%" src="https://github.com/ultralytics/ultralytics/assets/3855193/3caafc4a-0edd-4e5f-8dd1-37e30be70123" alt="Point Cloud Segmentation with Ultralytics">
</p>