Skip to content

Commit

Permalink
feat: add yolox type adaptation
Browse files Browse the repository at this point in the history
Signed-off-by: Yuxuan Liu <[email protected]>
  • Loading branch information
HinsRyu committed Nov 12, 2024
1 parent adeeaa1 commit 79bf927
Show file tree
Hide file tree
Showing 9 changed files with 543 additions and 51 deletions.
2 changes: 2 additions & 0 deletions perception/tensorrt_yolox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ include(CheckLanguage)
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
enable_language(CUDA)
find_package(autoware_type_adapters)
else()
message(WARNING "CUDA is not found. preprocess acceleration using CUDA will not be available.")
endif()
Expand All @@ -32,6 +33,7 @@ ament_auto_add_library(${PROJECT_NAME} SHARED

ament_target_dependencies(${PROJECT_NAME}
OpenCV
autoware_type_adapters
)

if(CMAKE_CUDA_COMPILER)
Expand Down
3 changes: 3 additions & 0 deletions perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,6 @@
clip_value: 6.0 # If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration.
preprocess_on_gpu: true # If true, pre-processing is performed on GPU.
calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization.
is_using_image_transport: true # If true, images are published via image_transport; if false, only raw images are published.
type_adaptation_activated: true # If true, type adaptation is used to transfer images on the GPU.
is_publish_debug_rois_image_: true # If true, detected ROIs are drawn in the debug image. NOTE(review): parameter name has a stray member-style trailing underscore — confirm before renaming, the schema key must match.
1 change: 1 addition & 0 deletions perception/tensorrt_yolox/config/yolox_tiny.param.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@
clip_value: 0.0 # If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration.
preprocess_on_gpu: true # If true, pre-processing is performed on GPU.
calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization.

Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,15 @@
#include <string>
#include <vector>

#include "type_adapters/image_container.hpp"

namespace tensorrt_yolox
{
using cuda_utils::CudaUniquePtr;
using cuda_utils::CudaUniquePtrHost;
using cuda_utils::makeCudaStream;
using cuda_utils::StreamUniquePtr;
using ImageContainer = autoware::type_adaptation::type_adapters::ImageContainer;

struct Object
{
Expand Down Expand Up @@ -106,6 +109,10 @@ class TrtYoloX
bool doInference(
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);

bool doInference(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);

/**
* @brief run inference including pre-process and post-process
Expand All @@ -116,6 +123,9 @@ class TrtYoloX
bool doInferenceWithRoi(
const std::vector<cv::Mat> & images, ObjectArrays & objects, const std::vector<cv::Rect> & roi);

bool doInferenceWithRoi(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, const std::vector<cv::Rect> & roi);

/**
* @brief run multi-scale inference including pre-process and post-process
* @param[out] objects results for object detection
Expand All @@ -124,6 +134,9 @@ class TrtYoloX
*/
bool doMultiScaleInference(
const cv::Mat & image, ObjectArrays & objects, const std::vector<cv::Rect> & roi);

bool doMultiScaleInference(
const std::shared_ptr<ImageContainer> & image, ObjectArrays & objects, const std::vector<cv::Rect> & roi);

/**
* @brief allocate buffer for preprocess on GPU
Expand Down Expand Up @@ -167,6 +180,7 @@ class TrtYoloX
* @param[in] images batching images
*/
void preprocessGpu(const std::vector<cv::Mat> & images);
void preprocessGpu(const std::vector<std::shared_ptr<ImageContainer>> & images);

/**
* @brief run preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
Expand All @@ -183,6 +197,8 @@ class TrtYoloX
void preprocessWithRoiGpu(
const std::vector<cv::Mat> & images, const std::vector<cv::Rect> & rois);

void preprocessWithRoiGpu(
const std::vector<std::shared_ptr<ImageContainer>> & images, const std::vector<cv::Rect> & rois);
/**
* @brief run multi-scale preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
* @param[in] images batching images
Expand All @@ -196,15 +212,26 @@ class TrtYoloX
* @param[in] rois region of interest
*/
void multiScalePreprocessGpu(const cv::Mat & image, const std::vector<cv::Rect> & rois);
void multiScalePreprocessGpu(const std::shared_ptr<ImageContainer> & image, const std::vector<cv::Rect> & rois);

bool multiScaleFeedforward(const cv::Mat & image, int batch_size, ObjectArrays & objects);
bool multiScaleFeedforward(const std::shared_ptr<ImageContainer> & image, int batch_size, ObjectArrays & objects);

bool multiScaleFeedforwardAndDecode(
const cv::Mat & images, int batch_size, ObjectArrays & objects);
bool multiScaleFeedforwardAndDecode(
const std::shared_ptr<ImageContainer> & images, int batch_size, ObjectArrays & objects);

bool feedforward(const std::vector<cv::Mat> & images, ObjectArrays & objects);
bool feedforward(const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects);

bool feedforwardAndDecode(
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);
bool feedforwardAndDecode(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);

void decodeOutputs(float * prob, ObjectArray & objects, float scale, cv::Size & img_size) const;
void generateGridsAndStride(
const int target_w, const int target_h, const std::vector<int> & strides,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_

#include "object_recognition_utils/object_recognition_utils.hpp"
#include "type_adapters/image_container.hpp"

#include <autoware/universe_utils/ros/debug_publisher.hpp>
#include <autoware/universe_utils/system/stop_watch.hpp>
Expand All @@ -28,6 +29,7 @@
#include <std_msgs/msg/header.hpp>
#include <tier4_perception_msgs/msg/detected_objects_with_feature.hpp>
#include <tier4_perception_msgs/msg/semantic.hpp>

#if __has_include(<cv_bridge/cv_bridge.hpp>)
#include <cv_bridge/cv_bridge.hpp>
Expand All @@ -48,6 +50,7 @@ namespace tensorrt_yolox
// cspell: ignore Semseg
using LabelMap = std::map<int, std::string>;
using Label = tier4_perception_msgs::msg::Semantic;
using ImageContainer = autoware::type_adaptation::type_adapters::ImageContainer;
class TrtYoloXNode : public rclcpp::Node
{
struct RoiOverlaySemsegLabel
Expand All @@ -74,12 +77,24 @@ class TrtYoloXNode : public rclcpp::Node

private:
void onConnect();
void setUpImageSubscriber();
bool checkInputBlocked();
uint32_t getNumOutputConnections();
void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg);
void onGpuImage(std::shared_ptr<ImageContainer> msg);
bool readLabelFile(const std::string & label_path);
void replaceLabelMap();
void overlapSegmentByRoi(
const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height);
int mapRoiLabel2SegLabel(const int32_t roi_label_index);
void postInference(
const std_msgs::msg::Header header,
const tensorrt_yolox::ObjectArrays & objects,
const int width, const int height,
std::vector<cv::Mat> & masks);
void drawImageDetection(cv_bridge::CvImagePtr image,
const tensorrt_yolox::ObjectArrays & objects,
const int width, const int height);
image_transport::Publisher image_pub_;
image_transport::Publisher mask_pub_;

Expand All @@ -88,13 +103,20 @@ class TrtYoloXNode : public rclcpp::Node
rclcpp::Publisher<tier4_perception_msgs::msg::DetectedObjectsWithFeature>::SharedPtr objects_pub_;

image_transport::Subscriber image_sub_;
rclcpp::Subscription<ImageContainer>::SharedPtr gpu_image_sub_;
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr simple_image_pub_;
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr simple_mask_pub_;
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr simple_color_mask_pub_;

rclcpp::TimerBase::SharedPtr timer_;

LabelMap label_map_;
std::unique_ptr<tensorrt_yolox::TrtYoloX> trt_yolox_;
bool is_roi_overlap_segment_;
bool is_publish_color_mask_;
bool is_using_image_transport_;
bool type_adaptation_activated_;
bool is_publish_debug_rois_image_;
float overlap_roi_score_threshold_;
// TODO(badai-nguyen): change to function
std::map<std::string, int> remap_roi_to_semantic_ = {
Expand Down
15 changes: 15 additions & 0 deletions perception/tensorrt_yolox/schema/yolox_s_plus_opt.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,21 @@
"type": "string",
"default": "",
"description": "Path to a file which contains path to images. Those images will be used for int8 quantization."
},
"is_using_image_transport": {
"type": "boolean",
"default": false,
"description": "If true, will use image_transport, false -> will only publish raw images"
},
"type_adaptation_activated": {
"type": "boolean",
"default": false,
"description": "If true, will use type_adaptation to transfer images in GPU."
},
"is_publish_debug_rois_image_": {
"type": "boolean",
"default": true,
"description": "If true, will draw ROIs in the image."
}
},
"required": [
Expand Down
Loading

0 comments on commit 79bf927

Please sign in to comment.