Skip to content

Commit

Permalink
feat: add yolox type adaptation
Browse files Browse the repository at this point in the history
Signed-off-by: Yuxuan Liu <[email protected]>
  • Loading branch information
HinsRyu committed Nov 12, 2024
1 parent adeeaa1 commit 79bf927
Show file tree
Hide file tree
Showing 9 changed files with 543 additions and 51 deletions.
2 changes: 2 additions & 0 deletions perception/tensorrt_yolox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ include(CheckLanguage)
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
enable_language(CUDA)
find_package(autoware_type_adapters)
else()
message(WARNING "CUDA is not found. preprocess acceleration using CUDA will not be available.")
endif()
Expand All @@ -32,6 +33,7 @@ ament_auto_add_library(${PROJECT_NAME} SHARED

ament_target_dependencies(${PROJECT_NAME}
OpenCV
autoware_type_adapters
)

if(CMAKE_CUDA_COMPILER)
Expand Down
3 changes: 3 additions & 0 deletions perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,6 @@
clip_value: 6.0 # If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration.
preprocess_on_gpu: true # If true, pre-processing is performed on GPU.
calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization.
is_using_image_transport: true # If true, images are published via image_transport; if false, only raw images are published.
type_adaptation_activated: true # If true, type adaptation is used to transfer images on the GPU.
is_publish_debug_rois_image_: true # If true, detected ROIs are drawn in the debug image. NOTE(review): parameter name has a stray member-style trailing underscore — confirm before renaming, the schema key must match.
1 change: 1 addition & 0 deletions perception/tensorrt_yolox/config/yolox_tiny.param.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@
clip_value: 0.0 # If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration.
preprocess_on_gpu: true # If true, pre-processing is performed on GPU.
calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization.

Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,15 @@
#include <string>
#include <vector>

#include "type_adapters/image_container.hpp"

namespace tensorrt_yolox
{
using cuda_utils::CudaUniquePtr;
using cuda_utils::CudaUniquePtrHost;
using cuda_utils::makeCudaStream;
using cuda_utils::StreamUniquePtr;
using ImageContainer = autoware::type_adaptation::type_adapters::ImageContainer;

struct Object
{
Expand Down Expand Up @@ -106,6 +109,10 @@ class TrtYoloX
bool doInference(
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);

bool doInference(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);

/**
* @brief run inference including pre-process and post-process
Expand All @@ -116,6 +123,9 @@ class TrtYoloX
bool doInferenceWithRoi(
const std::vector<cv::Mat> & images, ObjectArrays & objects, const std::vector<cv::Rect> & roi);

bool doInferenceWithRoi(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, const std::vector<cv::Rect> & roi);

/**
* @brief run multi-scale inference including pre-process and post-process
* @param[out] objects results for object detection
Expand All @@ -124,6 +134,9 @@ class TrtYoloX
*/
bool doMultiScaleInference(
const cv::Mat & image, ObjectArrays & objects, const std::vector<cv::Rect> & roi);

bool doMultiScaleInference(
const std::shared_ptr<ImageContainer> & image, ObjectArrays & objects, const std::vector<cv::Rect> & roi);

/**
* @brief allocate buffer for preprocess on GPU
Expand Down Expand Up @@ -167,6 +180,7 @@ class TrtYoloX
* @param[in] images batching images
*/
void preprocessGpu(const std::vector<cv::Mat> & images);
void preprocessGpu(const std::vector<std::shared_ptr<ImageContainer>> & images);

/**
* @brief run preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
Expand All @@ -183,6 +197,8 @@ class TrtYoloX
void preprocessWithRoiGpu(
const std::vector<cv::Mat> & images, const std::vector<cv::Rect> & rois);

void preprocessWithRoiGpu(
const std::vector<std::shared_ptr<ImageContainer>> & images, const std::vector<cv::Rect> & rois);
/**
* @brief run multi-scale preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
* @param[in] images batching images
Expand All @@ -196,15 +212,26 @@ class TrtYoloX
* @param[in] rois region of interest
*/
void multiScalePreprocessGpu(const cv::Mat & image, const std::vector<cv::Rect> & rois);
void multiScalePreprocessGpu(const std::shared_ptr<ImageContainer> & image, const std::vector<cv::Rect> & rois);

bool multiScaleFeedforward(const cv::Mat & image, int batch_size, ObjectArrays & objects);
bool multiScaleFeedforward(const std::shared_ptr<ImageContainer> & image, int batch_size, ObjectArrays & objects);

bool multiScaleFeedforwardAndDecode(
const cv::Mat & images, int batch_size, ObjectArrays & objects);
bool multiScaleFeedforwardAndDecode(
const std::shared_ptr<ImageContainer> & images, int batch_size, ObjectArrays & objects);

bool feedforward(const std::vector<cv::Mat> & images, ObjectArrays & objects);
bool feedforward(const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects);

bool feedforwardAndDecode(
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);
bool feedforwardAndDecode(
const std::vector<std::shared_ptr<ImageContainer>> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
std::vector<cv::Mat> & color_masks);

void decodeOutputs(float * prob, ObjectArray & objects, float scale, cv::Size & img_size) const;
void generateGridsAndStride(
const int target_w, const int target_h, const std::vector<int> & strides,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_

#include "object_recognition_utils/object_recognition_utils.hpp"
#include "type_adapters/image_container.hpp"

#include <autoware/universe_utils/ros/debug_publisher.hpp>
#include <autoware/universe_utils/system/stop_watch.hpp>
Expand All @@ -28,6 +29,7 @@
#include <std_msgs/msg/header.hpp>
#include <tier4_perception_msgs/msg/detected_objects_with_feature.hpp>
#include <tier4_perception_msgs/msg/semantic.hpp>

#if __has_include(<cv_bridge/cv_bridge.hpp>)
#include <cv_bridge/cv_bridge.hpp>
Expand All @@ -48,6 +50,7 @@ namespace tensorrt_yolox
// cspell: ignore Semseg
using LabelMap = std::map<int, std::string>;
using Label = tier4_perception_msgs::msg::Semantic;
using ImageContainer = autoware::type_adaptation::type_adapters::ImageContainer;
class TrtYoloXNode : public rclcpp::Node
{
struct RoiOverlaySemsegLabel
Expand All @@ -74,12 +77,24 @@ class TrtYoloXNode : public rclcpp::Node

private:
void onConnect();
void setUpImageSubscriber();
bool checkInputBlocked();
uint32_t getNumOutputConnections();
void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg);
void onGpuImage(std::shared_ptr<ImageContainer> msg);
bool readLabelFile(const std::string & label_path);
void replaceLabelMap();
void overlapSegmentByRoi(
const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height);
int mapRoiLabel2SegLabel(const int32_t roi_label_index);
void postInference(
const std_msgs::msg::Header header,
const tensorrt_yolox::ObjectArrays & objects,
const int width, const int height,
std::vector<cv::Mat> & masks);
void drawImageDetection(cv_bridge::CvImagePtr image,
const tensorrt_yolox::ObjectArrays & objects,
const int width, const int height);
image_transport::Publisher image_pub_;
image_transport::Publisher mask_pub_;

Expand All @@ -88,13 +103,20 @@ class TrtYoloXNode : public rclcpp::Node
rclcpp::Publisher<tier4_perception_msgs::msg::DetectedObjectsWithFeature>::SharedPtr objects_pub_;

image_transport::Subscriber image_sub_;
rclcpp::Subscription<ImageContainer>::SharedPtr gpu_image_sub_;
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr simple_image_pub_;
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr simple_mask_pub_;
rclcpp::Publisher<sensor_msgs::msg::Image>::SharedPtr simple_color_mask_pub_;

rclcpp::TimerBase::SharedPtr timer_;

LabelMap label_map_;
std::unique_ptr<tensorrt_yolox::TrtYoloX> trt_yolox_;
bool is_roi_overlap_segment_;
bool is_publish_color_mask_;
bool is_using_image_transport_;
bool type_adaptation_activated_;
bool is_publish_debug_rois_image_;
float overlap_roi_score_threshold_;
// TODO(badai-nguyen): change to function
std::map<std::string, int> remap_roi_to_semantic_ = {
Expand Down
15 changes: 15 additions & 0 deletions perception/tensorrt_yolox/schema/yolox_s_plus_opt.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,21 @@
"type": "string",
"default": "",
"description": "Path to a file which contains path to images. Those images will be used for int8 quantization."
},
"is_using_image_transport": {
"type": "boolean",
"default": false,
"description": "If true, will use image_transport, false -> will only publish raw images"
},
"type_adaptation_activated": {
"type": "boolean",
"default": false,
"description": "If true, will use type_adaptation to transfer images in GPU."
},
"is_publish_debug_rois_image_": {
"type": "boolean",
"default": true,
"description": "If true, will draw ROIs in the image."
}
},
"required": [
Expand Down
Loading

0 comments on commit 79bf927

Please sign in to comment.