kornia · edgarriba · Sep 8, 2024 · Sep 8, 2024 · Sep 8, 2024 · Sep 8, 2024
diff --git a/Cargo.toml b/Cargo.toml
@@ -2,6 +2,7 @@
 resolver = "2"
 members = [
     "crates/kornia-core",
+    "crates/kornia-dnn",
     "crates/kornia-image",
     "crates/kornia-io",
     "crates/kornia-imgproc",
@@ -26,6 +27,7 @@ version = "0.1.6+dev"
 
 [workspace.dependencies]
 kornia-core = { path = "crates/kornia-core", version = "0.1.6+dev" }
+kornia-dnn = { path = "crates/kornia-dnn", version = "0.1.6+dev" }
 kornia-image = { path = "crates/kornia-image", version = "0.1.6+dev" }
 kornia-io = { path = "crates/kornia-io", version = "0.1.6+dev" }
 kornia-imgproc = { path = "crates/kornia-imgproc", version = "0.1.6+dev" }

diff --git a/crates/kornia-dnn/Cargo.toml b/crates/kornia-dnn/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "kornia-dnn"
+authors.workspace = true
+categories.workspace = true
+description.workspace = true
+edition.workspace = true
+homepage.workspace = true
+include.workspace = true
+license.workspace = true
+license-file.workspace = true
+readme.workspace = true
+repository.workspace = true
+rust-version.workspace = true
+version.workspace = true
+
+[dependencies]
+kornia-core = { workspace = true }
+kornia-image = { workspace = true }
+ort = { version = "2.0.0-rc.4", features = [
+    "load-dynamic",
+], default-features = false }
+thiserror = "1"
diff --git a/crates/kornia-dnn/src/error.rs b/crates/kornia-dnn/src/error.rs
@@ -0,0 +1,11 @@
+/// Errors produced by the `kornia-dnn` crate.
+///
+/// Every variant wraps an error coming from an underlying crate and is created
+/// through the `From` implementation generated by `#[from]`, so the `?` operator
+/// converts into `DnnError` automatically.
+#[derive(thiserror::Error, Debug)]
+pub enum DnnError {
+    /// Any error reported by ONNX Runtime through the `ort` crate.
+    ///
+    /// This is not limited to model loading: configuring the session, creating
+    /// input tensors, running inference and extracting outputs all surface their
+    /// failures through this variant, hence the generic message.
+    #[error("ONNX Runtime error")]
+    OrtError(#[from] ort::Error),
+
+    /// An error propagated from the `kornia_image` crate.
+    #[error("Image error")]
+    ImageError(#[from] kornia_image::ImageError),
+
+    /// An error propagated from the `kornia_core` crate.
+    #[error("Tensor error")]
+    TensorError(#[from] kornia_core::TensorError),
+}
diff --git a/crates/kornia-dnn/src/lib.rs b/crates/kornia-dnn/src/lib.rs
@@ -0,0 +1,27 @@
+//! # Kornia DNN
+//!
+//! This module contains DNN (Deep Neural Network) related functionality.
+
+/// Error type for the dnn module.
+pub mod error;
+
+/// This module contains the RT-DETR model.
+pub mod rtdetr;
+
+// TODO: move this into a shared module for common structs
+/// Represents a detected object in an image.
+///
+/// The struct is plain data (one integer label and five floats), so it derives
+/// `Clone` and `PartialEq` in addition to `Debug` to let callers copy and
+/// compare detections.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Detection {
+    /// The class label of the detected object.
+    pub label: u32,
+    /// The confidence score of the detection (typically between 0 and 1).
+    pub score: f32,
+    /// The x-coordinate of the top-left corner of the bounding box.
+    pub x: f32,
+    /// The y-coordinate of the top-left corner of the bounding box.
+    pub y: f32,
+    /// The width of the bounding box.
+    pub w: f32,
+    /// The height of the bounding box.
+    pub h: f32,
+}
diff --git a/crates/kornia-dnn/src/rtdetr.rs b/crates/kornia-dnn/src/rtdetr.rs
@@ -0,0 +1,174 @@
+//! # RT-DETR
+//!
+//! This module contains the RT-DETR model.
+//!
+//! The RT-DETR model is a state-of-the-art object detection model.
+
+use std::path::PathBuf;
+
+use crate::error::DnnError;
+use crate::Detection;
+use kornia_core::{CpuAllocator, Tensor};
+use kornia_image::Image;
+use ort::{GraphOptimizationLevel, Session};
+
+/// Builder for the RT-DETR detector.
+///
+/// Collects the settings needed to create an `RTDETRDetector` instance. The
+/// builder only holds paths and a thread count, so it derives `Debug` and
+/// `Clone` to allow logging a configuration and reusing it for several detectors.
+#[derive(Debug, Clone)]
+pub struct RTDETRDetectorBuilder {
+    /// Path to the RT-DETR model file.
+    pub model_path: PathBuf,
+    /// Path to the ONNX Runtime dynamic library.
+    pub ort_dylib_path: PathBuf,
+    /// Number of threads to use for inference.
+    pub num_threads: usize,
+}
+
+impl RTDETRDetectorBuilder {
+    /// Starts a builder for the given model and ONNX Runtime library.
+    ///
+    /// The thread count is initialised to 4; use `with_num_threads` to override it.
+    ///
+    /// # Arguments
+    ///
+    /// * `model_path` - Path to the RT-DETR model file.
+    /// * `ort_dylib_path` - Path to the ONNX Runtime dynamic library.
+    ///
+    /// # Returns
+    ///
+    /// A `Result` containing the `RTDETRDetectorBuilder`. The current
+    /// implementation performs no validation and always returns `Ok`.
+    pub fn new(model_path: PathBuf, ort_dylib_path: PathBuf) -> Result<Self, DnnError> {
+        let builder = Self {
+            num_threads: 4,
+            model_path,
+            ort_dylib_path,
+        };
+        Ok(builder)
+    }
+
+    /// Overrides the number of threads to use for inference.
+    ///
+    /// # Arguments
+    ///
+    /// * `num_threads` - The number of threads to use.
+    ///
+    /// # Returns
+    ///
+    /// The builder with the new thread count; all other settings are kept.
+    pub fn with_num_threads(self, num_threads: usize) -> Self {
+        Self {
+            num_threads,
+            ..self
+        }
+    }
+
+    /// Consumes the builder and creates the `RTDETRDetector`.
+    ///
+    /// # Returns
+    ///
+    /// A `Result` containing the `RTDETRDetector` if successful, or the
+    /// `DnnError` reported by `RTDETRDetector::new` otherwise.
+    pub fn build(self) -> Result<RTDETRDetector, DnnError> {
+        let Self {
+            model_path,
+            ort_dylib_path,
+            num_threads,
+        } = self;
+        RTDETRDetector::new(model_path, ort_dylib_path, num_threads)
+    }
+}
+
+/// RT-DETR object detector.
+///
+/// This struct represents an instance of the RT-DETR object detection model,
+/// backed by an ONNX Runtime session.
+pub struct RTDETRDetector {
+    // ONNX Runtime session created in `new` and used by `run` for inference.
+    session: Session,
+}
+
+impl RTDETRDetector {
+    // TODO: default to hf hub
+    /// Creates a new `RTDETRDetector` instance.
+    ///
+    /// # Arguments
+    ///
+    /// * `model_path` - Path to the RT-DETR model file.
+    /// * `ort_dylib_path` - Path to the ONNX Runtime dynamic library.
+    /// * `num_threads` - Number of threads to use for inference.
+    ///
+    /// # Returns
+    ///
+    /// A `Result` containing the `RTDETRDetector` if successful, or a `DnnError` if an error occurred.
+    ///
+    /// # Errors
+    ///
+    /// Returns `DnnError::OrtError` if the session cannot be configured or the
+    /// model cannot be loaded from `model_path`.
+    ///
+    /// # Side effects
+    ///
+    /// Sets the `ORT_DYLIB_PATH` environment variable for the whole process, so
+    /// the value is shared by every detector created afterwards.
+    pub fn new(
+        model_path: PathBuf,
+        ort_dylib_path: PathBuf,
+        num_threads: usize,
+    ) -> Result<Self, DnnError> {
+        // set the ort dylib path
+        // NOTE: environment variables are process-global; this must happen before
+        // the session is built so the dynamically loaded runtime can be located.
+        std::env::set_var("ORT_DYLIB_PATH", ort_dylib_path);
+
+        // create the ort session
+        let session = Session::builder()?
+            .with_optimization_level(GraphOptimizationLevel::Level3)?
+            .with_intra_threads(num_threads)?
+            .commit_from_file(model_path)?;
+
+        Ok(Self { session })
+    }
+
+    /// Runs object detection on the given image.
+    ///
+    /// The image is converted to `f32`, scaled by `1/255`, rearranged from HWC to
+    /// NCHW with a batch size of one and fed to the model input named `"input"`.
+    /// The first model output is decoded as consecutive groups of six values
+    /// `[label, score, x, y, w, h]`, one group per detection.
+    ///
+    /// # Arguments
+    ///
+    /// * `image` - The input image as an `Image<u8, 3>`.
+    ///
+    /// # Returns
+    ///
+    /// A `Result` containing a vector of `Detection` objects if successful, or a `DnnError` if an error occurred.
+    /// Detections are returned as produced by the model; no score filtering is applied here.
+    pub fn run(&self, image: &Image<u8, 3>) -> Result<Vec<Detection>, DnnError> {
+        // TODO: explore pre-allocating memory for the image
+        // cast and scale the image to f32
+        let mut image_hwc_f32 = Image::from_size_val(image.size(), 0.0f32)?;
+        kornia_image::ops::cast_and_scale(image, &mut image_hwc_f32, 1.0 / 255.)?;
+
+        // convert to HWC -> CHW
+        let image_chw = image_hwc_f32.permute_axes([2, 0, 1]).as_contiguous();
+
+        // TODO: create a Tensor::insert_axis in kornia-rs
+        // prepend the batch axis: CHW -> NCHW with N = 1
+        let image_nchw = Tensor::from_shape_vec(
+            [
+                1,
+                image_chw.shape[0],
+                image_chw.shape[1],
+                image_chw.shape[2],
+            ],
+            image_chw.into_vec(),
+            CpuAllocator,
+        )?;
+
+        // make the ort tensor
+        let ort_tensor = ort::Tensor::from_array((image_nchw.shape, image_nchw.into_vec()))?;
+
+        // run the model
+        let outputs = self.session.run(ort::inputs!["input" => ort_tensor]?)?;
+
+        // borrow the raw data of the first output; the data is only consumed as a
+        // flat slice below, so no intermediate tensor (and no copy) is needed and
+        // the reported shape never has to be indexed
+        let (_, out_ort) = outputs[0].try_extract_raw_tensor::<f32>()?;
+
+        // parse the output data
+        // we expect the output tensor to be a tensor of shape [1, N, 6]
+        // where each element is a detection [label, score, x, y, w, h]
+        let detections = out_ort
+            .chunks_exact(6)
+            .map(|chunk| Detection {
+                label: chunk[0] as u32,
+                score: chunk[1],
+                x: chunk[2],
+                y: chunk[3],
+                w: chunk[4],
+                h: chunk[5],
+            })
+            .collect::<Vec<_>>();
+
+        Ok(detections)
+    }
+}
diff --git a/crates/kornia/Cargo.toml b/crates/kornia/Cargo.toml
@@ -17,6 +17,7 @@ jpegturbo = ["kornia-io/jpegturbo"]
 
 [dependencies]
 kornia-core.workspace = true
+kornia-dnn.workspace = true
 kornia-image.workspace = true
 kornia-imgproc.workspace = true
 kornia-io = { workspace = true, features = [] }
diff --git a/crates/kornia/src/lib.rs b/crates/kornia/src/lib.rs
@@ -1,6 +1,9 @@
 #[doc(inline)]
 pub use kornia_core as core;
 
+#[doc(inline)]
+pub use kornia_dnn as dnn;
+
 #[doc(inline)]
 pub use kornia_image as image;
 

diff --git a/examples/rtdetr/Cargo.toml b/examples/rtdetr/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "rtdetr"
+version = "0.1.0"
+authors = ["Edgar Riba <[email protected]>"]
+license = "Apache-2.0"
+edition = "2021"
+publish = false
+
+[dependencies]
+clap = { version = "4.5.4", features = ["derive"] }
+ctrlc = "3.4.4"
+kornia = { workspace = true, features = ["gstreamer"] }
+rerun = "0.18"
+tokio = { version = "1" }
diff --git a/examples/rtdetr/README.md b/examples/rtdetr/README.md
@@ -0,0 +1,28 @@
+An example showing how to use the RTDETR model with the `kornia::dnn` module and the webcam with the `kornia::io` module with the ability to cancel the feed after a certain amount of time. This example will display the webcam feed in a [`rerun`](https://github.com/rerun-io/rerun) window.
+
+NOTE: This example requires the gstreamer backend to be enabled. To enable the gstreamer backend, use the `gstreamer` feature flag when building the `kornia` crate and its dependencies.
+
+## Prerequisites
+
+You mainly need to download ONNX Runtime from: <https://github.com/microsoft/onnxruntime/releases>
+
+## Usage
+
+```bash
+Usage: rtdetr [OPTIONS] --model-path <MODEL_PATH> --ort-dylib-path <ORT_DYLIB_PATH>
+
+Options:
+  -c, --camera-id <CAMERA_ID>              [default: 0]
+  -f, --fps <FPS>                          [default: 5]
+  -m, --model-path <MODEL_PATH>
+  -o, --ort-dylib-path <ORT_DYLIB_PATH>
+  -n, --num-threads <NUM_THREADS>          [default: 8]
+  -s, --score-threshold <SCORE_THRESHOLD>  [default: 0.75]
+  -h, --help                               Print help
+```
+
+Example:
+
+```bash
+cargo run --bin rtdetr --release -- --camera-id 0 --model-path rtdetr.onnx --ort-dylib-path /path/to/libonnxruntime.so --num-threads 8 --score-threshold 0.75
+```