Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix TensorStorage memory deallocation #145

Merged
merged 8 commits into from
Sep 28, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions crates/kornia-core/src/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ use crate::{
use serde::ser::SerializeStruct;
use serde::Deserialize;

impl<T, const N: usize, A: TensorAllocator> serde::Serialize for Tensor<T, N, A>
impl<T, const N: usize, A> serde::Serialize for Tensor<T, N, A>
where
T: serde::Serialize + SafeTensorType,
A: TensorAllocator + 'static,
{
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
Expand All @@ -23,7 +24,7 @@ where
}
}

impl<'de, T, const N: usize, A: TensorAllocator + Default> serde::Deserialize<'de>
impl<'de, T, const N: usize, A: TensorAllocator + Default + 'static> serde::Deserialize<'de>
for Tensor<T, N, A>
where
T: serde::Deserialize<'de> + SafeTensorType,
Expand Down
116 changes: 110 additions & 6 deletions crates/kornia-core/src/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,30 @@ impl SafeTensorType for i64 {}
impl SafeTensorType for f32 {}
impl SafeTensorType for f64 {}

/// Represents the owner of custom Arrow Buffer memory allocations.
///
/// This struct is used to facilitate the automatic deallocation of the memory it owns,
/// using the `Drop` trait.
pub struct TensorCustomAllocationOwner<A: TensorAllocator> {
/// The allocator used to allocate the tensor storage.
alloc: A,
/// The layout used for the allocation.
layout: Layout,
/// The pointer to the allocated memory
ptr: *const u8,
emilmgeorge marked this conversation as resolved.
Show resolved Hide resolved
}

// SAFETY: TensorCustomAllocationOwner is never modified from multiple threads.
// NOTE(review): these impls assert Send/Sync/RefUnwindSafe unconditionally, regardless of
// whether `A: Send + Sync` holds — confirm every TensorAllocator implementation is actually
// safe to share/move across threads before relying on this.
impl<A: TensorAllocator> std::panic::RefUnwindSafe for TensorCustomAllocationOwner<A> {}
unsafe impl<A: TensorAllocator> Sync for TensorCustomAllocationOwner<A> {}
unsafe impl<A: TensorAllocator> Send for TensorCustomAllocationOwner<A> {}

/// Deallocates the owned memory through the original allocator when the owner is dropped.
///
/// The owner is stored in an `Arc` handed to `Buffer::from_custom_allocation`, so this runs
/// once the last buffer referencing the allocation goes away, releasing the memory with the
/// same allocator and layout that produced it.
impl<A: TensorAllocator> Drop for TensorCustomAllocationOwner<A> {
    fn drop(&mut self) {
        // `ptr` was allocated as `*mut u8` by `alloc` with exactly this `layout`;
        // the cast only restores its original mutability.
        self.alloc.dealloc(self.ptr as *mut u8, self.layout);
    }
}

/// Represents a contiguous memory region that can be shared with other buffers and across thread boundaries.
///
/// This struct provides methods to create, access, and manage tensor storage using a custom allocator.
Expand All @@ -35,9 +59,10 @@ where
alloc: A,
}

impl<T, A: TensorAllocator> TensorStorage<T, A>
impl<T, A> TensorStorage<T, A>
where
T: SafeTensorType + Clone,
A: TensorAllocator + 'static,
{
/// Creates a new tensor storage with the given length and allocator.
///
Expand All @@ -51,16 +76,21 @@ where
/// A new tensor storage if successful, otherwise an error.
pub fn new(len: usize, alloc: A) -> Result<Self, TensorAllocatorError> {
// allocate memory for tensor storage
let ptr =
alloc.alloc(Layout::array::<T>(len).map_err(TensorAllocatorError::LayoutError)?)?;
let layout = Layout::array::<T>(len).map_err(TensorAllocatorError::LayoutError)?;
let ptr = alloc.alloc(layout)?;
let owner = TensorCustomAllocationOwner {
alloc: alloc.clone(),
emilmgeorge marked this conversation as resolved.
Show resolved Hide resolved
layout,
ptr,
};

// create the buffer
let buffer = unsafe {
// SAFETY: `ptr` is non-null and properly aligned, and `len` is the correct size.
Buffer::from_custom_allocation(
NonNull::new_unchecked(ptr),
len * std::mem::size_of::<T>(),
Arc::new(Vec::<T>::with_capacity(len)),
Arc::new(owner),
)
};

Expand Down Expand Up @@ -223,7 +253,7 @@ where
let buffer = Buffer::from_custom_allocation(
NonNull::new_unchecked(ptr as *mut u8),
len * std::mem::size_of::<T>(),
Arc::new(Vec::<T>::with_capacity(len)),
Arc::new(()),
);

// create tensor storage
Expand All @@ -238,7 +268,7 @@ where
impl<T, A> Clone for TensorStorage<T, A>
where
T: SafeTensorType + Clone,
A: TensorAllocator + Clone,
A: TensorAllocator + Clone + 'static,
{
fn clone(&self) -> Self {
let mut new_storage = Self::new(self.len(), self.alloc.clone())
Expand All @@ -253,6 +283,8 @@ mod tests {
use super::*;
use crate::allocator::CpuAllocator;
use std::alloc::Layout;
use std::cell::RefCell;
use std::rc::Rc;

#[test]
fn test_tensor_storage() -> Result<(), TensorAllocatorError> {
Expand Down Expand Up @@ -365,4 +397,76 @@ mod tests {
assert_eq!(result_vec.capacity(), original_vec_capacity);
assert!(std::ptr::eq(result_vec.as_ptr(), original_vec_ptr));
}

#[test]
fn test_tensor_storage_allocator() {
// A test TensorAllocator that keeps a count of the bytes that are allocated but not yet
// deallocated via the allocator.
#[derive(Clone)]
struct TestAllocator {
bytes_allocated: Rc<RefCell<i32>>,
}
impl TensorAllocator for TestAllocator {
fn alloc(&self, layout: Layout) -> Result<*mut u8, TensorAllocatorError> {
*self.bytes_allocated.borrow_mut() += layout.size() as i32;
CpuAllocator.alloc(layout)
}
fn dealloc(&self, ptr: *mut u8, layout: Layout) {
*self.bytes_allocated.borrow_mut() -= layout.size() as i32;
CpuAllocator.dealloc(ptr, layout)
}
}

let allocator = TestAllocator {
bytes_allocated: Rc::new(RefCell::new(0)),
};
let len = 1024;

// TensorStorage::new()
// Deallocation should happen when `storage` goes out of scope.
{
let _storage = TensorStorage::<u8, _>::new(len, allocator.clone()).unwrap();
emilmgeorge marked this conversation as resolved.
Show resolved Hide resolved
assert_eq!(*allocator.bytes_allocated.borrow(), len as i32);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No pointer test here ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case the storage is not created from another buffer so there is no original pointer to compare to. Are there any other checks I should have added?

}
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

// TensorStorage::new() -> TensorStorage::into_vec()
// TensorStorage::into_vec() consumes the storage and creates a copy (in this case).
edgarriba marked this conversation as resolved.
Show resolved Hide resolved
// This should cause deallocation of the original memory.
{
let storage = TensorStorage::<u8, _>::new(len, allocator.clone()).unwrap();
assert_eq!(*allocator.bytes_allocated.borrow(), len as i32);

let _vec = storage.into_vec();
assert_eq!(*allocator.bytes_allocated.borrow(), 0);
}
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

// TensorStorage::from_vec() -> TensorStorage::into_vec()
// TensorStorage::from_vec() currently does not use the custom allocator, so the
// bytes_allocated value should not change.
{
let vec = Vec::<u8>::with_capacity(len);
let storage = TensorStorage::<u8, _>::from_vec(vec, allocator.clone());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think we need to resolve well here when we create a storage from a Vec how's the allocator involved

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

worth to check this apache/arrow-rs#6362

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes that would be good. This is my related understanding:

  • Std Vector and Buffer::from_vec do not support custom allocators until allocator_api comes to stable rust. So, for our future custom allocators like CudaAllocator, from_vec will have to involve copying and use Buffer::from_custom_allocation.
  • For CpuAllocator, we can have zero-copy using Buffer::from_vec (as it is currently).
    But to be fully safe, I think we should also change CpuAllocator to use std::alloc::{alloc,dealloc} (Global allocator) instead of std::alloc::System.{alloc,dealloc}. This is because vector uses the Global allocator. This is usually the same as std::alloc::System but the user can change it using the global_allocator attribute. By changing CpuAllocator to use std::alloc::{alloc,dealloc}, it always matches the allocator used by the vector (even when user changes it).

I'm not sure how to switch the implementation of TensorStorage::from_vec to one of the above based on whether A is CpuAllocator or CudaAllocator though. (maybe different functions? Ideas welcome!)

I haven't done any of the above in this PR though. Please let me know your thoughts and I can change accordingly.

Copy link
Member

@edgarriba edgarriba Sep 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But to be fully safe, I think we should also change CpuAllocator to use std::alloc::{alloc,dealloc} (Global allocator) instead of std::alloc::System.{alloc,dealloc}

please, do 👍

I'm not sure how to switch the implementation of TensorStorage::from_vec to one of the above based on whether A is CpuAllocator or CudaAllocator though. (maybe different functions? Ideas welcome!)

maybe the behaviour for cuda should be that when a cuda storage is created via vec, the data is consumed, cuda allocated and copied to device, and deallocate the original cpu vector ? Haven't faced yet the full use case. Probably we should use the kornia::dnn module to try this workflows and prototype from there. Found a similar c++ implementation maybe to have a reference: https://gist.github.com/CommitThis/1666517de32893e5dc4c441269f1029a

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

one more request in this direction, is the ability to easily create Image views. As Image is tuple struct out of Tensor: https://github.com/kornia/kornia-rs/blob/main/crates/kornia-image/src/image.rs#L59

In some workflows i have different types of images out of Tensor which i need to convert to Image::new (`as_slice().to_vec() everytime which involves copies) in order to use any kornia function. Unless we decide e.g to adapt the whole api to accept Tensor, and Image in the end it's just a trait in order to give some semantics and define specific types of images with formats e.g Rgb8U, Mono8U.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't quite understand the image view part. But for converting Tensor3 to Image without copy, we could implement the TryFrom trait or a function from_tensor that uses the passed tensor if channel dimension matches.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Image in the end is a Tensor3 — would be a bit overkill to have a from_tensor method. I think ideally for this case we might want to have a method that somehow transfers the ownership of the storage, shape and strides ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to make myself clear, this is what I had in mind:

impl<T, const C: usize> TryFrom<Tensor<T, 3, CpuAllocator>> for Image<T, C>
where
    T: SafeTensorType,
{
    type Error = ImageError;

    fn try_from(value: Tensor<T, 3, CpuAllocator>) -> Result<Self, Self::Error> {
        if value.shape[2] == C {
            Ok(Self(value))
        } else {
            Err(ImageError::InvalidTensorShape)
        }
    }
}

Used like:

let image: Image<_, 3> = tensor.try_into().unwrap(); 
// OR
let image = Image::<_, 3>::try_from(tensor).unwrap();

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I see, sounds good! I’ll try myself in a separated PR. I have also some potential improvements for the image struct, like adding a third ImageColorSpace in order to define more specific types like
type ImageRgb8 = Image<u8, 3, ColorSpace::Rgb> which color will have associated values too

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regarding this PR, I just want to note that it only fixes the deallocation issues noted in the first comment. It does not currently include the changes related to into_vec for non-CpuAllocators as discussed above in this review thread. I had started it, but it is not ready yet. I can send a separate PR when it's ready (hope that's ok).

assert_eq!(*allocator.bytes_allocated.borrow(), 0);

let _vec = storage.into_vec();
assert_eq!(*allocator.bytes_allocated.borrow(), 0);
}
assert_eq!(*allocator.bytes_allocated.borrow(), 0);

// TensorStorage::from_ptr()
// TensorStorage::from_ptr() does not take ownership of buffer. So the memory should not be
// deallocated when the TensorStorage goes out of scope.
// In this case, the memory will be deallocated when the vector goes out of scope.
{
let mut vec = Vec::<u8>::with_capacity(len);
{
let _storage =
unsafe { TensorStorage::<u8, _>::from_ptr(vec.as_mut_ptr(), len, &allocator) };
emilmgeorge marked this conversation as resolved.
Show resolved Hide resolved
assert_eq!(*allocator.bytes_allocated.borrow(), 0);
}
assert_eq!(*allocator.bytes_allocated.borrow(), 0);
}
}
}
4 changes: 2 additions & 2 deletions crates/kornia-core/src/tensor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ where
impl<T, const N: usize, A> Tensor<T, N, A>
where
T: SafeTensorType,
A: TensorAllocator,
A: TensorAllocator + 'static,
{
/// Create a new `Tensor` with uninitialized data.
///
Expand Down Expand Up @@ -875,7 +875,7 @@ where
impl<T, const N: usize, A> Clone for Tensor<T, N, A>
where
T: SafeTensorType + Clone,
A: TensorAllocator + Clone,
A: TensorAllocator + Clone + 'static,
{
fn clone(&self) -> Self {
Self {
Expand Down
2 changes: 1 addition & 1 deletion crates/kornia-core/src/view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub struct TensorView<'a, T: SafeTensorType, const N: usize, A: TensorAllocator>
pub strides: [usize; N],
}

impl<'a, T: SafeTensorType, const N: usize, A: TensorAllocator> TensorView<'a, T, N, A> {
impl<'a, T: SafeTensorType, const N: usize, A: TensorAllocator + 'static> TensorView<'a, T, N, A> {
/// Returns the data slice of the tensor.
#[inline]
pub fn as_slice(&self) -> &[T] {
Expand Down
Loading