diff --git a/cudax/include/cuda/experimental/__algorithm/copy.cuh b/cudax/include/cuda/experimental/__algorithm/copy.cuh
index 9054bf0ea5e..b01bac3c1ce 100644
--- a/cudax/include/cuda/experimental/__algorithm/copy.cuh
+++ b/cudax/include/cuda/experimental/__algorithm/copy.cuh
@@ -53,8 +53,7 @@ void __copy_bytes_impl(stream_ref __stream, _CUDA_VSTD::span<_SrcTy> __src, _CUD
 
 //! @brief Launches a bytewise memory copy from source to destination into the provided stream.
 //!
-//! Both source and destination needs to either be a `contiguous_range` or implicitly
-//! implicitly/launch transform to one.
+//! Both source and destination need to either be a `contiguous_range` or implicitly/launch transform to one.
 //! Both source and destination type is required to be trivially copyable.
 //!
 //! This call might be synchronous if either source or destination is pagable host memory.
diff --git a/cudax/include/cuda/experimental/__device/arch_traits.cuh b/cudax/include/cuda/experimental/__device/arch_traits.cuh
index dd6d5c863f5..8cb3894cc4d 100644
--- a/cudax/include/cuda/experimental/__device/arch_traits.cuh
+++ b/cudax/include/cuda/experimental/__device/arch_traits.cuh
@@ -176,6 +176,28 @@ inline constexpr arch_traits_t sm_600_traits = []() constexpr {
   return __traits;
 }();
 
+inline constexpr arch_traits_t sm_610_traits = []() constexpr { // traits table for compute capability 6.1
+  arch_traits_t __traits{};
+  __traits.compute_capability_major             = 6;
+  __traits.compute_capability_minor             = 1;
+  __traits.compute_capability                   = 610; // matches the `case 610` selector in arch_traits()
+  __traits.max_shared_memory_per_multiprocessor = 96 * 1024; // NOTE(review): presumably bytes - confirm
+  __traits.max_blocks_per_multiprocessor        = 32;
+  __traits.max_threads_per_multiprocessor       = 2048;
+  __traits.max_warps_per_multiprocessor =
+    __traits.max_threads_per_multiprocessor / detail::arch_common_traits::warp_size; // derived, not hard-coded
+  __traits.reserved_shared_memory_per_block  = 0;
+  __traits.max_shared_memory_per_block_optin = 48 * 1024; // NOTE(review): presumably bytes - confirm
+
+  __traits.cluster_supported  = false; // every optional feature flag below is reported as unsupported
+  __traits.redux_intrinisic   = false; // (sic) spelling follows the member name used by this initializer
+  __traits.elect_intrinsic    = false;
+  __traits.cp_async_supported = false;
+  __traits.tma_supported      = false;
+
+  return __traits;
+}();
+
 inline constexpr arch_traits_t sm_700_traits = []() constexpr {
   arch_traits_t __traits{};
   __traits.compute_capability_major             = 7;
@@ -330,6 +352,8 @@ _CCCL_HOST_DEVICE inline constexpr arch_traits_t arch_traits(unsigned int __sm_v
   {
     case 600:
       return detail::sm_600_traits;
+    case 610:
+      return detail::sm_610_traits;
     case 700:
       return detail::sm_700_traits;
     case 750: