Skip to content

Commit

Permalink
Add sm_61 traits (#2848)
Browse files Browse the repository at this point in the history
  • Loading branch information
pciolkosz authored Nov 17, 2024
1 parent e7c8b34 commit a1b5763
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 2 deletions.
3 changes: 1 addition & 2 deletions cudax/include/cuda/experimental/__algorithm/copy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@ void __copy_bytes_impl(stream_ref __stream, _CUDA_VSTD::span<_SrcTy> __src, _CUD

//! @brief Launches a bytewise memory copy from source to destination into the provided stream.
//!
//! Both source and destination needs to either be a `contiguous_range` or implicitly
//! implicitly/launch transform to one.
//! Both source and destination need to either be a `contiguous_range` or be implicitly/launch transformable to one.
//! Both source and destination types are required to be trivially copyable.
//!
//! This call might be synchronous if either source or destination is pageable host memory.
Expand Down
24 changes: 24 additions & 0 deletions cudax/include/cuda/experimental/__device/arch_traits.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,28 @@ inline constexpr arch_traits_t sm_600_traits = []() constexpr {
return __traits;
}();

//! @brief Trait table for devices of compute capability 6.1 (`sm_61`).
//!
//! The object is produced at compile time by an immediately-invoked constexpr lambda that
//! value-initializes an `arch_traits_t` and then fills in every field explicitly.
inline constexpr arch_traits_t sm_610_traits = []() constexpr {
  arch_traits_t __sm_610{};

  // Optional feature switches: every capability tracked here is disabled for this architecture.
  __sm_610.tma_supported      = false;
  __sm_610.cp_async_supported = false;
  __sm_610.elect_intrinsic    = false;
  __sm_610.redux_intrinisic   = false;
  __sm_610.cluster_supported  = false;

  // Version identification: major/minor pair plus the combined three-digit form.
  __sm_610.compute_capability       = 610;
  __sm_610.compute_capability_minor = 1;
  __sm_610.compute_capability_major = 6;

  // Per-multiprocessor limits.
  __sm_610.max_threads_per_multiprocessor       = 2048;
  __sm_610.max_blocks_per_multiprocessor        = 32;
  __sm_610.max_shared_memory_per_multiprocessor = 96 * 1024;
  // The warp limit is derived from the thread limit, so it has to come after that assignment.
  __sm_610.max_warps_per_multiprocessor =
    __sm_610.max_threads_per_multiprocessor / detail::arch_common_traits::warp_size;

  // Per-block shared memory settings.
  __sm_610.max_shared_memory_per_block_optin = 48 * 1024;
  __sm_610.reserved_shared_memory_per_block  = 0;

  return __sm_610;
}();

inline constexpr arch_traits_t sm_700_traits = []() constexpr {
arch_traits_t __traits{};
__traits.compute_capability_major = 7;
Expand Down Expand Up @@ -330,6 +352,8 @@ _CCCL_HOST_DEVICE inline constexpr arch_traits_t arch_traits(unsigned int __sm_v
{
case 600:
return detail::sm_600_traits;
case 610:
return detail::sm_610_traits;
case 700:
return detail::sm_700_traits;
case 750:
Expand Down

0 comments on commit a1b5763

Please sign in to comment.