diff --git a/thrust/thrust/system/cuda/detail/core/triple_chevron_launch.h b/thrust/thrust/system/cuda/detail/core/triple_chevron_launch.h
index af68892dcdf..8b2e6078529 100644
--- a/thrust/thrust/system/cuda/detail/core/triple_chevron_launch.h
+++ b/thrust/thrust/system/cuda/detail/core/triple_chevron_launch.h
@@ -55,7 +55,7 @@ struct _CCCL_VISIBILITY_HIDDEN triple_chevron
   Size const shared_mem;
   cudaStream_t const stream;
 
-  THRUST_RUNTIME_FUNCTION triple_chevron(dim3 grid_, dim3 block_, Size shared_mem_ = 0, cudaStream_t stream_ = 0)
+  THRUST_RUNTIME_FUNCTION triple_chevron(dim3 grid_, dim3 block_, Size shared_mem_ = 0, cudaStream_t stream_ = nullptr)
       : grid(grid_)
       , block(block_)
       , shared_mem(shared_mem_)
@@ -84,22 +84,20 @@ struct _CCCL_VISIBILITY_HIDDEN triple_chevron
   size_t _CCCL_DEVICE argument_pack_size(size_t size, Args const&...) const
   {
     // TODO(bgruber): replace by fold over comma in C++17 (make sure order of evaluation is left to right!)
-    int dummy[] = {(size += align_up<Args>(size) + sizeof(Args), 0)...};
-    (void) dummy;
+    int dummy[] = {(size = align_up<Args>(size) + sizeof(Args), 0)...};
+    static_cast<void>(dummy);
     return size;
   }
 
   template <class Arg>
-  void _CCCL_DEVICE copy_arg(char* buffer, size_t& offset, Arg arg) const
+  void _CCCL_DEVICE copy_arg(char* buffer, size_t& offset, const Arg& arg) const
   {
     // TODO(bgruber): we should make sure that we can actually byte-wise copy Arg, but this fails with some tests
     // static_assert(::cuda::std::is_trivially_copyable<Arg>::value, "");
 
+    // Re-align the write offset before copying, mirroring the align_up() step in argument_pack_size().
     offset = align_up<Arg>(offset);
-    for (int i = 0; i != sizeof(Arg); ++i)
-    {
-      buffer[offset + i] = reinterpret_cast<const char*>(&arg)[i];
-    }
+    ::memcpy(buffer + offset, static_cast<const void*>(&arg), sizeof(arg));
     offset += sizeof(Arg);
   }
 
@@ -110,7 +108,7 @@ struct _CCCL_VISIBILITY_HIDDEN triple_chevron
   {
     // TODO(bgruber): replace by fold over comma in C++17 (make sure order of evaluation is left to right!)
     int dummy[] = {(copy_arg(buffer, offset, args), 0)...};
-    (void) dummy;
+    static_cast<void>(dummy);
   }
 
 #ifdef THRUST_RDC_ENABLED