From 19a3c3917ea343cbe0b24a5228b568478a7509e0 Mon Sep 17 00:00:00 2001 From: Ivana Date: Sun, 10 Nov 2024 16:10:47 +0800 Subject: [PATCH 1/3] Templatise `Fill()` functions used by `Storage` This is the preparation for refactoring the `Storage` classes. --- src/backend/BoolStorage.cpp | 36 ++--- src/backend/ComplexDoubleStorage.cpp | 44 +++--- src/backend/ComplexFloatStorage.cpp | 44 +++--- src/backend/DoubleStorage.cpp | 36 ++--- src/backend/FloatStorage.cpp | 36 ++--- src/backend/Int16Storage.cpp | 36 ++--- src/backend/Int32Storage.cpp | 36 ++--- src/backend/Int64Storage.cpp | 36 ++--- src/backend/Uint16Storage.cpp | 36 ++--- src/backend/Uint32Storage.cpp | 36 ++--- src/backend/Uint64Storage.cpp | 36 ++--- src/backend/utils_internal_cpu/CMakeLists.txt | 1 - src/backend/utils_internal_cpu/Fill_cpu.cpp | 145 ------------------ src/backend/utils_internal_cpu/Fill_cpu.hpp | 52 ++++--- src/backend/utils_internal_gpu/cuFill_gpu.cu | 138 +++++------------ src/backend/utils_internal_gpu/cuFill_gpu.hpp | 38 +++-- 16 files changed, 291 insertions(+), 495 deletions(-) delete mode 100644 src/backend/utils_internal_cpu/Fill_cpu.cpp diff --git a/src/backend/BoolStorage.cpp b/src/backend/BoolStorage.cpp index 6285101ae..b39a2c827 100644 --- a/src/backend/BoolStorage.cpp +++ b/src/backend/BoolStorage.cpp @@ -411,10 +411,10 @@ namespace cytnx { void BoolStorage::fill(const cytnx_double &val) { cytnx_bool tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -424,10 +424,10 @@ namespace cytnx { void BoolStorage::fill(const cytnx_float &val) { cytnx_bool tmp = val; if (this->device == Device.cpu) { - 
utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -437,10 +437,10 @@ namespace cytnx { void BoolStorage::fill(const cytnx_int64 &val) { cytnx_bool tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -450,10 +450,10 @@ namespace cytnx { void BoolStorage::fill(const cytnx_uint64 &val) { cytnx_bool tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -463,10 +463,10 @@ namespace cytnx { void BoolStorage::fill(const cytnx_int32 &val) { cytnx_bool tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -476,10 +476,10 @@ namespace cytnx { void 
BoolStorage::fill(const cytnx_uint32 &val) { cytnx_bool tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -489,10 +489,10 @@ namespace cytnx { void BoolStorage::fill(const cytnx_int16 &val) { cytnx_bool tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -502,10 +502,10 @@ namespace cytnx { void BoolStorage::fill(const cytnx_uint16 &val) { cytnx_bool tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -514,10 +514,10 @@ namespace cytnx { } void BoolStorage::fill(const cytnx_bool &val) { if (this->device == Device.cpu) { - utils_internal::Fill_cpu_b(this->Mem, (void *)(&val), this->len); + utils_internal::FillCpu(this->Mem, val, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_b(this->Mem, (void *)(&val), this->len); + utils_internal::FillGpu(this->Mem, val, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is 
on gpu without CUDA support\n"); diff --git a/src/backend/ComplexDoubleStorage.cpp b/src/backend/ComplexDoubleStorage.cpp index e1f4d1b3d..9fe023b5b 100644 --- a/src/backend/ComplexDoubleStorage.cpp +++ b/src/backend/ComplexDoubleStorage.cpp @@ -395,11 +395,11 @@ namespace cytnx { void ComplexDoubleStorage::fill(const cytnx_complex128 &val) { if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&val), this->len); + utils_internal::FillCpu(this->Mem, val, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&val), this->len); + utils_internal::FillGpu(this->Mem, val, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -409,11 +409,11 @@ namespace cytnx { void ComplexDoubleStorage::fill(const cytnx_complex64 &val) { cytnx_complex128 tmp(val.real(), val.imag()); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -423,11 +423,11 @@ namespace cytnx { void ComplexDoubleStorage::fill(const cytnx_double &val) { cytnx_complex128 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA 
support\n"); @@ -437,11 +437,11 @@ namespace cytnx { void ComplexDoubleStorage::fill(const cytnx_float &val) { cytnx_complex128 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -451,11 +451,11 @@ namespace cytnx { void ComplexDoubleStorage::fill(const cytnx_int64 &val) { cytnx_complex128 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -465,11 +465,11 @@ namespace cytnx { void ComplexDoubleStorage::fill(const cytnx_uint64 &val) { cytnx_complex128 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -479,11 +479,11 @@ namespace cytnx { void ComplexDoubleStorage::fill(const cytnx_int32 &val) { cytnx_complex128 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), 
this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -493,11 +493,11 @@ namespace cytnx { void ComplexDoubleStorage::fill(const cytnx_uint32 &val) { cytnx_complex128 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -507,11 +507,11 @@ namespace cytnx { void ComplexDoubleStorage::fill(const cytnx_int16 &val) { cytnx_complex128 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -521,11 +521,11 @@ namespace cytnx { void ComplexDoubleStorage::fill(const cytnx_uint16 &val) { cytnx_complex128 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); + 
utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -535,11 +535,11 @@ namespace cytnx { void ComplexDoubleStorage::fill(const cytnx_bool &val) { cytnx_complex128 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cd(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); diff --git a/src/backend/ComplexFloatStorage.cpp b/src/backend/ComplexFloatStorage.cpp index 219eb304f..06755ffdf 100644 --- a/src/backend/ComplexFloatStorage.cpp +++ b/src/backend/ComplexFloatStorage.cpp @@ -397,11 +397,11 @@ namespace cytnx { void ComplexFloatStorage::fill(const cytnx_complex128 &val) { cytnx_complex64 tmp(val.real(), val.imag()); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -410,11 +410,11 @@ namespace cytnx { } void ComplexFloatStorage::fill(const cytnx_complex64 &val) { if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&val), this->len); + utils_internal::FillCpu(this->Mem, val, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&val), this->len); + utils_internal::FillGpu(this->Mem, 
val, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -424,11 +424,11 @@ namespace cytnx { void ComplexFloatStorage::fill(const cytnx_double &val) { cytnx_complex64 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -438,11 +438,11 @@ namespace cytnx { void ComplexFloatStorage::fill(const cytnx_float &val) { cytnx_complex64 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -452,11 +452,11 @@ namespace cytnx { void ComplexFloatStorage::fill(const cytnx_int64 &val) { cytnx_complex64 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -466,11 +466,11 @@ namespace cytnx { void ComplexFloatStorage::fill(const cytnx_uint64 &val) { 
cytnx_complex64 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -480,11 +480,11 @@ namespace cytnx { void ComplexFloatStorage::fill(const cytnx_int32 &val) { cytnx_complex64 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -494,11 +494,11 @@ namespace cytnx { void ComplexFloatStorage::fill(const cytnx_uint32 &val) { cytnx_complex64 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -508,11 +508,11 @@ namespace cytnx { void ComplexFloatStorage::fill(const cytnx_int16 &val) { cytnx_complex64 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU 
checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -522,11 +522,11 @@ namespace cytnx { void ComplexFloatStorage::fill(const cytnx_uint16 &val) { cytnx_complex64 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -536,11 +536,11 @@ namespace cytnx { void ComplexFloatStorage::fill(const cytnx_bool &val) { cytnx_complex64 tmp(val, 0); if (this->device == Device.cpu) { - utils_internal::Fill_cpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_cf(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); diff --git a/src/backend/DoubleStorage.cpp b/src/backend/DoubleStorage.cpp index 69000037f..994a05bc1 100644 --- a/src/backend/DoubleStorage.cpp +++ b/src/backend/DoubleStorage.cpp @@ -405,11 +405,11 @@ namespace cytnx { } void DoubleStorage::fill(const cytnx_double &val) { if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&val), this->len); + utils_internal::FillCpu(this->Mem, val, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, 
(void *)(&val), this->len); + utils_internal::FillGpu(this->Mem, val, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -419,11 +419,11 @@ namespace cytnx { void DoubleStorage::fill(const cytnx_float &val) { cytnx_double tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -433,11 +433,11 @@ namespace cytnx { void DoubleStorage::fill(const cytnx_int64 &val) { cytnx_double tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -447,11 +447,11 @@ namespace cytnx { void DoubleStorage::fill(const cytnx_uint64 &val) { cytnx_double tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -461,11 +461,11 @@ namespace cytnx { void DoubleStorage::fill(const cytnx_int32 
&val) { cytnx_double tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -475,11 +475,11 @@ namespace cytnx { void DoubleStorage::fill(const cytnx_uint32 &val) { cytnx_double tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -489,11 +489,11 @@ namespace cytnx { void DoubleStorage::fill(const cytnx_int16 &val) { cytnx_double tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -503,11 +503,11 @@ namespace cytnx { void DoubleStorage::fill(const cytnx_uint16 &val) { cytnx_double tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - 
utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -517,11 +517,11 @@ namespace cytnx { void DoubleStorage::fill(const cytnx_bool &val) { cytnx_double tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_d(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); diff --git a/src/backend/FloatStorage.cpp b/src/backend/FloatStorage.cpp index ef7239fba..1b35b87f7 100644 --- a/src/backend/FloatStorage.cpp +++ b/src/backend/FloatStorage.cpp @@ -397,10 +397,10 @@ namespace cytnx { void FloatStorage::fill(const cytnx_double &val) { cytnx_float tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -409,10 +409,10 @@ namespace cytnx { } void FloatStorage::fill(const cytnx_float &val) { if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&val), this->len); + utils_internal::FillCpu(this->Mem, val, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&val), this->len); + utils_internal::FillGpu(this->Mem, val, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA 
support\n"); @@ -422,10 +422,10 @@ namespace cytnx { void FloatStorage::fill(const cytnx_int64 &val) { cytnx_float tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -435,10 +435,10 @@ namespace cytnx { void FloatStorage::fill(const cytnx_uint64 &val) { cytnx_float tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -448,10 +448,10 @@ namespace cytnx { void FloatStorage::fill(const cytnx_int32 &val) { cytnx_float tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -461,10 +461,10 @@ namespace cytnx { void FloatStorage::fill(const cytnx_uint32 &val) { cytnx_float tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, 
this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -474,10 +474,10 @@ namespace cytnx { void FloatStorage::fill(const cytnx_int16 &val) { cytnx_float tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -487,10 +487,10 @@ namespace cytnx { void FloatStorage::fill(const cytnx_uint16 &val) { cytnx_float tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -500,10 +500,10 @@ namespace cytnx { void FloatStorage::fill(const cytnx_bool &val) { cytnx_float tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU - utils_internal::cuFill_gpu_f(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); diff --git a/src/backend/Int16Storage.cpp b/src/backend/Int16Storage.cpp index e8e909a94..d21a3c286 100644 --- a/src/backend/Int16Storage.cpp +++ b/src/backend/Int16Storage.cpp @@ -394,11 +394,11 @@ namespace cytnx { void Int16Storage::fill(const cytnx_double &val) { cytnx_int16 tmp = val; if 
(this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -408,11 +408,11 @@ namespace cytnx { void Int16Storage::fill(const cytnx_float &val) { cytnx_int16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -422,11 +422,11 @@ namespace cytnx { void Int16Storage::fill(const cytnx_int64 &val) { cytnx_int16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -436,11 +436,11 @@ namespace cytnx { void Int16Storage::fill(const cytnx_uint64 &val) { cytnx_int16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, 
(void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -450,11 +450,11 @@ namespace cytnx { void Int16Storage::fill(const cytnx_int32 &val) { cytnx_int16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -464,11 +464,11 @@ namespace cytnx { void Int16Storage::fill(const cytnx_uint32 &val) { cytnx_int16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -478,11 +478,11 @@ namespace cytnx { void Int16Storage::fill(const cytnx_uint16 &val) { cytnx_int16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -491,11 +491,11 @@ namespace cytnx { } void Int16Storage::fill(const 
cytnx_int16 &val) { if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&val), this->len); + utils_internal::FillCpu(this->Mem, val, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&val), this->len); + utils_internal::FillGpu(this->Mem, val, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -505,11 +505,11 @@ namespace cytnx { void Int16Storage::fill(const cytnx_bool &val) { cytnx_int16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); diff --git a/src/backend/Int32Storage.cpp b/src/backend/Int32Storage.cpp index 30088d61d..2535b78ff 100644 --- a/src/backend/Int32Storage.cpp +++ b/src/backend/Int32Storage.cpp @@ -397,11 +397,11 @@ namespace cytnx { void Int32Storage::fill(const cytnx_double &val) { cytnx_int32 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -411,11 +411,11 @@ namespace cytnx { void Int32Storage::fill(const cytnx_float &val) { cytnx_int32 tmp = val; if (this->device == Device.cpu) { - 
utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -425,11 +425,11 @@ namespace cytnx { void Int32Storage::fill(const cytnx_int64 &val) { cytnx_int32 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -439,11 +439,11 @@ namespace cytnx { void Int32Storage::fill(const cytnx_uint64 &val) { cytnx_int32 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -452,11 +452,11 @@ namespace cytnx { } void Int32Storage::fill(const cytnx_int32 &val) { if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&val), this->len); + utils_internal::FillCpu(this->Mem, val, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&val), this->len); + 
utils_internal::FillGpu(this->Mem, val, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -466,11 +466,11 @@ namespace cytnx { void Int32Storage::fill(const cytnx_uint32 &val) { cytnx_int32 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -480,11 +480,11 @@ namespace cytnx { void Int32Storage::fill(const cytnx_int16 &val) { cytnx_int32 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -494,11 +494,11 @@ namespace cytnx { void Int32Storage::fill(const cytnx_uint16 &val) { cytnx_int32 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -508,11 +508,11 @@ namespace cytnx { void Int32Storage::fill(const cytnx_bool &val) { cytnx_int32 tmp = 
val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); diff --git a/src/backend/Int64Storage.cpp b/src/backend/Int64Storage.cpp index adbcba8a7..01b3860ff 100644 --- a/src/backend/Int64Storage.cpp +++ b/src/backend/Int64Storage.cpp @@ -397,11 +397,11 @@ namespace cytnx { void Int64Storage::fill(const cytnx_double &val) { cytnx_int64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -411,11 +411,11 @@ namespace cytnx { void Int64Storage::fill(const cytnx_float &val) { cytnx_int64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&val), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -424,11 +424,11 @@ namespace cytnx { } void Int64Storage::fill(const cytnx_int64 &val) { if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&val), this->len); +
utils_internal::FillCpu(this->Mem, val, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&val), this->len); + utils_internal::FillGpu(this->Mem, val, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -438,11 +438,11 @@ namespace cytnx { void Int64Storage::fill(const cytnx_uint64 &val) { cytnx_int64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -452,11 +452,11 @@ namespace cytnx { void Int64Storage::fill(const cytnx_int32 &val) { cytnx_int64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -466,11 +466,11 @@ namespace cytnx { void Int64Storage::fill(const cytnx_uint32 &val) { cytnx_int64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else 
cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -480,11 +480,11 @@ namespace cytnx { void Int64Storage::fill(const cytnx_uint16 &val) { cytnx_int64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -494,11 +494,11 @@ namespace cytnx { void Int64Storage::fill(const cytnx_int16 &val) { cytnx_int64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -508,11 +508,11 @@ namespace cytnx { void Int64Storage::fill(const cytnx_bool &val) { cytnx_int64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_i64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); diff --git a/src/backend/Uint16Storage.cpp b/src/backend/Uint16Storage.cpp index e96ad2c93..1352674d2 100644 --- a/src/backend/Uint16Storage.cpp +++ 
b/src/backend/Uint16Storage.cpp @@ -395,11 +395,11 @@ namespace cytnx { void Uint16Storage::fill(const cytnx_double &val) { cytnx_uint16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -409,11 +409,11 @@ namespace cytnx { void Uint16Storage::fill(const cytnx_float &val) { cytnx_uint16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -423,11 +423,11 @@ namespace cytnx { void Uint16Storage::fill(const cytnx_int64 &val) { cytnx_uint16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -437,11 +437,11 @@ namespace cytnx { void Uint16Storage::fill(const cytnx_uint64 &val) { cytnx_uint16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); + 
utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -451,11 +451,11 @@ namespace cytnx { void Uint16Storage::fill(const cytnx_int32 &val) { cytnx_uint16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -465,11 +465,11 @@ namespace cytnx { void Uint16Storage::fill(const cytnx_uint32 &val) { cytnx_uint16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -478,11 +478,11 @@ namespace cytnx { } void Uint16Storage::fill(const cytnx_uint16 &val) { if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&val), this->len); + utils_internal::FillCpu(this->Mem, val, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&val), this->len); + utils_internal::FillGpu(this->Mem, val, this->len); #else cytnx_error_msg(true, 
"[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -492,11 +492,11 @@ namespace cytnx { void Uint16Storage::fill(const cytnx_int16 &val) { cytnx_uint16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -506,11 +506,11 @@ namespace cytnx { void Uint16Storage::fill(const cytnx_bool &val) { cytnx_uint16 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u16(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); diff --git a/src/backend/Uint32Storage.cpp b/src/backend/Uint32Storage.cpp index f7157c12d..1c9103a56 100644 --- a/src/backend/Uint32Storage.cpp +++ b/src/backend/Uint32Storage.cpp @@ -401,11 +401,11 @@ namespace cytnx { void Uint32Storage::fill(const cytnx_double &val) { cytnx_uint32 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ 
-415,11 +415,11 @@ namespace cytnx { void Uint32Storage::fill(const cytnx_float &val) { cytnx_uint32 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -429,11 +429,11 @@ namespace cytnx { void Uint32Storage::fill(const cytnx_int64 &val) { cytnx_uint32 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -443,11 +443,11 @@ namespace cytnx { void Uint32Storage::fill(const cytnx_uint64 &val) { cytnx_uint32 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -457,11 +457,11 @@ namespace cytnx { void Uint32Storage::fill(const cytnx_int32 &val) { cytnx_uint32 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, 
this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -470,11 +470,11 @@ namespace cytnx { } void Uint32Storage::fill(const cytnx_uint32 &val) { if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&val), this->len); + utils_internal::FillCpu(this->Mem, val, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&val), this->len); + utils_internal::FillGpu(this->Mem, val, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -484,11 +484,11 @@ namespace cytnx { void Uint32Storage::fill(const cytnx_uint16 &val) { cytnx_uint32 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -498,11 +498,11 @@ namespace cytnx { void Uint32Storage::fill(const cytnx_int16 &val) { cytnx_uint32 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is 
on gpu without CUDA support\n"); @@ -512,11 +512,11 @@ namespace cytnx { void Uint32Storage::fill(const cytnx_bool &val) { cytnx_uint32 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u32(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); diff --git a/src/backend/Uint64Storage.cpp b/src/backend/Uint64Storage.cpp index 231a8cf95..d0c3fb43c 100644 --- a/src/backend/Uint64Storage.cpp +++ b/src/backend/Uint64Storage.cpp @@ -396,11 +396,11 @@ namespace cytnx { void Uint64Storage::fill(const cytnx_double &val) { cytnx_uint64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -410,11 +410,11 @@ namespace cytnx { void Uint64Storage::fill(const cytnx_float &val) { cytnx_uint64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -424,11 +424,11 @@ namespace cytnx { void 
Uint64Storage::fill(const cytnx_int64 &val) { cytnx_uint64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -437,11 +437,11 @@ namespace cytnx { } void Uint64Storage::fill(const cytnx_uint64 &val) { if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&val), this->len); + utils_internal::FillCpu(this->Mem, val, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&val), this->len); + utils_internal::FillGpu(this->Mem, val, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -451,11 +451,11 @@ namespace cytnx { void Uint64Storage::fill(const cytnx_int32 &val) { cytnx_uint64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -465,11 +465,11 @@ namespace cytnx { void Uint64Storage::fill(const cytnx_uint32 &val) { cytnx_uint64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU 
checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -479,11 +479,11 @@ namespace cytnx { void Uint64Storage::fill(const cytnx_uint16 &val) { cytnx_uint64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -493,11 +493,11 @@ namespace cytnx { void Uint64Storage::fill(const cytnx_int16 &val) { cytnx_uint64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without CUDA support\n"); @@ -507,11 +507,11 @@ namespace cytnx { void Uint64Storage::fill(const cytnx_bool &val) { cytnx_uint64 tmp = val; if (this->device == Device.cpu) { - utils_internal::Fill_cpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillCpu(this->Mem, tmp, this->len); } else { #ifdef UNI_GPU checkCudaErrors(cudaSetDevice(this->device)); - utils_internal::cuFill_gpu_u64(this->Mem, (void *)(&tmp), this->len); + utils_internal::FillGpu(this->Mem, tmp, this->len); #else cytnx_error_msg(true, "[ERROR][fill] fatal internal, %s", "storage is on gpu without 
CUDA support\n"); diff --git a/src/backend/utils_internal_cpu/CMakeLists.txt b/src/backend/utils_internal_cpu/CMakeLists.txt index 993be4911..752d9bccb 100644 --- a/src/backend/utils_internal_cpu/CMakeLists.txt +++ b/src/backend/utils_internal_cpu/CMakeLists.txt @@ -14,7 +14,6 @@ target_sources_local(cytnx Complexmem_cpu.hpp Alloc_cpu.cpp Cast_cpu.cpp - Fill_cpu.cpp GetElems_cpu.cpp GetElems_contiguous_cpu.cpp Movemem_cpu.cpp diff --git a/src/backend/utils_internal_cpu/Fill_cpu.cpp b/src/backend/utils_internal_cpu/Fill_cpu.cpp deleted file mode 100644 index 6fec2f46e..000000000 --- a/src/backend/utils_internal_cpu/Fill_cpu.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include "Fill_cpu.hpp" -#include "backend/Storage.hpp" -#ifdef UNI_OMP - #include -#endif - -using namespace std; - -namespace cytnx { - namespace utils_internal { - - void Fill_cpu_cd(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_complex128* ptr = (cytnx_complex128*)in; - cytnx_complex128 _val = *((cytnx_complex128*)val); - -#ifdef UNI_OMP - #pragma omp parallel for schedule(dynamic) -#endif - for (cytnx_uint64 i = 0; i < Nelem; i++) { - ptr[i] = _val; - } - } - - void Fill_cpu_cf(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_complex64* ptr = (cytnx_complex64*)in; - cytnx_complex64 _val = *((cytnx_complex64*)val); - -#ifdef UNI_OMP - #pragma omp parallel for schedule(dynamic) -#endif - for (cytnx_uint64 i = 0; i < Nelem; i++) { - ptr[i] = _val; - } - } - - void Fill_cpu_d(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_double* ptr = (cytnx_double*)in; - cytnx_double _val = *((cytnx_double*)val); - -#ifdef UNI_OMP - #pragma omp parallel for schedule(dynamic) -#endif - for (cytnx_uint64 i = 0; i < Nelem; i++) { - ptr[i] = _val; - } - } - - void Fill_cpu_f(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_float* ptr = (cytnx_float*)in; - cytnx_float _val = *((cytnx_float*)val); - -#ifdef UNI_OMP - #pragma omp parallel for schedule(dynamic) -#endif - for (cytnx_uint64 i 
= 0; i < Nelem; i++) { - ptr[i] = _val; - } - } - - void Fill_cpu_i64(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_int64* ptr = (cytnx_int64*)in; - cytnx_int64 _val = *((cytnx_int64*)val); - -#ifdef UNI_OMP - #pragma omp parallel for schedule(dynamic) -#endif - for (cytnx_uint64 i = 0; i < Nelem; i++) { - ptr[i] = _val; - } - } - - void Fill_cpu_u64(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_uint64* ptr = (cytnx_uint64*)in; - cytnx_uint64 _val = *((cytnx_uint64*)val); - -#ifdef UNI_OMP - #pragma omp parallel for schedule(dynamic) -#endif - for (cytnx_uint64 i = 0; i < Nelem; i++) { - ptr[i] = _val; - } - } - - void Fill_cpu_i32(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_int32* ptr = (cytnx_int32*)in; - cytnx_int32 _val = *((cytnx_int32*)val); - -#ifdef UNI_OMP - #pragma omp parallel for schedule(dynamic) -#endif - for (cytnx_uint64 i = 0; i < Nelem; i++) { - ptr[i] = _val; - } - } - - void Fill_cpu_u32(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_uint32* ptr = (cytnx_uint32*)in; - cytnx_uint32 _val = *((cytnx_uint32*)val); - -#ifdef UNI_OMP - #pragma omp parallel for schedule(dynamic) -#endif - for (cytnx_uint64 i = 0; i < Nelem; i++) { - ptr[i] = _val; - } - } - - void Fill_cpu_i16(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_int16* ptr = (cytnx_int16*)in; - cytnx_int16 _val = *((cytnx_int16*)val); - -#ifdef UNI_OMP - #pragma omp parallel for schedule(dynamic) -#endif - for (cytnx_uint64 i = 0; i < Nelem; i++) { - ptr[i] = _val; - } - } - - void Fill_cpu_u16(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_uint16* ptr = (cytnx_uint16*)in; - cytnx_uint16 _val = *((cytnx_uint16*)val); - -#ifdef UNI_OMP - #pragma omp parallel for schedule(dynamic) -#endif - for (cytnx_uint64 i = 0; i < Nelem; i++) { - ptr[i] = _val; - } - } - - void Fill_cpu_b(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_bool* ptr = (cytnx_bool*)in; - cytnx_bool _val = *((cytnx_bool*)val); - -#ifdef UNI_OMP - 
#pragma omp parallel for schedule(dynamic) -#endif - for (cytnx_uint64 i = 0; i < Nelem; i++) { - ptr[i] = _val; - } - } - - } // namespace utils_internal -} // namespace cytnx diff --git a/src/backend/utils_internal_cpu/Fill_cpu.hpp b/src/backend/utils_internal_cpu/Fill_cpu.hpp index 5063289b5..5d25b72d9 100644 --- a/src/backend/utils_internal_cpu/Fill_cpu.hpp +++ b/src/backend/utils_internal_cpu/Fill_cpu.hpp @@ -1,29 +1,39 @@ -#ifndef _H_Fill_cpu_ -#define _H_Fill_cpu_ +#ifndef SRC_BACKEND_UTILS_INTERNAL_CPU_FILL_CPU_H_ +#define SRC_BACKEND_UTILS_INTERNAL_CPU_FILL_CPU_H_ -#include -#include -#include -#include #include "Type.hpp" -#include "backend/Storage.hpp" -#include "cytnx_error.hpp" + +#ifdef UNI_OMP + #include +#endif + namespace cytnx { namespace utils_internal { - void Fill_cpu_cd(void *in, void *val, const cytnx_uint64 &Nelem); - void Fill_cpu_cf(void *in, void *val, const cytnx_uint64 &Nelem); - void Fill_cpu_d(void *in, void *val, const cytnx_uint64 &Nelem); - void Fill_cpu_f(void *in, void *val, const cytnx_uint64 &Nelem); - void Fill_cpu_i64(void *in, void *val, const cytnx_uint64 &Nelem); - void Fill_cpu_u64(void *in, void *val, const cytnx_uint64 &Nelem); - void Fill_cpu_i32(void *in, void *val, const cytnx_uint64 &Nelem); - void Fill_cpu_u32(void *in, void *val, const cytnx_uint64 &Nelem); - void Fill_cpu_u16(void *in, void *val, const cytnx_uint64 &Nelem); - void Fill_cpu_i16(void *in, void *val, const cytnx_uint64 &Nelem); - void Fill_cpu_b(void *in, void *val, const cytnx_uint64 &Nelem); + /** + * @brief Assign the given value to the first `count` elements in the range beginning at + * `first`. + * + * This function acts the same as `std::fill_n`. The execution will be parallelized when OMP is + * enabled.
+ * + * @tparam DType the data type of the elements in the range + * + * @param first the beginning of the range + * @param value the value to be assigned + * @param count the number of elements to modify + */ + template + void FillCpu(void *first, const DType &value, cytnx_uint64 count) { + DType *typed_first = reinterpret_cast(first); +#ifdef UNI_OMP + #pragma omp parallel for schedule(dynamic) +#endif + for (cytnx_uint64 i = 0; i < count; i++) { + typed_first[i] = value; + } + } } // namespace utils_internal - } // namespace cytnx -#endif +#endif // SRC_BACKEND_UTILS_INTERNAL_CPU_FILL_CPU_H_ diff --git a/src/backend/utils_internal_gpu/cuFill_gpu.cu b/src/backend/utils_internal_gpu/cuFill_gpu.cu index 680c97c4b..4faa445fb 100644 --- a/src/backend/utils_internal_gpu/cuFill_gpu.cu +++ b/src/backend/utils_internal_gpu/cuFill_gpu.cu @@ -1,117 +1,51 @@ -#include "cuFill_gpu.hpp" -#include "backend/Storage.hpp" -#ifdef UNI_OMP - #include -#endif +#include "backend/utils_internal_gpu/cuFill_gpu.hpp" -using namespace std; -namespace cytnx { - namespace utils_internal { - - template - __global__ void cuFill_kernel(T3* des, T3 val, cytnx_uint64 Nelem) { - if (blockIdx.x * blockDim.x + threadIdx.x < Nelem) { - des[blockIdx.x * blockDim.x + threadIdx.x] = val; - } - } - - //======================================================================== - void cuFill_gpu_cd(void* in, void* val, const cytnx_uint64& Nelem) { - cuDoubleComplex* ptr = (cuDoubleComplex*)in; - cuDoubleComplex _val = *((cuDoubleComplex*)val); - - cytnx_uint64 NBlocks = Nelem / 512; - if (Nelem % 512) NBlocks += 1; - cuFill_kernel<<>>(ptr, _val, Nelem); - } - - void cuFill_gpu_cf(void* in, void* val, const cytnx_uint64& Nelem) { - cuFloatComplex* ptr = (cuFloatComplex*)in; - cuFloatComplex _val = *((cuFloatComplex*)val); - - cytnx_uint64 NBlocks = Nelem / 512; - if (Nelem % 512) NBlocks += 1; - cuFill_kernel<<>>(ptr, _val, Nelem); - } - - void cuFill_gpu_d(void* in, void* val, const cytnx_uint64& Nelem) { - 
cytnx_double* ptr = (cytnx_double*)in; - cytnx_double _val = *((cytnx_double*)val); - - cytnx_uint64 NBlocks = Nelem / 512; - if (Nelem % 512) NBlocks += 1; - cuFill_kernel<<>>(ptr, _val, Nelem); - } +#include - void cuFill_gpu_f(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_float* ptr = (cytnx_float*)in; - cytnx_float _val = *((cytnx_float*)val); +#include "cuda/std/complex" - cytnx_uint64 NBlocks = Nelem / 512; - if (Nelem % 512) NBlocks += 1; - cuFill_kernel<<>>(ptr, _val, Nelem); - } - - void cuFill_gpu_i64(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_int64* ptr = (cytnx_int64*)in; - cytnx_int64 _val = *((cytnx_int64*)val); - - cytnx_uint64 NBlocks = Nelem / 512; - if (Nelem % 512) NBlocks += 1; - cuFill_kernel<<>>(ptr, _val, Nelem); - } - - void cuFill_gpu_u64(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_uint64* ptr = (cytnx_uint64*)in; - cytnx_uint64 _val = *((cytnx_uint64*)val); - - cytnx_uint64 NBlocks = Nelem / 512; - if (Nelem % 512) NBlocks += 1; - cuFill_kernel<<>>(ptr, _val, Nelem); - } +#include "Type.hpp" - void cuFill_gpu_i32(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_int32* ptr = (cytnx_int32*)in; - cytnx_int32 _val = *((cytnx_int32*)val); +namespace cytnx { + namespace utils_internal { - cytnx_uint64 NBlocks = Nelem / 512; - if (Nelem % 512) NBlocks += 1; - cuFill_kernel<<>>(ptr, _val, Nelem); + template + __global__ void FillGpuKernel(CudaDType* first, CudaDType value, cytnx_uint64 count) { + if (blockIdx.x * blockDim.x + threadIdx.x < count) { + first[blockIdx.x * blockDim.x + threadIdx.x] = value; + } } - void cuFill_gpu_u32(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_uint32* ptr = (cytnx_uint32*)in; - cytnx_uint32 _val = *((cytnx_uint32*)val); + template + struct ToCudaDType { + typedef DType type; + }; - cytnx_uint64 NBlocks = Nelem / 512; - if (Nelem % 512) NBlocks += 1; - cuFill_kernel<<>>(ptr, _val, Nelem); - } + template + struct ToCudaDType> { + typedef cuda::std::complex 
type; + }; - void cuFill_gpu_i16(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_int16* ptr = (cytnx_int16*)in; - cytnx_int16 _val = *((cytnx_int16*)val); + template + void FillGpu(void* first, const DType& value, cytnx_uint64 count) { + using CudaDType = typename ToCudaDType::type; - cytnx_uint64 NBlocks = Nelem / 512; - if (Nelem % 512) NBlocks += 1; - cuFill_kernel<<>>(ptr, _val, Nelem); + CudaDType* typed_first = reinterpret_cast(first); + cytnx_uint64 block_count = (count + 511) / 512; + FillGpuKernel<<>>(typed_first, static_cast(value), count); } - void cuFill_gpu_u16(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_uint16* ptr = (cytnx_uint16*)in; - cytnx_uint16 _val = *((cytnx_uint16*)val); + template void FillGpu(void*, const cytnx_complex128&, cytnx_uint64); + template void FillGpu(void*, const cytnx_complex64&, cytnx_uint64); + template void FillGpu(void*, const cytnx_double&, cytnx_uint64); + template void FillGpu(void*, const cytnx_float&, cytnx_uint64); + template void FillGpu(void*, const cytnx_uint64&, cytnx_uint64); + template void FillGpu(void*, const cytnx_int64&, cytnx_uint64); + template void FillGpu(void*, const cytnx_uint32&, cytnx_uint64); + template void FillGpu(void*, const cytnx_int32&, cytnx_uint64); + template void FillGpu(void*, const cytnx_uint16&, cytnx_uint64); + template void FillGpu(void*, const cytnx_int16&, cytnx_uint64); + template void FillGpu(void*, const cytnx_bool&, cytnx_uint64); - cytnx_uint64 NBlocks = Nelem / 512; - if (Nelem % 512) NBlocks += 1; - cuFill_kernel<<>>(ptr, _val, Nelem); - } - void cuFill_gpu_b(void* in, void* val, const cytnx_uint64& Nelem) { - cytnx_bool* ptr = (cytnx_bool*)in; - cytnx_bool _val = *((cytnx_bool*)val); - - cytnx_uint64 NBlocks = Nelem / 512; - if (Nelem % 512) NBlocks += 1; - cuFill_kernel<<>>(ptr, _val, Nelem); - } } // namespace utils_internal } // namespace cytnx diff --git a/src/backend/utils_internal_gpu/cuFill_gpu.hpp 
b/src/backend/utils_internal_gpu/cuFill_gpu.hpp index 4d30bdcd6..9e25e4d04 100644 --- a/src/backend/utils_internal_gpu/cuFill_gpu.hpp +++ b/src/backend/utils_internal_gpu/cuFill_gpu.hpp @@ -1,28 +1,26 @@ -#ifndef _H_cuFill_gpu_ -#define _H_cuFill_gpu_ +#ifndef SRC_BACKEND_UTILS_INTERNAL_GPU_CUFILL_GPU_H_ +#define SRC_BACKEND_UTILS_INTERNAL_GPU_CUFILL_GPU_H_ -#include -#include -#include -#include #include "Type.hpp" -#include "backend/Storage.hpp" -#include "cytnx_error.hpp" namespace cytnx { namespace utils_internal { - void cuFill_gpu_cd(void* in, void* val, const cytnx_uint64&); - void cuFill_gpu_cf(void* in, void* val, const cytnx_uint64&); - void cuFill_gpu_d(void* in, void* val, const cytnx_uint64&); - void cuFill_gpu_f(void* in, void* val, const cytnx_uint64&); - void cuFill_gpu_i64(void* in, void* val, const cytnx_uint64&); - void cuFill_gpu_u64(void* in, void* val, const cytnx_uint64&); - void cuFill_gpu_i32(void* in, void* val, const cytnx_uint64&); - void cuFill_gpu_u32(void* in, void* val, const cytnx_uint64&); - void cuFill_gpu_u16(void* in, void* val, const cytnx_uint64&); - void cuFill_gpu_i16(void* in, void* val, const cytnx_uint64&); - void cuFill_gpu_b(void* in, void* val, const cytnx_uint64&); + + /** + * @brief Assign the given value to the first `count` elements in the range beginning at + * `first`. + * + * This function acts the same as `std::fill_n` and is implemented in CUDA.
+ * + * @tparam DType the data type of the elements in the range + * + * @param first the beginning of the range + * @param value the value to be assigned + * @param count the number of elements to modify + */ + template + void FillGpu(void* first, const DType& value, cytnx_uint64 count); } // namespace utils_internal } // namespace cytnx -#endif +#endif // SRC_BACKEND_UTILS_INTERNAL_GPU_CUFILL_GPU_H_ From 086034b6065fc84f5b0a52dbc083687dcc8e6bad Mon Sep 17 00:00:00 2001 From: Ivana Date: Sun, 10 Nov 2024 16:15:14 +0800 Subject: [PATCH 2/3] Increase block size used in FillGpu() to 1024 The maximum x- or y-dimension of a block is 1024 with compute capability above 2.x. The compute capability requirements of cuTENSOR and cuQuantum are much higher than 2.x. Furthermore, it's not easy to find a device that only supports the compute capability below or equal to 1.3. --- src/backend/utils_internal_gpu/cuFill_gpu.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/utils_internal_gpu/cuFill_gpu.cu b/src/backend/utils_internal_gpu/cuFill_gpu.cu index 4faa445fb..0a1b080cc 100644 --- a/src/backend/utils_internal_gpu/cuFill_gpu.cu +++ b/src/backend/utils_internal_gpu/cuFill_gpu.cu @@ -31,8 +31,8 @@ namespace cytnx { using CudaDType = typename ToCudaDType::type; CudaDType* typed_first = reinterpret_cast(first); - cytnx_uint64 block_count = (count + 511) / 512; - FillGpuKernel<<>>(typed_first, static_cast(value), count); + cytnx_uint64 block_count = (count + 1023) / 1024; + FillGpuKernel<<>>(typed_first, static_cast(value), count); } template void FillGpu(void*, const cytnx_complex128&, cytnx_uint64); From eed01c21e031ff2dd7001b443edc4dc6e7152742 Mon Sep 17 00:00:00 2001 From: Ivana Date: Tue, 19 Nov 2024 14:16:31 +0800 Subject: [PATCH 3/3] Change the scheduling type to static for FillCpu() Dynamic scheduling is 1000 times slower than the static scheduling in this case. Below is the result and the code for benchmarking.
``` Total time for FillCpu: 0.0185553 seconds Total time for FillCpuDynamic: 21.2537 seconds ``` ```cpp /** compile command: g++ -std=c++17 -fopenmp -O3 -o fill.o fill.cpp && ./fill.o */ using namespace std; template void FillCpu(void *first, const DType &value, size_t count) { DType *typed_first = reinterpret_cast(first); for (int i = 0; i < count; ++i) { typed_first[i] = value; } } template void FillCpuDynamic(void *first, const DType &value, size_t count) { DType *typed_first = reinterpret_cast(first); for (int i = 0; i < count; ++i) { typed_first[i] = value; } } int main() { int count = 100000; int num_iterations = 10000; int *ptr = reinterpret_cast(malloc(sizeof(int) * count)); int value = 10; { auto start = chrono::high_resolution_clock::now(); for (int iter = 0; iter < num_iterations; ++iter) { FillCpu(reinterpret_cast(ptr), value, count); } auto end = chrono::high_resolution_clock::now(); const std::chrono::duration total_time = end - start; cout << "Total time for FillCpu: " << total_time.count() << " seconds" << endl; } { auto start = chrono::high_resolution_clock::now(); for (int iter = 0; iter < num_iterations; ++iter) { FillCpuDynamic(reinterpret_cast(ptr), value, count); } auto end = chrono::high_resolution_clock::now(); const std::chrono::duration total_time = end - start; cout << "Total time for FillCpuDynamic: " << total_time.count() << " seconds" << endl; } } ``` --- src/backend/utils_internal_cpu/Fill_cpu.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/utils_internal_cpu/Fill_cpu.hpp b/src/backend/utils_internal_cpu/Fill_cpu.hpp index 5d25b72d9..aeb75a651 100644 --- a/src/backend/utils_internal_cpu/Fill_cpu.hpp +++ b/src/backend/utils_internal_cpu/Fill_cpu.hpp @@ -27,7 +27,7 @@ namespace cytnx { void FillCpu(void *first, const DType &value, cytnx_uint64 count) { DType *typed_first = reinterpret_cast(first); #ifdef UNI_OMP - #pragma omp parallel for schedule(dynamic) + #pragma omp parallel for schedule(static) 
#endif for (cytnx_uint64 i = 0; i < count; i++) { typed_first[i] = value;